Index: benchmark/io/http/http_ring.cpp
===================================================================
--- benchmark/io/http/http_ring.cpp	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ benchmark/io/http/http_ring.cpp	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -9,32 +9,10 @@
 #include <liburing.h>
 
-typedef enum {
-	EVENT_END,
-	EVENT_ACCEPT,
-	EVENT_REQUEST,
-	EVENT_ANSWER
-} event_t;
-
-struct __attribute__((aligned(128))) request_t {
-	event_t type;
-	int fd;
-	size_t length;
-	char * buff;
-	char data[0];
-
-	static struct request_t * create(event_t type, size_t extra) {
-		auto ret = (struct request_t *)malloc(sizeof(struct request_t) + extra);
-		ret->type = type;
-		ret->length = extra;
-		ret->buff = ret->data;
-		return ret;
-	}
-
-	static struct request_t * create(event_t type) {
-		return create(type, 0);
-	}
-};
-
+// #define NOBATCHING
+// #define USE_ASYNC
+
+// Options passed to each thread
 struct __attribute__((aligned(128))) options_t {
+	// Data passed to accept
 	struct {
 		int sockfd;
@@ -44,14 +22,62 @@
 	} acpt;
 
+	// Termination notification
 	int endfd;
+
+	// The ring to use for io
 	struct io_uring * ring;
-
+};
+
+//=========================================================
+// General statistics
+struct __attribute__((aligned(128))) stats_block_t {
 	struct {
-		size_t subs = 0;
-		size_t cnts = 0;
-	} result;
+		volatile size_t conns = 0;
+		volatile size_t reads = 0;
+		volatile size_t writes = 0;
+		volatile size_t full_writes = 0;
+	} completions;
+
+	struct {
+		volatile size_t conns = 0;
+		struct {
+			volatile size_t pipes = 0;
+			volatile size_t reset = 0;
+			volatile size_t other = 0;
+		} requests;
+
+		struct {
+			volatile size_t pipes = 0;
+			volatile size_t reset = 0;
+			volatile size_t other = 0;
+		} answers;
+	} errors;
+
+	struct {
+		volatile size_t current = 0;
+		volatile size_t max = 0;
+		volatile size_t used = 0;
+	} conns;
+
+	volatile size_t recycle_errors = 0;
 };
 
+// Each thread gets its own block of stats
+// and there is a global block for tallying at the end
+thread_local stats_block_t stats;
+stats_block_t global_stats;
+
+// Get an array of current connections
+// This is just for debugging, to make sure
+// no two state-machines get the same fd
+const size_t array_max = 25000;
+class connection * volatile conns[array_max] = { 0 };
+
+// Max fd we've seen, keep track so it's convenient to adjust the array size after
+volatile int max_fd = 0;
+
 //=========================================================
+// Some small wrappers for ring operations used outside the connection state machine
+// get sqe + error handling
 static struct io_uring_sqe * get_sqe(struct io_uring * ring) {
 	struct io_uring_sqe * sqe = io_uring_get_sqe(ring);
@@ -63,38 +89,15 @@
 }
 
-static void submit(struct io_uring * ) {
-	// io_uring_submit(ring);
-}
-
-//=========================================================
+// read of the event fd is not done by a connection
+// use nullptr as the user data
 static void ring_end(struct io_uring * ring, int fd, char * buffer, size_t len) {
 	struct io_uring_sqe * sqe = get_sqe(ring);
 	io_uring_prep_read(sqe, fd, buffer, len, 0);
-	io_uring_sqe_set_data(sqe, request_t::create(EVENT_END));
-	submit(ring);
+	io_uring_sqe_set_data(sqe, nullptr);
+	io_uring_submit(ring);
 }
 
-static void ring_accept(struct io_uring * ring, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) {
-	auto req = request_t::create(EVENT_ACCEPT);
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_accept(sqe, sockfd, addr, addrlen, flags);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-	// std::cout << "Submitted accept: " << req << std::endl;
-}
-
-static void ring_request(struct io_uring * ring, int fd) {
-	size_t size = 1024;
-	auto req = request_t::create(EVENT_REQUEST, size);
-	req->fd = fd;
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_read(sqe, fd, req->buff, size, 0);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-	// std::cout << "Submitted request: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
-}
-
 //=========================================================
+// All answers are fixed and determined by the return code
 enum HttpCode {
 	OK200 = 0,
@@ -108,273 +111,428 @@
 };
 
+// Get a fixed reply based on the return code; entries are indexed by HttpCode
 const char * http_msgs[] = {
-	"HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: %zu \n\n%s",
-	"HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 405 Method Not Allowed\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 408 Request Timeout\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+	"HTTP/1.1 200 OK\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 15\r\nConnection: keep-alive\r\n\r\nHello, World!\r\n",
+	"HTTP/1.1 400 Bad Request\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 404 Not Found\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 405 Method Not Allowed\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 408 Request Timeout\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 413 Payload Too Large\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 414 URI Too Long\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
 };
-
-static_assert( KNOWN_CODES == (sizeof(http_msgs ) / sizeof(http_msgs [0])));
-
-const int http_codes[] = {
-	200,
-	400,
-	404,
-	405,
-	408,
-	413,
-	414,
+static_assert( KNOWN_CODES == (sizeof(http_msgs) / sizeof(http_msgs[0])) );
+
+// Pre-compute the length of these replies
+const size_t http_lens[] = {
+	strlen(http_msgs[0]),
+	strlen(http_msgs[1]),
+	strlen(http_msgs[2]),
+	strlen(http_msgs[3]),
+	strlen(http_msgs[4]),
+	strlen(http_msgs[5]),
+	strlen(http_msgs[6]),
 };
-
-static_assert( KNOWN_CODES == (sizeof(http_codes) / sizeof(http_codes[0])));
-
-int code_val(HttpCode code) {
-	return http_codes[code];
-}
-
-static void ring_answer(struct io_uring * ring, int fd, HttpCode code) {
-	size_t size = 256;
-	auto req = request_t::create(EVENT_ANSWER, size);
-	req->fd = fd;
-
-	const char * fmt = http_msgs[code];
-	const char * date = "";
-	size = snprintf(req->buff, size, fmt, date, size);
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_write(sqe, fd, req->buff, size, 0);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-	// std::cout << "Submitted good answer: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
-}
-
-static void ring_answer(struct io_uring * ring, int fd, const std::string &) {
-	// size_t size = 256;
-	// auto req = request_t::create(EVENT_ANSWER, size);
-	// req->fd = fd;
-
-	// const char * fmt = http_msgs[OK200];
-	// const char * date = "";
-	// size_t len = snprintf(req->buffer, size, fmt, date, ans.size(), ans.c_str());
-	// req->length = len;
-
-	// struct io_uring_sqe * sqe = get_sqe(ring);
-	// io_uring_prep_write(sqe, fd, req->buffer, len, 0);
-	// io_uring_sqe_set_data(sqe, req);
-	// submit(ring);
-	// std::cout << "Submitted good answer: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
-
-
-	static const char* RESPONSE = "HTTP/1.1 200 OK\r\n" \
-						"Content-Length: 15\r\n" \
-						"Content-Type: text/html\r\n" \
-						"Connection: keep-alive\r\n" \
-						"Server: testserver\r\n" \
-						"\r\n" \
-						"Hello, World!\r\n";
-
-	static const size_t RLEN = strlen(RESPONSE);
-
-	size_t size = 256;
-	auto req = request_t::create(EVENT_ANSWER, size);
-	req->fd = fd;
-	req->buff = (char*)RESPONSE;
-	req->length = RLEN;
-
-	// const char * fmt = http_msgs[OK200];
-	// const char * date = "";
-	// size_t len = snprintf(req->buffer, size, fmt, date, ans.size(), ans.c_str());
-	// req->length = len;
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_write(sqe, fd, RESPONSE, RLEN, 0);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-}
+static_assert( KNOWN_CODES == (sizeof(http_lens) / sizeof(http_lens[0])) );
 
 //=========================================================
-static void handle_new_conn(struct io_uring * ring, int fd) {
-	if( fd < 0 ) {
-		int err = -fd;
-		if( err == ECONNABORTED ) return;
-		std::cerr << "accept error: (" << errno << ") " << strerror(errno) << std::endl;
-		exit(EXIT_FAILURE);
-	}
-
-	ring_request(ring, fd);
-}
-
-static void handle_request(struct io_uring * ring, struct request_t * in, int res) {
-	if( res < 0 ) {
-		int err = -res;
-		switch(err) {
-			case EPIPE:
-			case ECONNRESET:
-				close(in->fd);
-				free(in);
+// Finite state machine responsible for handling each connection
+class __attribute__((aligned(128))) connection {
+private:
+	// The state of the machine
+	enum {
+		ACCEPTING,  // Accept sent waiting for connection
+		REQUESTING, // Waiting for new request
+		ANSWERING,  // Either request received submitting answer or short answer sent, need to submit rest
+	} state;
+
+	// The file descriptor of the connection
+	int fd;
+
+	// request data
+	static const size_t buffer_size = 1024;	// Size of the read buffer
+	const char * buffer;                      // Buffer into which requests are read
+
+	// send data
+	size_t to_send;		// Data left to send
+	const char * iterator;	// Pointer to rest of the message to send
+
+	// stats
+	// how many requests/answers were complete, that is, a valid cqe was obtained
+	struct {
+		size_t requests = 0;
+		size_t answers = 0;
+	} stats;
+
+private:
+	connection()
+		: state(ACCEPTING)
+		, fd(0)
+		, buffer( new char[buffer_size])
+		, iterator(nullptr)
+	{}
+
+	~connection() { // Release the read buffer and un-count the connection
+		delete [] buffer;
+		if(fd != 0) ::stats.conns.current--; // only connections that were accepted (fd set in newconn) were counted as current
+	}
+
+	// Close the current connection: clear its debug-array slot, close the fd if one was accepted, then destroy the object (err is only used by the debug print below)
+	void close(int err) {
+		// std::cout << "(" << this->stats.requests << "," << this->stats.answers << ", e" << err << ") ";
+		if((size_t)fd < array_max) conns[fd] = nullptr; // fds beyond the debug array have no slot, don't write out of bounds
+
+		if(fd != 0) {
+			::close(fd);
+		}
+		delete this;
+	}
+
+	//--------------------------------------------------
+	// Wrappers for submit so we can tweak it more easily
+	static void submit(struct io_uring * ring, struct io_uring_sqe * sqe, connection * conn) {
+		(void)ring;
+		#ifdef USE_ASYNC
+			io_uring_sqe_set_flags(sqe, IOSQE_ASYNC);
+		#endif
+		io_uring_sqe_set_data(sqe, conn);
+		#ifdef NOBATCHING
+			io_uring_submit(ring);
+		#endif
+	}
+
+	void submit(struct io_uring * ring, struct io_uring_sqe * sqe) {
+		submit(ring, sqe, this);
+	}
+
+	//--------------------------------------------------
+	// get a new request from the client
+	void request(struct io_uring * ring) {
+		state = REQUESTING;
+		struct io_uring_sqe * sqe = get_sqe(ring);
+		io_uring_prep_recv(sqe, fd, (void*)buffer, buffer_size, 0);
+		submit(ring, sqe);
+	}
+
+	//--------------------------------------------------
+	// Send a new answer based on a return code
+	void answer(struct io_uring * ring, HttpCode code) {
+		iterator = http_msgs[code];
+		to_send  = http_lens[code];
+		if(to_send != 124) {
+			std::cerr << "Answer has weird size: " << to_send << " (" << (int)code << ")" << std::endl;
+		}
+		answer(ring);
+	}
+
+	// send a new answer to the client
+	// Reused for incomplete writes
+	void answer(struct io_uring * ring) {
+		state = ANSWERING;
+		struct io_uring_sqe * sqe = get_sqe(ring);
+		io_uring_prep_send(sqe, fd, iterator, to_send, 0);
+		submit(ring, sqe);
+	}
+
+	//--------------------------------------------------
+	// Handle a new connection, i.e. a cqe obtained while in the ACCEPTING state; ret is the cqe result (accepted fd, or -errno)
+	void newconn(struct io_uring * ring, int ret) {
+		// Check errors
+		if( ret < 0 ) {
+			int err = -ret;
+			if( err == ECONNABORTED ) {
+				::stats.errors.conns++;
+				this->close(err);
 				return;
-			default:
-				std::cerr << "request error: (" << err << ") " << strerror(err) << std::endl;
-				exit(EXIT_FAILURE);
-		}
-	}
-
-	if(res == 0) {
-		close(in->fd);
-		free(in);
-		return;
-	}
-
-	const char * it = in->buff;
-	if( !strstr( it, "\r\n\r\n" ) ) {
-		std::cout << "Incomplete request" << std::endl;
-		close(in->fd);
-		free(in);
-		return;
-	}
-
-	it = in->buff;
-	const std::string reply = "Hello, World!\n";
-	int ret = memcmp(it, "GET ", 4);
-	if( ret != 0 ) {
-		ring_answer(ring, in->fd, E400);
-		goto NEXT;
-	}
-
-	it += 4;
-	ret = memcmp(it, "/plaintext", 10);
-	if( ret != 0 ) {
-		ring_answer(ring, in->fd, E404);
-		goto NEXT;
-	}
-
-	ring_answer(ring, in->fd, reply);
-
-	NEXT:
-		ring_request(ring, in->fd);
-		return;
-}
-
-static void handle_answer(struct io_uring * ring, struct request_t * in, int res) {
-	if( res < 0 ) {
-		int err = -res;
-		switch(err) {
-			case EPIPE:
-			case ECONNRESET:
-				close(in->fd);
-				free(in);
-				return;
-			default:
-				std::cerr << "answer error: (" << err << ") " << strerror(err) << std::endl;
-				exit(EXIT_FAILURE);
-		}
-	}
-
-	if( res >= in->length ) {
-		free(in);
-		return;
-	}
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_write(sqe, in->fd, in->buff + res, in->length - res, 0);
-	io_uring_sqe_set_data(sqe, in);
-	submit(ring);
-	// std::cout << "Re-Submitted request: " << in << " (" << (void*)in->buffer << ")"<<std::endl;
-
-	ring_request(ring, in->fd);
-}
+			}
+			std::cerr << "accept error: (" << err << ") " << strerror(err) << std::endl; // the error comes from the cqe result, not from errno
+			exit(EXIT_FAILURE);
+		}
+
+		// Count the connections
+		::stats.completions.conns++;
+		::stats.conns.current++;
+		if(::stats.conns.current > ::stats.conns.max) {
+			::stats.conns.max = ::stats.conns.current;
+		}
+
+		// Read on the data
+		fd = ret;
+		request(ring);
+
+		// Raise the global max fd if needed, so we know afterwards if we exceeded the array
+		for(;;) {
+			int expected = max_fd;
+			if(expected >= fd) break; // only leave the loop: returning here made the registration below unreachable
+			if( __atomic_compare_exchange_n(&max_fd, &expected, fd, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break;
+		}
+
+		// check if we have enough space to fit inside the array
+		if(fd >= array_max) {
+			std::cerr << "accept error: fd " << fd << " is too high" << std::endl;
+			return;
+		}
+
+		// Put our connection into the global array
+		// No one else should be using it so if they are that's a bug
+		auto exist = __atomic_exchange_n( &conns[fd], this, __ATOMIC_SEQ_CST);
+		if( exist ) {
+			size_t first = __atomic_fetch_add(&global_stats.recycle_errors, 1, __ATOMIC_SEQ_CST);
+			if( first == 0 ) {
+				std::cerr << "First: accept has existing connection " << std::endl;
+			}
+		}
+	}
+
+	// Handle a new request, i.e. a cqe obtained while in the REQUESTING state; res is the cqe result (bytes received, 0 on EOF, or -errno)
+	void newrequest(struct io_uring * ring, int res) {
+		// Check errors
+		if( res < 0 ) {
+			int err = -res;
+			switch(err) {
+				case EPIPE:
+					::stats.errors.requests.pipes++;
+					break;
+					// Don't fall through, to get better stats
+				case ECONNRESET:
+					::stats.errors.requests.reset++;
+					break;
+				default:
+					::stats.errors.requests.other++;
+					std::cerr << "request error: (" << err << ") " << strerror(err) << std::endl;
+					exit(EXIT_FAILURE);
+			}
+
+			// Connection failed, close it
+			this->close(err);
+			return;
+		}
+
+		// Update stats
+		::stats.completions.reads++;
+
+		// Is this an EOF
+		if(res == 0) {
+			// Yes, close the connection
+			this->close(0);
+			return;
+		}
+
+		// Find the end of the request header, looking only at the res bytes just received (the buffer is not NUL-terminated)
+		const char * it = buffer;
+		if( !memmem( it, res, "\r\n\r\n", 4 ) ) {
+			// This state machine doesn't support incomplete reads
+			// Print them to output so it's clear there is an issue
+			std::cout << "Incomplete request" << std::endl;
+			this->close(EBADR);
+			return;
+		}
+
+		// Find the method to use (at least 4 bytes were received since the terminator was found)
+		it = buffer;
+		int ret = memcmp(it, "GET ", 4);
+		if( ret != 0 ) {
+			// We only support get, answer with an error
+			answer(ring, E400);
+			return;
+		}
+
+		// Find the target, requests too short to hold it (4 + 10 bytes) can't match
+		it += 4;
+		ret = (res >= 14) ? memcmp(it, "/plaintext", 10) : 1;
+		if( ret != 0 ) {
+			// We only support /plaintext, answer with an error
+			answer(ring, E404);
+			return;
+		}
+
+		// Correct request, answer with the payload
+		this->stats.requests++;
+		answer(ring, OK200);
+	}
+
+	// Handle a partial or full answer sent, i.e. a cqe obtained while in the ANSWERING state; res is the cqe result (bytes sent, or -errno)
+	void writedone(struct io_uring * ring, int res) {
+		// Check errors
+		if( res < 0 ) {
+			int err = -res;
+			switch(err) {
+				case EPIPE:
+					::stats.errors.answers.pipes++;
+					break;
+					// Don't fall through, to get better stats
+				case ECONNRESET:
+					::stats.errors.answers.reset++;
+					break;
+				default:
+					::stats.errors.answers.other++;
+					std::cerr << "answer error: (" << err << ") " << strerror(err) << std::endl;
+					exit(EXIT_FAILURE);
+			}
+
+			this->close(err);
+			return;
+		}
+
+		// Update stats
+		::stats.completions.writes++;
+		if(res == 124) ::stats.completions.full_writes++; // 124 is the length of the OK200 reply
+
+		// Is this write completed
+		if( res == to_send ) {
+			// Yes, more stats
+			this->stats.answers++;
+			if(this->stats.answers == 1) ::stats.conns.used++;
+			// Then read a new request
+			request(ring);
+			return;
+		}
+
+		// Not a completed write, push the rest
+		to_send -= res;
+		iterator += res;
+		answer(ring);
+	}
+public:
+	// Submit a call to accept and create a new connection object
+	static void accept(struct io_uring * ring, const struct options_t & opt) {
+		struct io_uring_sqe * sqe = get_sqe(ring);
+		io_uring_prep_accept(sqe, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
+		submit(ring, sqe, new connection());
+		// std::cout << "Submitted accept: " << req << std::endl;
+	}
+
+	// Handle a new cqe
+	void handle(struct io_uring * ring, int res, const struct options_t & opt) {
+		switch(state) {
+		case ACCEPTING:
+			connection::accept(ring, opt);
+			newconn(ring, res);
+			break;
+		case REQUESTING:
+			newrequest(ring, res);
+			break;
+		case ANSWERING:
+			writedone(ring, res);
+			break;
+		}
+	}
+};
 
 //=========================================================
-extern "C" {
-extern int __io_uring_flush_sq(struct io_uring *ring);
-}
-
+// Main loop of the WebServer
+// Effectively uses one thread_local copy of everything per kernel thread
 void * proc_loop(void * arg) {
-	size_t count = 0;
+	// Get the thread local argument
 	struct options_t & opt = *(struct options_t *)arg;
-
 	struct io_uring * ring = opt.ring;
 
+	// Track the shutdown using a event_fd
 	char endfd_buf[8];
 	ring_end(ring, opt.endfd, endfd_buf, 8);
 
-	ring_accept(ring, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
-
-	bool done = false;
+	// Accept our first connection
+	// May not take effect until io_uring_submit_and_wait
+	connection::accept(ring, opt);
+
+	int reset = 1;       // Counter to print stats once in a while
+	bool done = false;   // Are we done
+	size_t sqes = 0;     // Number of sqes we submitted
+	size_t call = 0;     // Number of submits we made
 	while(!done) {
-    		struct io_uring_cqe *cqe;
-		int ret;
-		while(-EAGAIN == (ret = io_uring_wait_cqe_nr(ring, &cqe, 0))) {
-			ret = io_uring_submit_and_wait(ring, 1);
-			if (ret < 0) {
-				fprintf( stderr, "io_uring get error: (%d) %s\n", (int)-ret, strerror(-ret) );
-				exit(EXIT_FAILURE);
-			}
-			opt.result.subs += ret;
-			opt.result.cnts++;
-		}
-
-		if (ret < 0 && -EAGAIN != ret) {
-			fprintf( stderr, "io_uring peek error: (%d) %s\n", (int)-ret, strerror(-ret) );
+		// Submit all the answers we have and wait for responses
+		int ret = io_uring_submit_and_wait(ring, 1);
+
+		// check errors
+		if (ret < 0) {
+			fprintf( stderr, "io_uring S&W error: (%d) %s\n", (int)-ret, strerror(-ret) );
 			exit(EXIT_FAILURE);
 		}
 
-		auto req = (struct request_t *)cqe->user_data;
-		// std::cout << req << " completed with " << cqe->res << std::endl;
-
-		switch(req->type) {
-			case EVENT_END:
+		// Check how good we are at batching sqes
+		sqes += ret;
+		call++;
+
+		struct io_uring_cqe *cqe;
+		unsigned head;
+		unsigned count = 0;
+
+        	// go through all cqes
+        	io_uring_for_each_cqe(ring, head, cqe) {
+			if (0 == cqe->user_data) {
 				done = true;
 				break;
-			case EVENT_ACCEPT:
-				handle_new_conn(ring, cqe->res);
-				free(req);
-				ring_accept(ring, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
-				break;
-			case EVENT_REQUEST:
-				handle_request(ring, req, cqe->res);
-				break;
-			case EVENT_ANSWER:
-				handle_answer(ring, req, cqe->res);
-				break;
-		}
-
-		io_uring_cqe_seen(ring, cqe);
-	}
-
-	return (void*)count;
+			}
+
+			auto req = (class connection *)cqe->user_data;
+			req->handle( ring, cqe->res, opt );
+
+			// Every now and then, print some stats
+			reset--;
+			if(reset == 0) {
+				std::cout << "Submit average: " << sqes << "/" << call << "(" << (((double)sqes) / call) << ")" << std::endl;
+				// Reset to some random number of completions
+				// use the ring_fd in the number of threads don't all print at once
+				reset = 100000 + (100000 * (ring->ring_fd % 5));
+			}
+
+			// Keep track of how many cqes we have seen
+			count++;
+		}
+
+		// Mark the cqes as seen
+		io_uring_cq_advance(ring, count);
+	}
+
+	// Tally all the thread local statistics
+	__atomic_fetch_add( &global_stats.completions.conns, ::stats.completions.conns, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.completions.reads, ::stats.completions.reads, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.completions.writes, ::stats.completions.writes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.completions.full_writes, ::stats.completions.full_writes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.conns, ::stats.errors.conns, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.requests.pipes, ::stats.errors.requests.pipes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.requests.reset, ::stats.errors.requests.reset, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.requests.other, ::stats.errors.requests.other, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.answers.pipes, ::stats.errors.answers.pipes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.answers.reset, ::stats.errors.answers.reset, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.answers.other, ::stats.errors.answers.other, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.conns.current, ::stats.conns.current, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.conns.max, ::stats.conns.max, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.conns.used, ::stats.conns.used, __ATOMIC_SEQ_CST );
+
+	return nullptr;
 }
 
 //=========================================================
-struct __attribute__((aligned(128))) aligned_ring {
-	struct io_uring storage;
-};
-
-#include <bit>
-
-#include <pthread.h>
+#include <bit> // for ispow2
+
 extern "C" {
-	#include <signal.h>
-	#include <sys/eventfd.h>
-	#include <sys/socket.h>
-	#include <netinet/in.h>
+	#include <pthread.h>      // for pthreads
+	#include <signal.h>       // for signal(SIGPIPE, SIG_IGN);
+	#include <sys/eventfd.h>  // use for termination
+	#include <sys/socket.h>   // for sockets in general
+	#include <netinet/in.h>   // for sockaddr_in, AF_INET
 }
 
 int main(int argc, char * argv[]) {
+	// Initialize the array of connection-fd associations
+	for(int i = 0; i < array_max; i++) {
+		conns[i] = nullptr;
+	}
+
+	// Make sure we ignore all sigpipes
 	signal(SIGPIPE, SIG_IGN);
 
-	unsigned nthreads = 1;
-	unsigned port = 8800;
-	unsigned entries = 256;
-	unsigned backlog = 10;
-	bool attach = false;
+	// Default command line arguments
+	unsigned nthreads = 1;      // number of kernel threads
+	unsigned port = 8800;       // which port to listen on
+	unsigned entries = 256;     // number of entries per ring/kernel thread
+	unsigned backlog = 262144;  // backlog argument to listen
+	bool attach = false;        // Whether or not to attach all the rings
+	bool sqpoll = false;        // Whether or not to use SQ Polling
 
 	//===================
-	// Arguments
+	// Arguments Parsing
 	int c;
-	while ((c = getopt (argc, argv, "t:p:e:b:a")) != -1) {
+	while ((c = getopt (argc, argv, "t:p:e:b:aS")) != -1) {
 		switch (c)
 		{
@@ -394,7 +552,10 @@
 			attach = true;
 			break;
+		case 'S':
+			sqpoll = true;
+			break;
 		case '?':
 		default:
-			std::cerr << "Usage: -t <threads> -p <port> -e <entries> -b <backlog> -a" << std::endl;
+			std::cerr << "Usage: -t <threads> -p <port> -e <entries> -b <backlog> -aS" << std::endl;
 			return EXIT_FAILURE;
 		}
@@ -416,4 +577,5 @@
 	//===================
 	// End FD
+	// Create a single event fd to notify the kernel threads when the server shuts down
 	int efd = eventfd(0, EFD_SEMAPHORE);
 	if (efd < 0) {
@@ -424,4 +586,5 @@
 	//===================
 	// Open Socket
+	// Listen on specified port
 	std::cout << getpid() << " : Listening on port " << port << std::endl;
 	int server_fd = socket(AF_INET, SOCK_STREAM, 0);
@@ -439,4 +602,6 @@
 	address.sin_port = htons( port );
 
+	// In case the port is already in use, don't just return an error
+	// Linux is very slow at reclaiming port so just retry regularly
 	int waited = 0;
 	while(true) {
@@ -444,14 +609,17 @@
 		if(ret < 0) {
 			if(errno == EADDRINUSE) {
+				// Port is in use, let's retry later
 				if(waited == 0) {
 					std::cerr << "Waiting for port" << std::endl;
 				} else {
+					// To be sure, print how long we have been waiting
 					std::cerr << "\r" << waited;
 					std::cerr.flush();
 				}
 				waited ++;
-				usleep( 1000000 );
+				usleep( 1000000 ); // Wait and retry
 				continue;
 			}
+			// Some other error occurred, this is a real error
 			std::cerr << "bind error: (" << errno << ") " << strerror(errno) << std::endl;
 			exit(EXIT_FAILURE);
@@ -474,19 +642,39 @@
 	std::cout << std::endl;
 
+	// Create the desired number of kernel-threads and for each
+	// create a ring. Create the rings in the main so we can attach them
+	// Since the rings are all in a dense VLA, aligned them so we don't get false sharing
+	// it's unlikely but better safe than sorry
+	struct __attribute__((aligned(128))) aligned_ring {
+		struct io_uring storage;
+	};
 	aligned_ring thrd_rings[nthreads];
 	pthread_t    thrd_hdls[nthreads];
 	options_t    thrd_opts[nthreads];
+	bool no_drops  = true;
+	bool fast_poll = true;
+	bool nfix_sqpl = true;
 	for(unsigned i = 0; i < nthreads; i++) {
-		if(!attach || i == 0) {
-			io_uring_queue_init(entries, &thrd_rings[i].storage, 0);
-		}
-		else {
-			struct io_uring_params p;
-			memset(&p, 0, sizeof(p));
-			p.flags = IORING_SETUP_ATTACH_WQ;
+		struct io_uring_params p = { };
+
+		if(sqpoll) { // If sqpoll is on, add the flag
+			p.flags |= IORING_SETUP_SQPOLL;
+			p.sq_thread_idle = 100;
+		}
+
+		if (attach && i != 0) { // If attach is on, add the flag, except for the first ring
+			p.flags |= IORING_SETUP_ATTACH_WQ;
 			p.wq_fd = thrd_rings[0].storage.ring_fd;
-			io_uring_queue_init_params(entries, &thrd_rings[i].storage, &p);
-		}
-
+		}
+
+		// Create the ring
+		io_uring_queue_init_params(entries, &thrd_rings[i].storage, &p);
+
+		// Check if some of the note-worthy features are there
+		if(0 == (p.features & IORING_FEAT_NODROP         )) { no_drops  = false; }
+		if(0 == (p.features & IORING_FEAT_FAST_POLL      )) { fast_poll = false; }
+		if(0 == (p.features & IORING_FEAT_SQPOLL_NONFIXED)) { nfix_sqpl = false; }
+
+		// Write the socket options we want to the options we pass to the threads
 		thrd_opts[i].acpt.sockfd  = server_fd;
 		thrd_opts[i].acpt.addr    = (struct sockaddr *)&address;
@@ -502,4 +690,9 @@
 		}
 	}
+
+	// Tell the user if the features are present
+	if( no_drops ) std::cout << "No Drop Present" << std::endl;
+	if( fast_poll) std::cout << "Fast Poll Present" << std::endl;
+	if(!nfix_sqpl) std::cout << "Non-Fixed SQ Poll not Present" << std::endl;
 
 	//===================
@@ -510,4 +703,5 @@
 		int ret;
 		do {
+			// Wait for a Ctrl-D to close the server
 			ret = read(STDIN_FILENO, buffer, 128);
 			if(ret < 0) {
@@ -526,4 +720,5 @@
 
 	//===================
+	// Use eventfd_write to tell the threads we are closing
 	(std::cout << "Sending Shutdown to Threads... ").flush();
 	ret = eventfd_write(efd, nthreads);
@@ -535,7 +730,6 @@
 
 	//===================
+	// Join all the threads and close the rings
 	(std::cout << "Stopping Threads Done... ").flush();
-	size_t total = 0;
-	size_t count = 0;
 	for(unsigned i = 0; i < nthreads; i++) {
 		void * retval;
@@ -545,14 +739,11 @@
 			exit(EXIT_FAILURE);
 		}
-		// total += (size_t)retval;
-		total += thrd_opts[i].result.subs;
-		count += thrd_opts[i].result.cnts;
 
 		io_uring_queue_exit(thrd_opts[i].ring);
 	}
 	std::cout << "done" << std::endl;
-	std::cout << "Submit average: " << total << "/" << count << "(" << (((double)total) / count) << ")" << std::endl;
 
 	//===================
+	// Close the sockets
 	(std::cout << "Closing Socket... ").flush();
 	ret = shutdown( server_fd, SHUT_RD );
@@ -567,4 +758,16 @@
 		exit(EXIT_FAILURE);
 	}
-	std::cout << "done" << std::endl;
+	std::cout << "done" << std::endl << std::endl;
+
+	// Print stats and exit
+	std::cout << "Errors: " << global_stats.errors.conns << "c, (" << global_stats.errors.requests.pipes << "p, " << global_stats.errors.requests.reset << "r, " << global_stats.errors.requests.other << "o" << ")r, (" << global_stats.errors.answers.pipes << "p, " << global_stats.errors.answers.reset << "r, " << global_stats.errors.answers.other << "o" << ")a" << std::endl;
+	std::cout << "Completions: " << global_stats.completions.conns << "c, " << global_stats.completions.reads << "r, " << global_stats.completions.writes << "w" << std::endl;
+	std::cout << "Full Writes: " << global_stats.completions.full_writes << std::endl;
+	std::cout << "Max FD: " << max_fd << std::endl;
+	std::cout << "Successful connections: " << global_stats.conns.used << std::endl;
+	std::cout << "Max concurrent connections: " << global_stats.conns.max << std::endl;
+	std::cout << "Accepts on non-zeros: " << global_stats.recycle_errors << std::endl;
+	std::cout << "Leaked conn objects: " << global_stats.conns.current << std::endl;
 }
+
+// compile-command: "g++ http_ring.cpp -std=c++2a -pthread -luring -O3" //
Index: benchmark/io/http/parhttperf
===================================================================
--- benchmark/io/http/parhttperf	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
+++ benchmark/io/http/parhttperf	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+NTHREADS=$1	# number of parallel httperf clients; every later argument is forwarded to httperf
+shift
+echo "Running $NTHREADS"
+
+mkdir -p out
+rm -v out/*
+for ((i=0; i<$NTHREADS; i++))
+do
+	# echo "httperf --client $i/$NTHREADS $@ > out/result.$i.out"
+	httperf --client "$i/$NTHREADS" "$@" > "out/result.$i.out" &	# "$@" keeps arguments containing spaces intact
+done
+
+wait
+echo "Experiment terminated"
+
+FILES=""
+for ((i=0; i<$NTHREADS; i++))
+do
+	FILES="$FILES out/result.$i.out"
+done
+./parse-httperf.py $FILES ||  echo $FILES	# $FILES is left unquoted on purpose: it must split into one argument per file
Index: benchmark/io/http/parse-httperf.py
===================================================================
--- benchmark/io/http/parse-httperf.py	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
+++ benchmark/io/http/parse-httperf.py	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -0,0 +1,258 @@
+#!/usr/bin/python3
+
+import argparse
+import decimal
+import math
+import re
+import sys
+
+import collections, functools, operator
+
+def parse(reg, lines):  # first match of `reg` among `lines`; raises IndexError when no line matches
+	m = [re.findall(reg,l) for l in lines]  # one findall() result list per line (empty when the line does not match)
+	return [*filter(None, m)][0][0]  # first non-empty result, first match: a tuple of groups, or a str for a single-group regex
+
+def wavg(vals, ws):  # weighted average of vals using the weights ws
+	t = sum(ws)  # total weight
+	if t == 0:
+		return 0.0  # no weight at all: report 0.0 rather than divide by zero
+	s = sum([vals[i] * ws[i] for i in range(len(vals))])
+	return s / t
+
+def hist(s):  # whitespace-separated integer counts -> {position: count}, keeping only non-zero counts
+	s = s.split()
+	h = [int(v) for v in s]
+	return dict([(k, v) for (k,v) in enumerate(h) if v != 0])  # key is the 0-based position in the list
+
+class Result:  # metrics parsed from one httperf report; one dict per report section
+	def __init__(self):
+		self.total = {}
+		self.connect = {}
+		self.request = {}
+		self.reply = {}
+		self.misc = {}
+		self.errors = {}
+		self.session = {}
+
+	@staticmethod
+	def from_file(file):  # parse every line yielded by iterating `file` into a new Result
+		r = Result()
+		lines  = [l for l in file]
+		print(lines[0].strip())  # echo the first input line
+		#------------------------------
+		# total: the "Total: connections ... test-duration" line
+		totals = parse(r'^Total: connections ([0-9]+) requests ([0-9]+) replies ([0-9]+) test-duration ([\.0-9]+) s', lines)
+		r.total = {
+			'connections': int(totals[0]),
+			'requests': int(totals[1]),
+			'replies': int(totals[2]),
+			'duration': float(totals[3])
+		}
+
+		#------------------------------
+		# connection: rate, time statistics, connect time and replies per connection
+		connection1 = parse(r'^Connection rate: ([\.0-9]+) conn/s \(([\.0-9]+) ms/conn, <=([0-9]+) concurrent connections\)', lines)
+		connection2 = parse(r'^Connection time \[ms\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) median ([\.0-9]+) stddev ([\.0-9]+)', lines)
+		connection3 = parse(r'^Connection time \[ms\]: connect ([\.0-9]+)', lines)
+		connection4 = parse(r'^Connection length \[replies/conn\]: ([\.0-9]+)', lines)
+		r.connect = {
+			'rate': { 'conn/s': float(connection1[0]), 'ms/conn': float(connection1[1]), '<=': int(connection1[2]) },
+			'time': { 'min': float(connection2[0]), 'avg': float(connection2[1]), 'max': float(connection2[2]), 'median': float(connection2[3]), 'stddev': float(connection2[4]) },
+			'connect': float(connection3),
+			'length': float(connection4)
+		}
+
+		#------------------------------
+		# request: rate and size
+		request1 = parse(r'^Request rate: ([\.0-9]+) req/s \(([\.0-9]+) ms/req\)', lines)
+		request2 = parse(r'^Request size \[B\]: ([\.0-9]+)', lines)
+		r.request = {
+			'req/s': float(request1[0]),
+			'ms/req': float(request1[1]),
+			'size': float(request2)
+		}
+
+		#------------------------------
+		# reply: rate, time, size and status-class counts
+		replies1 = parse(r'^Reply rate \[replies/s\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) stddev ([\.0-9]+)', lines)
+		replies2 = parse(r'^Reply time \[ms\]: response ([\.0-9]+) transfer ([\.0-9]+)', lines)
+		replies3 = parse(r'^Reply size \[B\]: header ([\.0-9]+) content ([\.0-9]+) footer ([\.0-9]+) \(total ([\.0-9]+)\)', lines)
+		replies4 = parse(r'^Reply status: 1xx=([0-9]+) 2xx=([0-9]+) 3xx=([0-9]+) 4xx=([0-9]+) 5xx=([0-9]+)', lines)
+		r.reply = {
+			'rate' : { 'min': float(replies1[0]), 'avg': float(replies1[1]), 'max': float(replies1[2]), 'stddev': float(replies1[3]) },
+			'time' : { 'response': float(replies2[0]), 'transfer': float(replies2[1]) },
+			'size' : { 'header': float(replies3[0]), 'content': float(replies3[1]), 'footer': float(replies3[2]), 'total': float(replies3[3]) },
+			'status' : { '1xx': int(replies4[0]), '2xx': int(replies4[1]), '3xx': int(replies4[2]), '4xx': int(replies4[3]), '5xx': int(replies4[4]) }
+		}
+
+		#------------------------------
+		# misc: CPU time and network throughput ('bps' is stored as mantissa * 10^exponent)
+		misc1 = parse(r'^CPU time \[s\]: user ([\.0-9]+) system ([\.0-9]+) \(user ([\.0-9]+)% system ([\.0-9]+)% total ([\.0-9]+)%\)', lines)
+		misc2 = parse(r'^Net I/O: ([\.0-9]+) KB/s \(([\.0-9]+)\*10\^([0-9]+) bps\)', lines)
+		r.misc = {
+			'usr': float(misc1[0]),
+			'sys': float(misc1[1]),
+			'usr%': float(misc1[2]),
+			'sys%': float(misc1[3]),
+			'total%': float(misc1[4]),
+			'KB/S': float(misc2[0]),
+			'bps': float(misc2[1]) * math.pow(10, int(misc2[2]))
+		}
+
+		#------------------------------
+		# errors: both "Errors:" lines merged into one dict
+		errors1 = parse(r'^Errors: total ([0-9]+) client-timo ([0-9]+) socket-timo ([0-9]+) connrefused ([0-9]+) connreset ([0-9]+)', lines)
+		errors2 = parse(r'^Errors: fd-unavail ([0-9]+) addrunavail ([0-9]+) ftab-full ([0-9]+) other ([0-9]+)', lines)
+		r.errors = {
+			'total': int(errors1[0]),
+			'client-timout': int(errors1[1]),
+			'socket-timout': int(errors1[2]),
+			'connection-refused': int(errors1[3]),
+			'connection-reset': int(errors1[4]),
+			'fd-unavailable': int(errors2[0]),
+			'address-unavailable': int(errors2[1]),
+			'ftab-full': int(errors2[2]),
+			'other': int(errors2[3])
+		}
+
+		#------------------------------
+		# session: rate, success/total counts, lifetimes and the length histogram
+		session1 = parse(r'^Session rate \[sess/s\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) stddev ([\.0-9]+) \(([0-9]+)/([0-9]+)\)', lines)
+		session2 = parse(r'^Session: avg ([\.0-9]+) connections/session', lines)
+		session3 = parse(r'^Session lifetime \[s\]: ([\.0-9]+)', lines)
+		session4 = parse(r'^Session failtime \[s\]: ([\.0-9]+)', lines)
+		session5 = parse(r'^Session length histogram: ([ 0-9]+)', lines)
+		r.session = {
+			'rate': { 'min': float(session1[0]), 'avg': float(session1[1]), 'max': float(session1[2]), 'stddev': float(session1[3]) },
+			'successes': int(session1[4]),
+			'totals': int(session1[5]),
+			'conns/ses': float(session2),
+			'lifetime': float(session3),
+			'failtime': float(session4),
+			'hist': hist(session5)
+		}
+
+		return r
+
+if __name__ == "__main__":
+	#------------------------------
+	# parse args
+	parser = argparse.ArgumentParser(description='Script aggregates httperf output')
+	parser.add_argument('files', metavar='files', type=argparse.FileType('r'), nargs='*', help='a list of files to aggregate')
+
+	try:
+		args =  parser.parse_args()
+	except:
+		print('ERROR: invalid arguments', file=sys.stderr)
+		parser.print_help(sys.stderr)
+		sys.exit(1)
+
+	if len(args.files) == 0:
+		print('No input files', file=sys.stderr)
+		parser.print_help(sys.stderr)
+		sys.exit(1)
+
+	#------------------------------
+	# Construct objects
+	results = [Result.from_file(f) for f in args.files]
+
+	#==================================================
+	# Print
+	#==================================================
+	totals = dict(functools.reduce(operator.add, map(collections.Counter, [r.total for r in results])))
+	totals['duration-'] = min([r.total['duration'] for r in results])
+	totals['duration+'] = max([r.total['duration'] for r in results])
+	print("")
+	print("")
+	print("Total: connections {:,} requests {:,} replies {:,} test-duration {}-{} s".format(totals['connections'], totals['requests'], totals['replies'], totals['duration-'], totals['duration+']))
+	print("")
+
+	#==================================================
+	connections = {
+		'conn/s': sum([r.connect['rate']['conn/s'] for r in results]),
+		'<=': sum([r.connect['rate']['<='] for r in results]),
+		'min': min([r.connect['time']['min'] for r in results]),
+		'avg': wavg([r.connect['time']['avg'] for r in results], [r.total['connections'] for r in results]),
+		'max': max([r.connect['time']['max'] for r in results]),
+		'median': wavg([r.connect['time']['median'] for r in results], [r.total['connections'] for r in results]),
+		'stddev': wavg([r.connect['time']['stddev'] for r in results], [r.total['connections'] for r in results]),
+		'connect': wavg([r.connect['connect'] for r in results], [r.total['connections'] for r in results]),
+		'length': wavg([r.connect['length'] for r in results], [r.total['connections'] for r in results])
+	}
+	print("Connection rate: {:,.2f} conn/s ({:.2f} ms/conn, <={:,} concurrent connections)".format(connections['conn/s'], 1000.0 / connections['conn/s'] if connections['conn/s'] else 0.0, connections['<=']))  # report 0.00 ms/conn instead of dividing by a zero rate
+	print("Connection time [ms]: min {:,.2f} avg {:,.2f} max {:,.2f} avg median {:,.2f} avg stddev {:,.2f}".format(connections['min'], connections['avg'], connections['max'], connections['median'], connections['stddev']))
+	print("Connection time [ms]: connect {:,.2f}".format(connections['connect']))
+	print("Connection length [replies/conn]: {:,.2f}".format(connections['length']))
+	print("")
+
+	#==================================================
+	requests = {
+		'req/s': sum([r.request['req/s'] for r in results]),
+		'size': wavg([r.request['size'] for r in results], [r.total['requests'] for r in results])
+	}
+	print("Request rate: {:,.2f} req/s ({:.2f} ms/req)".format(requests['req/s'], 1000.0 / requests['req/s'] if requests['req/s'] else 0.0))  # report 0.00 ms/req instead of dividing by a zero rate
+	print("Request size [B]: {:,.2f}".format(requests['size']))
+	print("")
+
+	#==================================================
+	replies = {
+		'min': sum([r.reply['rate']['min'] for r in results]),
+		'avg': sum([r.reply['rate']['avg'] for r in results]),
+		'max': sum([r.reply['rate']['max'] for r in results]),
+		'std':  wavg([r.reply['rate']['stddev'] for r in results], [r.total['replies'] for r in results])
+	}
+	print("Reply rate [replies/s]: min {:,.2f} avg {:,.2f} max {:,.2f} avg stddev {:,.2f}".format(replies['min'], replies['avg'], replies['max'], replies['std']))
+	replies = {
+		'rs': wavg([r.reply['time']['response'] for r in results], [r.total['replies'] for r in results]),
+		'tr': wavg([r.reply['time']['transfer'] for r in results], [r.total['replies'] for r in results])
+	}
+	print("Reply time [ms]: response {:,.2f} transfer {:,.2f}".format(replies['rs'], replies['tr']))
+	replies = {
+		'hd': wavg([r.reply['size']['header' ] for r in results], [r.total['replies'] for r in results]),
+		'ct': wavg([r.reply['size']['content'] for r in results], [r.total['replies'] for r in results]),
+		'ft': wavg([r.reply['size']['footer' ] for r in results], [r.total['replies'] for r in results]),
+		'tt': wavg([r.reply['size']['total'  ] for r in results], [r.total['replies'] for r in results])
+	}
+	print("Reply size [B]: header {:,.2f} content {:,.2f} footer {:,.2f} (total {:,.2f})".format(replies['hd'], replies['ct'], replies['ft'], replies['tt']))
+	replies = {
+		'1xx': sum([r.reply['status']['1xx'] for r in results]),
+		'2xx': sum([r.reply['status']['2xx'] for r in results]),
+		'3xx': sum([r.reply['status']['3xx'] for r in results]),
+		'4xx': sum([r.reply['status']['4xx'] for r in results]),
+		'5xx': sum([r.reply['status']['5xx'] for r in results])
+	}
+	print("Reply status: 1xx={:,} 2xx={:,} 3xx={:,} 4xx={:,} 5xx={:,}".format(replies['1xx'], replies['2xx'], replies['3xx'], replies['4xx'], replies['5xx']))
+	print("")
+
+	#==================================================
+	misc = dict(functools.reduce(operator.add, map(collections.Counter, [r.misc for r in results])))
+	print("CPU time [s]: user {:.2f} system {:.2f} (user {:.2f}% system {:.2f}% total {:.2f}%)".format(misc['usr'], misc['sys'], misc['usr%'], misc['sys%'], misc['total%']))
+	print("Net I/O: {:,.2f} KB/s ({} bps)".format(misc['KB/S'], decimal.Decimal(misc['bps']).normalize().to_eng_string()))
+	print("")
+
+	#==================================================
+	errors = dict(functools.reduce(lambda a, b: a.update(b) or a, [r.errors for r in results], collections.Counter()))
+	print("Errors: total {} client-timo {} socket-timo {} connrefused {} connreset {}".format(errors['total'], errors['client-timout'], errors['socket-timout'], errors['connection-refused'], errors['connection-reset']))
+	print("Errors: fd-unavail {} addrunavail {} ftab-full {} other {}".format(errors['fd-unavailable'], errors['address-unavailable'], errors['ftab-full'], errors['other']))
+	print("")
+
+	#==================================================
+	sessions = {
+		'min': sum([r.session['rate']['min'] for r in results]),
+		'avg': wavg([r.session['rate']['avg'] for r in results], [r.session['totals'] for r in results]),
+		'max': sum([r.session['rate']['max'] for r in results]),
+		'stddev':  wavg([r.session['rate']['stddev'] for r in results], [r.session['totals'] for r in results]),
+		'successes': sum([r.session['successes'] for r in results]),
+		'totals': sum([r.session['totals'] for r in results]),
+		'conns/ses': wavg([r.session['conns/ses'] for r in results], [r.session['totals'] for r in results]),
+		'lifetime': wavg([r.session['lifetime'] for r in results], [r.session['successes'] for r in results]),
+		'failtime': wavg([r.session['failtime'] for r in results], [r.session['totals'] - r.session['successes'] for r in results]),
+	}
+	print("Session rate [sess/s]: min {:.2f} avg {:.2f} max {:.2f} avg stddev {:.2f} ({:,}/{:,})".format(sessions['min'], sessions['avg'], sessions['max'], sessions['stddev'], sessions['successes'], sessions['totals']))
+	print("Session: avg {:.2f} connections/session".format(sessions['conns/ses']))
+	print("Session lifetime [s]: {:.2f}".format(sessions['lifetime']))
+	print("Session failtime [s]: {:.2f}".format(sessions['failtime']))
+
+	hist = dict(functools.reduce(operator.add, map(collections.Counter, [r.session['hist'] for r in results])))
+	hist = ["{}: {}".format(key, value) for key, value in sorted(hist.items(), key=lambda x: x[0])]
+	print("Session length histogram: [{}]".format(", ".join(hist)))
Index: doc/LaTeXmacros/common.tex
===================================================================
--- doc/LaTeXmacros/common.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/LaTeXmacros/common.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -11,6 +11,6 @@
 %% Created On       : Sat Apr  9 10:06:17 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Sat Jan 23 09:06:39 2021
-%% Update Count     : 491
+%% Last Modified On : Mon Feb  8 21:45:41 2021
+%% Update Count     : 522
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -32,23 +32,40 @@
 \setlist[enumerate]{listparindent=\parindent}% global
 \setlist[enumerate,2]{leftmargin=\parindent,labelsep=*,align=parleft,label=\alph*.}% local
-\setlist[description]{itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
+\setlist[description]{topsep=0.5ex,itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
 
 % Names used in the document.
 
 \usepackage{xspace}
-\newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}\xspace} % Cforall symbolic name
-\newcommand{\CFA}{\protect\CFAIcon}		% safe for section/caption
-\newcommand{\CFL}{\textrm{Cforall}\xspace} % Cforall symbolic name
-\newcommand{\Celeven}{\textrm{C11}\xspace} % C11 symbolic name
-\newcommand{\CC}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}\xspace} % C++ symbolic name
-\newcommand{\CCeleven}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}11\xspace} % C++11 symbolic name
-\newcommand{\CCfourteen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}14\xspace} % C++14 symbolic name
-\newcommand{\CCseventeen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}17\xspace} % C++17 symbolic name
-\newcommand{\CCtwenty}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}20\xspace} % C++20 symbolic name
+\newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}} % Cforall icon
+\newcommand{\CFA}{\protect\CFAIcon\xspace}			% CFA symbolic name
+\newcommand{\CFL}{\textrm{Cforall}\xspace}			% Cforall non-icon name
+\newcommand{\Celeven}{\textrm{C11}\xspace}			% C11 symbolic name
+\newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon
+\newcommand{\CC}{\protect\CCIcon\xspace}			% C++ symbolic name
+% numbers disallowed in latex variables names => use number names
+\newcommand{\CCeleven}{\protect\CCIcon{11}\xspace}	% C++11 symbolic name
+\newcommand{\CCfourteen}{\protect\CCIcon{14}\xspace} % C++14 symbolic name
+\newcommand{\CCseventeen}{\protect\CCIcon{17}\xspace} % C++17 symbolic name
+\newcommand{\CCtwenty}{\protect\CCIcon{20}\xspace}	% C++20 symbolic name
 \newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+% remove special-character warning in PDF side-bar names
 \makeatletter
+\@ifpackageloaded{hyperref}{% only when hyperref is loaded: plain-text stand-ins used in PDF strings
+  \pdfstringdefDisableCommands{
+  \def\CFA{\CFL}
+  \def\Celeven{C11\xspace}
+  \def\CC{C++\xspace}
+  \def\CCeleven{C++11\xspace}
+  \def\CCfourteen{C++14\xspace}
+  \def\CCseventeen{C++17\xspace}
+  \def\CCtwenty{C++20\xspace}
+  \def\Csharp{C\#\xspace}
+  \def\lstinline{\xspace}% must use {} as delimiters, e.g., \lstinline{...}
+  }% end of \pdfstringdefDisableCommands
+}{}% empty false branch: nothing to do when hyperref is not loaded
+
 % parindent is relative, i.e., toggled on/off in environments like itemize, so store the value for
 % use rather than use \parident directly.
@@ -81,6 +98,6 @@
     \vskip 50\p@
   }}
-\renewcommand\section{\@startsection{section}{1}{\z@}{-3.5ex \@plus -1ex \@minus -.2ex}{1.75ex \@plus .2ex}{\normalfont\large\bfseries}}
-\renewcommand\subsection{\@startsection{subsection}{2}{\z@}{-3.25ex \@plus -1ex \@minus -.2ex}{1.5ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
+\renewcommand\section{\@startsection{section}{1}{\z@}{-3.0ex \@plus -1ex \@minus -.2ex}{1.5ex \@plus .2ex}{\normalfont\large\bfseries}}
+\renewcommand\subsection{\@startsection{subsection}{2}{\z@}{-2.75ex \@plus -1ex \@minus -.2ex}{1.25ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
 \renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex \@plus -1ex \@minus -.2ex}{1.0ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
 \renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-2.0ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries}}
@@ -89,5 +106,5 @@
 \newcommand{\italic}[1]{\emph{\hyperpage{#1}}}
 \newcommand{\Definition}[1]{\textbf{\hyperpage{#1}}}
-\newcommand{\see}[1]{\emph{see}~#1}
+\newcommand{\see}[1]{(see #1)}
 
 % Define some commands that produce formatted index entries suitable for cross-references.
@@ -235,4 +252,5 @@
 \newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}
 \newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}
+\newcommand{\LstStringStyle}[1]{{\lst@basicstyle{\lst@stringstyle{#1}}}}
 
 \newlength{\gcolumnposn}				% temporary hack because lstlisting does not handle tabs correctly
@@ -260,11 +278,11 @@
 xleftmargin=\parindentlnth,				% indent code to paragraph indentation
 extendedchars=true,						% allow ASCII characters in the range 128-255
-escapechar=§,							% LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-'
-mathescape=true,						% LaTeX math escape in CFA code $...$
+escapechar=\$,							% LaTeX escape in CFA code $...$ (dollar sign)
+mathescape=false,						% LaTeX math escape $...$ disabled ($ is the escapechar above)
 keepspaces=true,						%
 showstringspaces=false,					% do not show spaces with cup
 showlines=true,							% show blank lines at end of code
 aboveskip=4pt,							% spacing above/below code block
-belowskip=-2pt,
+belowskip=0pt,
 numberstyle=\footnotesize\sf,			% numbering style
 % replace/adjust listing characters that look bad in sanserif
@@ -279,10 +297,9 @@
 \lstset{
 language=CFA,
-moredelim=**[is][\color{red}]{®}{®},	% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
-moredelim=**[is][\color{blue}]{ß}{ß},	% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
-moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
-moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
-% replace/adjust listing characters that look bad in sanserif
-add to literate={`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
+moredelim=**[is][\color{red}]{@}{@},	% red highlighting @...@
+%moredelim=**[is][\color{red}]{®}{®},	% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
+%moredelim=**[is][\color{blue}]{ß}{ß},	% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
+%moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
+%moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
 }% lstset
 \lstset{#1}
Index: doc/bibliography/pl.bib
===================================================================
--- doc/bibliography/pl.bib	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/bibliography/pl.bib	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -1797,13 +1797,14 @@
 }
 
-@article{Delisle19,
+@article{Delisle20,
     keywords	= {concurrency, Cforall},
     contributer	= {pabuhr@plg},
     author	= {Thierry Delisle and Peter A. Buhr},
     title	= {Advanced Control-flow and Concurrency in \textsf{C}$\mathbf{\forall}$},
-    year	= 2019,
+    year	= 2020,
     journal	= spe,
-    pages	= {1-33},
-    note	= {submitted},
+    pages	= {1-38},
+    note	= {\href{https://doi-org.proxy.lib.uwaterloo.ca/10.1002/spe.2925}{https://\-doi-org.proxy.lib.uwaterloo.ca/\-10.1002/\-spe.2925}},
+    note	= {},
 }
 
Index: doc/theses/andrew_beach_MMath/existing.tex
===================================================================
--- doc/theses/andrew_beach_MMath/existing.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/existing.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -1,3 +1,3 @@
-\chapter{\texorpdfstring{\CFA Existing Features}{Cforall Existing Features}}
+\chapter{\CFA Existing Features}
 
 \CFA (C-for-all)~\cite{Cforall} is an open-source project extending ISO C with
@@ -12,5 +12,5 @@
 obvious to the reader.
 
-\section{\texorpdfstring{Overloading and \lstinline|extern|}{Overloading and extern}}
+\section{Overloading and \lstinline{extern}}
 \CFA has extensive overloading, allowing multiple definitions of the same name
 to be defined.~\cite{Moss18}
@@ -42,5 +42,5 @@
 
 \section{Reference Type}
-\CFA adds a rebindable reference type to C, but more expressive than the \CC
+\CFA adds a rebindable reference type to C, but more expressive than the \Cpp
 reference.  Multi-level references are allowed and act like auto-dereferenced
 pointers using the ampersand (@&@) instead of the pointer asterisk (@*@). \CFA
@@ -59,7 +59,7 @@
 
 Both constructors and destructors are operators, which means they are just
-functions with special operator names rather than type names in \CC. The
+functions with special operator names rather than type names in \Cpp. The
 special operator names may be used to call the functions explicitly (not
-allowed in \CC for constructors).
+allowed in \Cpp for constructors).
 
 In general, operator names in \CFA are constructed by bracketing an operator
@@ -88,5 +88,5 @@
 matching overloaded destructor @void ^?{}(T &);@ is called.  Without explicit
 definition, \CFA creates a default and copy constructor, destructor and
-assignment (like \CC). It is possible to define constructors/destructors for
+assignment (like \Cpp). It is possible to define constructors/destructors for
 basic and existing types.
 
@@ -94,5 +94,5 @@
 \CFA uses parametric polymorphism to create functions and types that are
 defined over multiple types. \CFA polymorphic declarations serve the same role
-as \CC templates or Java generics. The ``parametric'' means the polymorphism is
+as \Cpp templates or Java generics. The ``parametric'' means the polymorphism is
 accomplished by passing argument operations to associate \emph{parameters} at
 the call site, and these parameters are used in the function to differentiate
@@ -134,5 +134,5 @@
 
 Note, a function named @do_once@ is not required in the scope of @do_twice@ to
-compile it, unlike \CC template expansion. Furthermore, call-site inferencing
+compile it, unlike \Cpp template expansion. Furthermore, call-site inferencing
 allows local replacement of the most specific parametric functions needs for a
 call.
@@ -178,5 +178,5 @@
 }
 \end{cfa}
-The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \CC
+The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \Cpp
 templates usage, a polymorphic-type usage must specify a type parameter.
 
Index: doc/theses/andrew_beach_MMath/features.tex
===================================================================
--- doc/theses/andrew_beach_MMath/features.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/features.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -5,10 +5,24 @@
 
 \section{Virtuals}
+Virtual types and casts are not part of the exception system nor are they
+required for an exception system. But an object-oriented style hierarchy is a
+great way of organizing exceptions so a minimal virtual system has been added
+to \CFA.
+
+The pattern of a simple hierarchy, borrowed from object-oriented
+programming, was chosen for several reasons.
+The first is that it allows new exceptions to be added in user code
+and in libraries independently of each other. Another is it allows for
+different levels of exception grouping (all exceptions, all IO exceptions or
+a particular IO exception). It also provides a simple way of passing
+data back and forth across the throw.
+
 Virtual types and casts are not required for a basic exception-system but are
 useful for advanced exception features. However, \CFA is not object-oriented so
-there is no obvious concept of virtuals.  Hence, to create advanced exception
-features for this work, I needed to designed and implemented a virtual-like
+there is no obvious concept of virtuals. Hence, to create advanced exception
+features for this work, I needed to design and implement a virtual-like
 system for \CFA.
 
+% NOTE: Maybe we should put less of the rationale here.
 Object-oriented languages often organized exceptions into a simple hierarchy,
 \eg Java.
@@ -30,5 +44,5 @@
 \end{center}
 The hierarchy provides the ability to handle an exception at different degrees
-of specificity (left to right).  Hence, it is possible to catch a more general
+of specificity (left to right). Hence, it is possible to catch a more general
 exception-type in higher-level code where the implementation details are
 unknown, which reduces tight coupling to the lower-level implementation.
@@ -61,12 +75,14 @@
 While much of the virtual infrastructure is created, it is currently only used
 internally for exception handling. The only user-level feature is the virtual
-cast, which is the same as the \CC \lstinline[language=C++]|dynamic_cast|.
+cast, which is the same as the \Cpp \lstinline[language=C++]|dynamic_cast|.
+\label{p:VirtualCast}
 \begin{cfa}
 (virtual TYPE)EXPRESSION
 \end{cfa}
-Note, the syntax and semantics matches a C-cast, rather than the unusual \CC
-syntax for special casts. Both the type of @EXPRESSION@ and @TYPE@ must be a
-pointer to a virtual type. The cast dynamically checks if the @EXPRESSION@ type
-is the same or a subtype of @TYPE@, and if true, returns a pointer to the
+Note, the syntax and semantics match a C-cast, rather than the function-like
+\Cpp syntax for special casts. Both the type of @EXPRESSION@ and @TYPE@ must be
+a pointer to a virtual type.
+The cast dynamically checks if the @EXPRESSION@ type is the same or a subtype
+of @TYPE@, and if true, returns a pointer to the
 @EXPRESSION@ object, otherwise it returns @0p@ (null pointer).
 
@@ -77,36 +93,38 @@
 
 Exceptions are defined by the trait system; there are a series of traits, and
-if a type satisfies them, then it can be used as an exception.  The following
+if a type satisfies them, then it can be used as an exception. The following
 is the base trait all exceptions need to match.
 \begin{cfa}
 trait is_exception(exceptT &, virtualT &) {
-	virtualT const & @get_exception_vtable@(exceptT *);
+	virtualT const & get_exception_vtable(exceptT *);
 };
 \end{cfa}
-The function takes any pointer, including the null pointer, and returns a
-reference to the virtual-table object. Defining this function also establishes
-the virtual type and a virtual-table pair to the \CFA type-resolver and
-promises @exceptT@ is a virtual type and a child of the base exception-type.
-
-{\color{blue} PAB: I do not understand this paragraph.}
-One odd thing about @get_exception_vtable@ is that it should always be a
-constant function, returning the same value regardless of its argument.  A
-pointer or reference to the virtual table instance could be used instead,
-however using a function has some ease of implementation advantages and allows
-for easier disambiguation because the virtual type name (or the address of an
-instance that is in scope) can be used instead of the mangled virtual table
-name.  Also note the use of the word ``promise'' in the trait
-description. Currently, \CFA cannot check to see if either @exceptT@ or
-@virtualT@ match the layout requirements. This is considered part of
-@get_exception_vtable@'s correct implementation.
+The trait is defined over two types, the exception type and the virtual table
+type. This should be one-to-one, each exception type has only one virtual
+table type and vice versa. The only assertion in the trait is
+@get_exception_vtable@, which takes a pointer of the exception type and
+returns a reference to the virtual table type instance.
+
+The function @get_exception_vtable@ is actually a constant function.
+Regardless of the value passed in (including the null pointer), it should
+return a reference to the virtual table instance for that type.
+The reason it is a function instead of a constant is that it makes type
+annotations easier to write as you can use the exception type instead of the
+virtual table type, which usually has a mangled name.
+% Also \CFA's trait system handles functions better than constants and doing
+% it this way
+
+% I did have a note about how it is the programmer's responsibility to make
+% sure the function is implemented correctly. But this is true of every
+% similar system I know of (except Agda's I guess) so I took it out.
 
 \section{Raise}
-\CFA provides two kinds of exception raise: termination (see
-\VRef{s:Termination}) and resumption (see \VRef{s:Resumption}), which are
+\CFA provides two kinds of exception raise: termination
+\see{\VRef{s:Termination}} and resumption \see{\VRef{s:Resumption}}, which are
 specified with the following traits.
 \begin{cfa}
 trait is_termination_exception(
 		exceptT &, virtualT & | is_exception(exceptT, virtualT)) {
-	void @defaultTerminationHandler@(exceptT &);
+	void defaultTerminationHandler(exceptT &);
 };
 \end{cfa}
@@ -118,5 +136,5 @@
 trait is_resumption_exception(
 		exceptT &, virtualT & | is_exception(exceptT, virtualT)) {
-	void @defaultResumptionHandler@(exceptT &);
+	void defaultResumptionHandler(exceptT &);
 };
 \end{cfa}
@@ -125,9 +143,18 @@
 
 Finally there are three convenience macros for referring to the these traits:
-@IS_EXCEPTION@, @IS_TERMINATION_EXCEPTION@ and @IS_RESUMPTION_EXCEPTION@.  Each
-takes the virtual type's name, and for polymorphic types only, the
-parenthesized list of polymorphic arguments. These macros do the name mangling
-to get the virtual-table name and provide the arguments to both sides
-{\color{blue}(PAB: What's a ``side''?)}
+@IS_EXCEPTION@, @IS_TERMINATION_EXCEPTION@ and @IS_RESUMPTION_EXCEPTION@.
+All three traits are hard to use while naming the virtual table as it has an
+internal mangled name. These macros take the exception name as their first
+argument and do the mangling. They all take a second argument for polymorphic
+types which is the parenthesized list of polymorphic arguments. These
+arguments are passed to both the exception type and the virtual table type as
+the arguments do have to match.
+
+For example consider a function that is polymorphic over types that have a
+defined arithmetic exception:
+\begin{cfa}
+forall(Num | IS_EXCEPTION(Arithmetic, (Num)))
+void some_math_function(Num & left, Num & right);
+\end{cfa}
 
 \subsection{Termination}
@@ -146,12 +173,13 @@
 throw EXPRESSION;
 \end{cfa}
-The expression must return a termination-exception reference, where the
-termination exception has a type with a @void defaultTerminationHandler(T &)@
-(default handler) defined. The handler is found at the call site using \CFA's
-trait system and passed into the exception system along with the exception
-itself.
-
-At runtime, a representation of the exception type and an instance of the
-exception type is copied into managed memory (heap) to ensure it remains in
+The expression must return a reference to a termination exception, where the
+termination exception is any type that satisfies @is_termination_exception@
+at the call site.
+Through \CFA's trait system the functions in the traits are passed into the
+throw code. A new @defaultTerminationHandler@ can be defined in any scope to
+change the throw's behavior (see below).
+
+At runtime, the exception returned by the expression
+is copied into managed memory (heap) to ensure it remains in
 scope during unwinding. It is the user's responsibility to ensure the original
 exception object at the throw is freed when it goes out of scope. Being
@@ -165,7 +193,7 @@
 try {
 	GUARDED_BLOCK
-} @catch (EXCEPTION_TYPE$\(_1\)$ * NAME)@ { // termination handler 1
+} catch (EXCEPTION_TYPE$\(_1\)$ * NAME$\(_1\)$) { // termination handler 1
 	HANDLER_BLOCK$\(_1\)$
-} @catch (EXCEPTION_TYPE$\(_2\)$ * NAME)@ { // termination handler 2
+} catch (EXCEPTION_TYPE$\(_2\)$ * NAME$\(_2\)$) { // termination handler 2
 	HANDLER_BLOCK$\(_2\)$
 }
@@ -178,16 +206,18 @@
 Exception matching checks the representation of the thrown exception-type is
 the same or a descendant type of the exception types in the handler clauses. If
-there is a match, a pointer to the exception object created at the throw is
-bound to @NAME@ and the statements in the associated @HANDLER_BLOCK@ are
-executed. If control reaches the end of the handler, the exception is freed,
-and control continues after the try statement.
+it is the same as or a descendant of @EXCEPTION_TYPE@$_i$ then @NAME@$_i$ is
+bound to a pointer to the exception and the statements in @HANDLER_BLOCK@$_i$
+are executed. If control reaches the end of the handler, the exception is
+freed and control continues after the try statement.
 
 The default handler visible at the throw statement is used if no matching
 termination handler is found after the entire stack is searched. At that point,
 the default handler is called with a reference to the exception object
-generated at the throw. If the default handler returns, the system default
-action is executed, which often terminates the program. This feature allows
+generated at the throw. If the default handler returns, control continues
+from after the throw statement. This feature allows
 each exception type to define its own action, such as printing an informative
 error message, when an exception is not handled in the program.
+However the default handler for all exception types triggers a cancellation
+using the exception.
 
 \subsection{Resumption}
@@ -196,6 +226,6 @@
 Resumption raise, called ``resume'', is as old as termination
 raise~\cite{Goodenough75} but is less popular. In many ways, resumption is
-simpler and easier to understand, as it is simply a dynamic call (as in
-Lisp). The semantics of resumption is: search the stack for a matching handler,
+simpler and easier to understand, as it is simply a dynamic call.
+The semantics of resumption is: search the stack for a matching handler,
 execute the handler, and continue execution after the resume. Notice, the stack
 cannot be unwound because execution returns to the raise point. Resumption is
@@ -209,12 +239,13 @@
 \end{cfa}
 The semantics of the @throwResume@ statement are like the @throw@, but the
-expression has a type with a @void defaultResumptionHandler(T &)@ (default
-handler) defined, where the handler is found at the call site by the type
-system.  At runtime, a representation of the exception type and an instance of
-the exception type is \emph{not} copied because the stack is maintained during
-the handler search.
+expression has to return a reference to a type that satisfies the trait
+@is_resumption_exception@. Like with termination the exception system can
+use these assertions while (throwing/raising/handling) the exception.
+
+At runtime, no copies are made. As the stack is not unwound the exception and
+any values on the stack will remain in scope while the resumption is handled.
 
 Then the exception system searches the stack starting from the resume and
-proceeding towards the base of the stack, from callee to caller. At each stack
+proceeding to the base of the stack, from callee to caller. At each stack
 frame, a check is made for resumption handlers defined by the @catchResume@
 clauses of a @try@ statement.
@@ -222,7 +253,7 @@
 try {
 	GUARDED_BLOCK
-} @catchResume (EXCEPTION_TYPE$\(_1\)$ * NAME)@ { // resumption handler 1
+} catchResume (EXCEPTION_TYPE$\(_1\)$ * NAME$\(_1\)$) {
 	HANDLER_BLOCK$\(_1\)$
-} @catchResume (EXCEPTION_TYPE$\(_2\)$ * NAME)@ { // resumption handler 2
+} catchResume (EXCEPTION_TYPE$\(_2\)$ * NAME$\(_2\)$) {
 	HANDLER_BLOCK$\(_2\)$
 }
@@ -253,8 +284,8 @@
 current point on the stack because new try statements may have been pushed by
 the handler or functions called from the handler. If there is no match back to
-the point of the current handler, the search skips the stack frames already
-searched by the first resume and continues after the try statement. The default
-handler always continues from default handler associated with the point where
-the exception is created.
+the point of the current handler, the search skips\label{p:searchskip} the
+stack frames already searched by the first resume and continues after
+the try statement. The default handler always continues from default
+handler associated with the point where the exception is created.
 
 % This might need a diagram. But it is an important part of the justification
@@ -275,7 +306,8 @@
 \end{verbatim}
 
-This resumption search-pattern reflect the one for termination, which matches
-with programmer expectations. However, it avoids the \emph{recursive
-resumption} problem. If parts of the stack are searched multiple times, loops
+This resumption search pattern reflects the one for termination, and so
+should come naturally to most programmers.
+However, it avoids the \emph{recursive resumption} problem.
+If parts of the stack are searched multiple times, loops
 can easily form resulting in infinite recursion.
 
@@ -283,20 +315,23 @@
 \begin{cfa}
 try {
-	throwResume$\(_1\)$ (E &){};
-} catch( E * ) {
-	throwResume;
-}
-\end{cfa}
-Based on termination semantics, programmer expectation is for the re-resume to
-continue searching the stack frames after the try statement. However, the
-current try statement is still on the stack below the handler issuing the
-reresume (see \VRef{s:Reraise}). Hence, the try statement catches the re-raise
-again and does another re-raise \emph{ad infinitum}, which is confusing and
-difficult to debug. The \CFA resumption search-pattern skips the try statement
-so the reresume search continues after the try, mathcing programmer
-expectation.
+	throwResume (E &){}; // first
+} catchResume(E *) {
+	throwResume (E &){}; // second
+}
+\end{cfa}
+If this handler is ever used it will be placed on top of the stack above the
+try statement. If the stack was not masked then the @throwResume@ in the
+handler would always be caught by the handler, leading to an infinite loop.
+Masking avoids this problem and other more complex versions of it involving
+multiple handlers and exception types.
+
+Other masking strategies could be used; such as masking the handlers that
+have caught an exception. This one was chosen because it creates a symmetry
+with termination (masked sections of the stack would be unwound with
+termination) and having only one pattern to learn is easier.
 
 \section{Conditional Catch}
-Both termination and resumption handler-clauses may perform conditional matching:
+Both termination and resumption handler clauses can be given an additional
+condition to further control which exceptions they handle:
 \begin{cfa}
 catch (EXCEPTION_TYPE * NAME ; @CONDITION@)
@@ -305,5 +340,5 @@
 exception matches, @CONDITION@ is executed. The condition expression may
 reference all names in scope at the beginning of the try block and @NAME@
-introduced in the handler clause.  If the condition is true, then the handler
+introduced in the handler clause. If the condition is true, then the handler
 matches. Otherwise, the exception search continues at the next appropriate kind
 of handler clause in the try block.
@@ -322,4 +357,7 @@
 
 \section{Reraise}
+\color{red}{From Andrew: I recommend we talk about why the language doesn't
+have rethrows/reraises instead.}
+
 \label{s:Reraise}
 Within the handler block or functions called from the handler block, it is
@@ -327,5 +365,7 @@
 @throwResume@, respective.
 \begin{cfa}
-catch( ... ) {
+try {
+	...
+} catch( ... ) {
 	... throw; // rethrow
 } catchResume( ... ) {
@@ -340,5 +380,4 @@
 handler is generated that does a program-level abort.
 
-
 \section{Finally Clauses}
 A @finally@ clause may be placed at the end of a @try@ statement.
@@ -346,18 +385,20 @@
 try {
 	GUARDED_BLOCK
-} ...	// any number or kind of handler clauses
-} finally {
+} ... // any number or kind of handler clauses
+... finally {
 	FINALLY_BLOCK
 }
 \end{cfa}
-The @FINALLY_BLOCK@ is executed when the try statement is unwound from the
-stack, \ie when the @GUARDED_BLOCK@ or any handler clause finishes. Hence, the
-finally block is always executed.
+The @FINALLY_BLOCK@ is executed when the try statement is removed from the
+stack, including when the @GUARDED_BLOCK@ or any handler clause finishes or
+during an unwind.
+The only time the block is not executed is if the program is exited before
+that happens.
 
 Execution of the finally block should always finish, meaning control runs off
 the end of the block. This requirement ensures always continues as if the
 finally clause is not present, \ie finally is for cleanup not changing control
-flow.  Because of this requirement, local control flow out of the finally block
-is forbidden.  The compiler precludes any @break@, @continue@, @fallthru@ or
+flow. Because of this requirement, local control flow out of the finally block
+is forbidden. The compiler precludes any @break@, @continue@, @fallthru@ or
 @return@ that causes control to leave the finally block. Other ways to leave
 the finally block, such as a long jump or termination are much harder to check,
@@ -369,6 +410,7 @@
 possible forwards the cancellation exception to a different stack.
 
+Cancellation is not an exception operation like termination or resumption.
 There is no special statement for starting a cancellation; instead the standard
-library function @cancel_stack@ is called passing an exception.  Unlike a
+library function @cancel_stack@ is called passing an exception. Unlike a
 raise, this exception is not used in matching only to pass information about
 the cause of the cancellation.
@@ -377,17 +419,18 @@
 \begin{description}
 \item[Main Stack:]
-
 The main stack is the one used by the program main at the start of execution,
-and is the only stack in a sequential program.  Hence, when cancellation is
-forwarded to the main stack, there is no other forwarding stack, so after the
-stack is unwound, there is a program-level abort.
+and is the only stack in a sequential program. Even in a concurrent program
+the main stack is only dependent on the environment that started the program.
+Hence, when the main stack is cancelled there is nowhere else in the program
+to notify. After the stack is unwound, there is a program-level abort.
 
 \item[Thread Stack:]
 A thread stack is created for a @thread@ object or object that satisfies the
-@is_thread@ trait.  A thread only has two points of communication that must
+@is_thread@ trait. A thread only has two points of communication that must
 happen: start and join. As the thread must be running to perform a
-cancellation, it must occur after start and before join, so join is a
-cancellation point.  After the stack is unwound, the thread halts and waits for
-another thread to join with it. The joining thread, checks for a cancellation,
+cancellation, it must occur after start and before join, so join is used
+for communication here.
+After the stack is unwound, the thread halts and waits for
+another thread to join with it. The joining thread checks for a cancellation,
 and if present, resumes exception @ThreadCancelled@.
 
@@ -397,18 +440,21 @@
 the exception is not caught. The implicit join does a program abort instead.
 
-This semantics is for safety. One difficult problem for any exception system is
-defining semantics when an exception is raised during an exception search:
-which exception has priority, the original or new exception? No matter which
-exception is selected, it is possible for the selected one to disrupt or
-destroy the context required for the other. {\color{blue} PAB: I do not
-understand the following sentences.} This loss of information can happen with
-join but as the thread destructor is always run when the stack is being unwound
-and one termination/cancellation is already active. Also since they are
-implicit they are easier to forget about.
+This semantics is for safety. If an unwind is triggered while another unwind
+is underway only one of them can proceed as they both want to ``consume'' the
+stack. Letting both try to proceed leads to very undefined behaviour.
+Both termination and cancellation involve unwinding and, since the default
+@defaultResumptionHandler@ performs a termination that could more easily
+happen in an implicit join inside a destructor. So there is an error message
+and an abort instead.
+
+The recommended way to avoid the abort is to handle the initial resumption
+from the implicit join. If required you may put an explicit join inside a
+finally clause to disable the check and use the local
+@defaultResumptionHandler@ instead.
 
 \item[Coroutine Stack:] A coroutine stack is created for a @coroutine@ object
-or object that satisfies the @is_coroutine@ trait.  A coroutine only knows of
-two other coroutines, its starter and its last resumer.  The last resumer has
-the tightest coupling to the coroutine it activated.  Hence, cancellation of
+or object that satisfies the @is_coroutine@ trait. A coroutine only knows of
+two other coroutines, its starter and its last resumer. The last resumer has
+the tightest coupling to the coroutine it activated. Hence, cancellation of
 the active coroutine is forwarded to the last resumer after the stack is
 unwound, as the last resumer has the most precise knowledge about the current
Index: doc/theses/andrew_beach_MMath/future.tex
===================================================================
--- doc/theses/andrew_beach_MMath/future.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/future.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -1,122 +1,133 @@
 \chapter{Future Work}
 
+\section{Language Improvements}
+\CFA is a developing programming language. As such, there are partially or
+unimplemented features of the language (including several broken components)
+that I had to work around while building an exception handling system largely in
+the \CFA language (some C components).  The following are a few of these
+issues, and once implemented/fixed, how this would affect the exception system.
+\begin{itemize}
+\item
+The implementation of termination is not portable because it includes
+hand-crafted assembly statements. These sections must be ported by hand to
+support more hardware architectures, such as the ARM processor.
+\item
+Due to a type-system problem, the catch clause cannot bind the exception to a
+reference instead of a pointer. Since \CFA has a very general reference
+capability, programmers will want to use it. Once fixed, this capability should
+result in little or no change in the exception system.
+\item
+Termination handlers cannot use local control-flow transfers, \eg by @break@,
+@return@, \etc. The reason is that current code generation hoists a handler
+into a nested function for convenience (versus assembly-code generation at the
+@try@ statement). Hence, when the handler runs, its code is not in the lexical
+scope of the @try@ statement, where the local control-flow transfers are
+meaningful.
+\item
+There is no detection of colliding unwinds. It is possible for clean-up code
+run during an unwind to trigger another unwind that escapes the clean-up code
+itself; such as a termination exception caught further down the stack or a
+cancellation. There do exist ways to handle this but currently they are not
+even detected and the first unwind will simply be forgotten, often leaving
+it in a bad state.
+\item
+Also the exception system did not have a lot of time to be tried and tested.
+So just letting people use the exception system more will reveal new
+quality of life upgrades that can be made with time.
+\end{itemize}
+
 \section{Complete Virtual System}
-The virtual system should be completed. It was never supposed to be part of
-this project and so minimal work was done on it. A draft of what the complete
-system might look like was created but it was never finalized or implemented.
-A future project in \CFA would be to complete that work and to update the
-parts of the exception system that use the current version.
+The virtual system should be completed. It was not supposed to be part of this
+project, but was thrust upon it to do exception inheritance; hence, only
+minimal work was done. A draft for a complete virtual system is available but
+it is not finalized.  A future \CFA project is to complete that work and then
+update the exception system that uses the current version.
 
-There are several improvements to the virtual system that would improve
-the exception traits. The biggest one is an assertion that checks that one
-virtual type is a child of another virtual type. This would capture many of
-the requirements much more precisely.
+There are several improvements to the virtual system that would improve the
+exception traits. The most important one is an assertion to check one virtual
+type is a child of another. This check precisely captures many of the
+correctness requirements.
 
 The full virtual system might also include other improvement like associated
-types. This is a proposed feature that would allow traits to refer to types
-not listed in their header. This would allow the exception traits to not
-refer to the virtual table type explicatly which would remove the need for
-the interface macros.
+types to allow traits to refer to types not listed in their header. This
+feature allows exception traits to not refer to the virtual-table type
+explicitly, removing the need for the current interface macros.
 
-\section{Additional Throws}
-Several other kinds of throws, beyond the termination throw (@throw@),
-the resumption throw (@throwResume@) and the re-throws, were considered.
-None were as useful as the core throws but they would likely be worth
-revising.
+\section{Additional Raises}
+Several other kinds of exception raises were considered beyond termination
+(@throw@), resumption (@throwResume@), and reraise.
 
-The first ones are throws for asynchronous exceptions, throwing exceptions
-from one stack to another. These act like signals allowing for communication
-between the stacks. This is usually used with resumption as it allows the
-target stack to continue execution normally after the exception has been
-handled.
+The first is a non-local/concurrent raise providing asynchronous exceptions,
+\ie raising an exception on another stack. This semantics acts like signals
+allowing for out-of-band communication among coroutines and threads. This kind
+of raise is often restricted to resumption to allow the target stack to
+continue execution normally after the exception has been handled. That is,
+allowing one coroutine/thread to unwind the stack of another via termination is
+bad software engineering.
 
-This would much more coordination between the concurrency system and the
-exception system to handle. Most of the interesting design decisions around
-applying asynchronous exceptions appear to be around masking (controlling
-which exceptions may be thrown at a stack). It would likely require more of
-the virtual system and would also effect how default handlers are set.
+Non-local/concurrent requires more coordination between the concurrency system
+and the exception system. Many of the interesting design decisions centre
+around masking (controlling which exceptions may be thrown at a stack). It
+would likely require more of the virtual system and would also affect how
+default handlers are set.
 
-The other throws were designed to mimic bidirectional algebraic effects.
-Algebraic effects are used in some functional languages and allow a function
+Other raises were considered to mimic bidirectional algebraic effects.
+Algebraic effects are used in some functional languages allowing one function
 to have another function on the stack resolve an effect (which is defined with
-a function-like interface).
-These can be mimiced with resumptions and the the new throws were designed
-to try and mimic bidirectional algebraic effects, where control can go back
-and forth between the function effect caller and handler while the effect
-is underway.
+a function-like interface).  This semantics can be mimicked with resumptions
+and new raises were discussed to mimic bidirectional algebraic-effects, where
+control can go back and forth between the function-effect caller and handler
+while the effect is underway.
 % resume-top & resume-reply
+These raises would be like the resumption raise except using different search
+patterns to find the handler.
 
-These throws would likely be just like the resumption throw except they would
-use different search patterns to find the handler to reply to.
+\section{Zero-Cost Try}
+\CFA does not have zero-cost try-statements because the compiler generates C
+code rather than assembler code \see{\VPageref{p:zero-cost}}. When the compiler
+does create its own assembly (or LLVM byte-code), then zero-cost try-statements
+are possible. The downside of zero-cost try-statements is the LSDA complexity,
+its size (program bloat), and the high cost of raising an exception.
 
-\section{Zero-Cost Exceptions}
-\CFA does not have zero-cost exceptions because it does not generate assembly
-but instead generates C code. See the implementation section. When the
-compiler does start to create its own assembly (or LLVM byte code) then
-zero-cost exceptions could be implemented.
+Alternatively, some research could be done into the simpler alternative method
+with a non-zero-cost try-statement but much lower cost exception raise. For
+example, programs are starting to use exceptions in the normal control path, so
+more exceptions are thrown. In these cases, the cost balance switches towards
+low-cost raise. Unfortunately, while exceptions remain exceptional, the
+libunwind model will probably remain the most effective option.
 
-Now in zero-cost exceptions the only part that is zero-cost are the try
-blocks. Some research could be done into the alternative methods for systems
-that expect a lot more exceptions to be thrown, allowing some overhead in
-entering and leaving try blocks to make throws faster. But while exceptions
-remain exceptional the libunwind model will probably remain the most effective
-option.
+Zero-cost resumption is still an open problem. First, because libunwind does
+not support a successful-exiting stack-search without doing an unwind.
+Workarounds are possible but awkward. Ideally an extension to libunwind could
+be made, but that would either require separate maintenance or gain enough
+support to have it folded into the standard.
 
-Zero-cost resumptions have more problems to solve. First because libunwind
-does not support a successful exiting stack search without doing an unwind.
-There are several ways to hack that functionality in. Ideally an extension to
-libunwind could be made, but that would either require seperate maintenance
-or gain enough support to have it folded into the standard.
-
-Also new techniques to skip previously searched parts of the stack will have
-to be developed. The recursive resume problem still remains and ideally the
-same pattern of ignoring sections of the stack.
+Also new techniques to skip previously searched parts of the stack need to be
+developed to handle the recursive resume problem and support advanced algebraic
+effects.
 
 \section{Signal Exceptions}
-Exception Handling: Issues and a Proposed Notation suggests there are three
-types of exceptions: escape, notify and signal.
-Escape exceptions are our termination exceptions, notify exceptions are
-resumption exceptions and that leaves signal exception unimplemented.
+Goodenough~\cite{Goodenough75} suggests three types of exceptions: escape,
+notify and signal.  Escape are termination exceptions, notify are resumption
+exceptions, leaving signal unimplemented.
 
-Signal exceptions allow either behaviour, that is after the exception is
-handled control can either return to the throw or from where the handler is
-defined.
+A signal exception allows either behaviour, \ie after an exception is handled,
+the handler has the option of returning to the raise or after the @try@
+statement. Currently, \CFA fixes the semantics of the handler return
+syntactically by the @catch@ or @catchResume@ clause.
 
-The design should be rexamined and be updated for \CFA. A very direct
-translation would perhaps have a new throw and catch pair and a statement
-(or statements) could be used to decide if the handler returns to the throw
-or continues where it is, but there are other options.
+Signal exceptions should be reexamined and possibly be supported in \CFA. A very
+direct translation is to have a new raise and catch pair, and a new statement
+(or statements) would indicate if the handler returns to the raise or continues
+where it is; but there may be other options.
 
-For instance resumption could be extended to cover this use by allowing
-local control flow out of it. This would require an unwind as part of the
-transition as there are stack frames that have to be removed.
-This would mean there is no notify like throw but because \CFA does not have
-exception signatures a termination can be thrown from any resumption handler
-already so there are already ways one could try to do this in existing \CFA.
+For instance, resumption could be extended to cover this use by allowing local
+control flow out of it. This approach would require an unwind as part of the
+transition as there are stack frames that have to be removed.  This approach
+means there is no notify raise, but because \CFA does not have exception
+signatures, a termination can be thrown from within any resumption handler so
+there is already a way to mimic this in existing \CFA.
 
 % Maybe talk about the escape; and escape CONTROL_STMT; statements or how
 % if we could choose if _Unwind_Resume proceeded to the clean-up stage this
 % would be much easier to implement.
-
-\section{Language Improvements}
-There is also a lot of work that are not follow ups to this work in terms of
-research, some have no interesting research to be done at all, but would
-improve \CFA as a programming language. The full list of these would
-naturally be quite extensive but here are a few examples that involve
-exceptions:
-
-\begin{itemize}
-\item The implementation of termination is not portable because it includes
-some assembly statements. These sections will have to be re-written to so
-\CFA has full support on more machines.
-\item Allowing exception handler to bind the exception to a reference instead
-of a pointer. This should actually result in no change in behaviour so there
-is no reason not to allow it. It is however a small improvement; giving a bit
-of flexibility to the user in what style they want to use.
-\item Enabling local control flow (by @break@, @return@ and
-similar statements) out of a termination handler. The current set-up makes
-this very difficult but the catch function that runs the handler after it has
-been matched could be inlined into the function's body, which would make this
-much easier. (To do the same for try blocks would probably wait for zero-cost
-exceptions, which would allow the try block to be inlined as well.)
-\end{itemize}
Index: doc/theses/andrew_beach_MMath/implement.tex
===================================================================
--- doc/theses/andrew_beach_MMath/implement.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/implement.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -2,99 +2,107 @@
 % Goes over how all the features are implemented.
 
+The implementation work for this thesis covers two components: the virtual
+system and exceptions. Each component is discussed in detail.
+
 \section{Virtual System}
+\label{s:VirtualSystem}
 % Virtual table rules. Virtual tables, the pointer to them and the cast.
-The \CFA virtual system only has one public facing feature: virtual casts.
-However there is a lot of structure to support that and provide some other
-features for the standard library.
-
-All of this is accessed through a field inserted at the beginning of every
-virtual type. Currently it is called @virtual_table@ but it is not
-ment to be accessed by the user. This field is a pointer to the type's
-virtual table instance. It is assigned once during the object's construction
-and left alone after that.
-
-\subsection{Virtual Table Construction}
-For each virtual type a virtual table is constructed. This is both a new type
-and an instance of that type. Other instances of the type could be created
-but the system doesn't use them. So this section will go over the creation of
-the type and the instance.
-
-Creating the single instance is actually very important. The address of the
-table acts as the unique identifier for the virtual type. Similarly the first
-field in every virtual table is the parent's id; a pointer to the parent
-virtual table instance.
-
-The remaining fields contain the type's virtual members. First come the ones
-present on the parent type, in the same order as they were the parent, and
-then any that this type introduces. The types of the ones inherited from the
-parent may have a slightly modified type, in that references to the
-dispatched type are replaced with the current virtual type. These are always
-taken by pointer or reference.
-
-The structure itself is created where the virtual type is created. The name
-of the type is created by mangling the name of the base type. The name of the
-instance is also generated by name mangling.
-
-The fields are initialized automatically.
+While the \CFA virtual system currently has only one public feature, virtual
+cast \see{\VPageref{p:VirtualCast}}, substantial structure is required to
+support it, and provide features for exception handling and the standard
+library.
+
+\subsection{Virtual Table}
+The virtual system is accessed through a private constant field inserted at the
+beginning of every virtual type, called the virtual-table pointer. This field
+points at a type's virtual table and is assigned during the object's
+construction.  The address of a virtual table acts as the unique identifier for
+the virtual type, and the first field of a virtual table is a pointer to the
+parent virtual-table or @0p@.  The remaining fields are duplicated from the
+parent tables in this type's inheritance chain, followed by any fields this type
+introduces. Parent fields are duplicated so they can be changed (\CC
+\lstinline[language=c++]|override|), so that references to the dispatched type
+are replaced with the current virtual type.
+\PAB{Can you create a simple diagram of the layout?}
+% These are always taken by pointer or reference.
+
+% For each virtual type, a virtual table is constructed. This is both a new type
+% and an instance of that type. Other instances of the type could be created
+% but the system doesn't use them. So this section will go over the creation of
+% the type and the instance.
+
+A virtual table is created when the virtual type is created. The name of the
+type is created by mangling the name of the base type. The name of the instance
+is also generated by name mangling.  The fields are initialized automatically.
 The parent field is initialized by getting the type of the parent field and
 using that to calculate the mangled name of the parent's virtual table type.
 There are two special fields that are included like normal fields but have
 special initialization rules: the @size@ field is the type's size and is
-initialized with a sizeof expression, the @align@ field is the type's
-alignment and uses an alignof expression. The remaining fields are resolved
-to a name matching the field's name and type using the normal visibility
-and overload resolution rules of the type system.
-
-These operations are split up into several groups depending on where they
-take place which can vary for monomorphic and polymorphic types. The first
-devision is between the declarations and the definitions. Declarations, such
-as a function signature or a structure's name, must always be visible but may
-be repeated so they go in headers. Definitions, such as function bodies and a
-structure's layout, don't have to be visible on use but must occur exactly
-once and go into source files.
-
+initialized with a @sizeof@ expression, the @align@ field is the type's
+alignment and uses an @alignof@ expression. The remaining fields are resolved
+to a name matching the field's name and type using the normal visibility and
+overload resolution rules of the type system.
+
+These operations are split up into several groups depending on where they take
+place which varies for monomorphic and polymorphic types. The first division is
+between the declarations and the definitions. Declarations, such as a function
+signature or an aggregate's name, must always be visible but may be repeated in
+the form of forward declarations in headers. Definitions, such as function
+bodies and an aggregate's layout, can be separately compiled but must occur
+exactly once in a source file.
+
+\begin{sloppypar}
 The declarations include the virtual type definition and forward declarations
 of the virtual table instance, constructor, message function and
-@get_exception_vtable@. The definition includes the storage and
-initialization of the virtual table instance and the bodies of the three
-functions.
+@get_exception_vtable@. The definition includes the storage and initialization
+of the virtual table instance and the bodies of the three functions.
+\end{sloppypar}
 
 Monomorphic instances put all of these two groups in one place each.
-
-Polymorphic instances also split out the core declarations and definitions
-from the per-instance information. The virtual table type and most of the
-functions are polymorphic so they are all part of the core. The virtual table
-instance and the @get_exception_vtable@ function.
-
+Polymorphic instances also split out the core declarations and definitions from
+the per-instance information. The virtual table type and most of the functions
+are polymorphic so they are all part of the core. The virtual table instance
+and the @get_exception_vtable@ function make up the per-instance information.
+
+\begin{sloppypar}
 Coroutines and threads need instances of @CoroutineCancelled@ and
-@ThreadCancelled@ respectively to use all of their functionality.
-When a new data type is declared with @coroutine@ or @thread@
-the forward declaration for the instance is created as well. The definition
-of the virtual table is created at the definition of the main function.
+@ThreadCancelled@ respectively to use all of their functionality.  When a new
+data type is declared with @coroutine@ or @thread@ the forward declaration for
+the instance is created as well. The definition of the virtual table is created
+at the definition of the main function.
+\end{sloppypar}
 
 \subsection{Virtual Cast}
-Virtual casts are implemented as a function call that does the check and a
-old C-style cast to do the type conversion. The C-cast is just to make sure
-the generated code is correct so the rest of the section is about that
-function.
-
-The function is @__cfa__virtual_cast@ and it is implemented in the
-standard library. It takes a pointer to the target type's virtual table and
-the object pointer being cast. The function is very simple, getting the
-object's virtual table pointer and then checking to see if it or any of
-its ancestors, by using the parent pointers, are the same as the target type
-virtual table pointer. It does this in a simple loop.
-
-For the generated code a forward decaration of the virtual works as follows.
-There is a forward declaration of @__cfa__virtual_cast@ in every cfa
-file so it can just be used. The object argument is the expression being cast
-so that is just placed in the argument list.
-
-To build the target type parameter the compiler will create a mapping from
-concrete type-name -- so for polymorphic types the parameters are filled in
--- to virtual table address. Every virtual table declaraction is added to the
-this table; repeats are ignored unless they have conflicting definitions.
-This does mean the declaractions have to be in scope, but they should usually
-be introduced as part of the type definition.
+Virtual casts are implemented as a function call that does the subtype check
+and a C coercion-cast to do the type conversion.
+% The C-cast is just to make sure the generated code is correct so the rest of
+% the section is about that function.
+The function is
+\begin{cfa}
+void * __cfa__virtual_cast(
+	struct __cfa__parent_vtable const * parent,
+	struct __cfa__parent_vtable const * const * child );
+\end{cfa}
+and it is implemented in the standard library. It takes a pointer to the target
+type's virtual table and the object pointer being cast. The function performs a
+linear search starting at the object's virtual table and walking through the
+parent pointers, checking to see if it or any of its ancestors are the same as
+the target type's virtual-table pointer.
+
+For the generated code, a forward declaration of the virtual works as follows.
+There is a forward declaration of @__cfa__virtual_cast@ in every \CFA file so
+it can just be used. The object argument is the expression being cast so that
+is just placed in the argument list.
+
+To build the target type parameter, the compiler creates a mapping from
+concrete type-name -- so for polymorphic types the parameters are filled in --
+to virtual table address. Every virtual table declaration is added to this
+table; repeats are ignored unless they have conflicting definitions.  Note,
+these declarations do not have to be in scope, but they should usually be
+introduced as part of the type definition.
+
+\PAB{I do not understand all of \VRef{s:VirtualSystem}. I think you need to
+write more to make it clear.}
+
 
 \section{Exceptions}
@@ -106,60 +114,151 @@
 % resumption doesn't as well.
 
-Many modern languages work with an interal stack that function push and pop
-their local data to. Stack unwinding removes large sections of the stack,
-often across functions.
-
-At a very basic level this can be done with @setjmp@ \& @longjmp@
-which simply move the top of the stack, discarding everything on the stack
-above a certain point. However this ignores all the clean-up code that should
-be run when certain sections of the stack are removed (for \CFA these are from
-destructors and finally clauses) and also requires that the point to which the
-stack is being unwound is known ahead of time. libunwind is used to address
-both of these problems.
-
-Libunwind, provided in @unwind.h@ on most platorms, is a C library
-that provides \CPP style stack unwinding. Its operation is divided into two
-phases. The search phase -- phase 1 -- is used to scan the stack and decide
-where the unwinding will stop, this allows for a dynamic target. The clean-up
-phase -- phase 2 -- does the actual unwinding and also runs any clean-up code
-as it goes.
-
-To use the libunwind each function must have a personality function and an
-LSDA (Language Specific Data Area). Libunwind actually does very little, it
-simply moves down the stack from function to function. Most of the actions are
-implemented by the personality function which libunwind calls on every
-function. Since this is shared across many functions or even every function in
-a language it will need a bit more information. This is provided by the LSDA
-which has the unique information for each function.
-
-Theoretically the LSDA can contain anything but conventionally it is a table
-with entries reperenting areas of the function and what has to be done there
-during unwinding. These areas are described in terms of where the instruction
-pointer is. If the current value of the instruction pointer is between two
-values reperenting the beginning and end of a region then execution is
-currently being executed. These are used to mark out try blocks and the
-scopes of objects with destructors to run.
-
-GCC will generate an LSDA and attach its personality function with the
-@-fexceptions@ flag. However this only handles the cleanup attribute.
-This attribute is used on a variable and specifies a function that should be
-run when the variable goes out of scope. The function is passed a pointer to
-the object as well so it can be used to mimic destructors. It however cannot
-be used to mimic try statements.
-
-\subsection{Implementing Personality Functions}
-Personality functions have a complex interface specified by libunwind.
-This section will cover some of the important parts of that interface.
-
-\begin{lstlisting}
-typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
-    int version,
-    _Unwind_Action action,
-    _Unwind_Exception_Class exception_class,
-    _Unwind_Exception * exception,
-    struct _Unwind_Context * context);
+% Many modern languages work with an interal stack that function push and pop
+% their local data to. Stack unwinding removes large sections of the stack,
+% often across functions.
+
+Stack unwinding is the process of removing stack frames (activations) from the
+stack. On function entry and return, unwinding is handled directly by the code
+embedded in the function. Usually, the stack-frame size is known statically
+based on parameter and local variable declarations.  For dynamically-sized
+local variables, a runtime computation is necessary to know the frame
+size. Finally, a function's frame-size may change during execution as local
+variables (static or dynamic sized) go in and out of scope.
+Allocating/deallocating stack space is usually an $O(1)$ operation achieved by
+bumping the hardware stack-pointer up or down as needed.
+
+Unwinding across multiple stack frames is more complex because individual stack
+management code associated with each frame is bypassed. That is, the location
+of a function's frame-management code is largely unknown and dispersed
+throughout the function, hence the current frame size managed by that code is
+also unknown. Hence, code unwinding across frames does not have direct
+knowledge about what is on the stack, and hence, how much of the stack needs to
+be removed.
+
+% At a very basic level this can be done with @setjmp@ \& @longjmp@ which simply
+% move the top of the stack, discarding everything on the stack above a certain
+% point. However this ignores all the cleanup code that should be run when
+% certain sections of the stack are removed (for \CFA these are from destructors
+% and finally clauses) and also requires that the point to which the stack is
+% being unwound is known ahead of time. libunwind is used to address both of
+% these problems.
+
+The traditional unwinding mechanism for C is implemented by saving a snap-shot
+of a function's state with @setjmp@ and restoring that snap-shot with
+@longjmp@. This approach bypasses the need to know stack details by simply
+resetting to a snap-shot of an arbitrary but existing function frame on the
+stack. It is up to the programmer to ensure the snap-shot is valid when it is
+reset, making this unwinding approach fragile with potential errors that are
+difficult to debug because the stack becomes corrupted.
+
+However, many languages define cleanup actions that must be taken when objects
+are deallocated from the stack or blocks end, such as running a variable's
+destructor or a @try@ statement's @finally@ clause. Handling these mechanisms
+requires walking the stack and checking each stack frame for these potential
+actions.
+
+For exceptions, it must be possible to walk the stack frames in search of @try@
+statements to match and execute a handler. For termination exceptions, it must
+also be possible to unwind all stack frames from the throw to the matching
+catch, and each of these frames must be checked for cleanup actions. Stack
+walking is where most of the complexity and expense of exception handling
+appears.
+
+One of the most popular tools for stack management is libunwind, a low-level
+library that provides tools for stack walking, handler execution, and
+unwinding. What follows is an overview of all the relevant features of
+libunwind needed for this work, and how \CFA uses them to implement exception
+handling.
+
+\subsection{libunwind Usage}
+Libunwind, accessed through @unwind.h@ on most platforms, is a C library that
+provides \CC-style stack-unwinding. Its operation is divided into two phases:
+search and cleanup. The dynamic target search -- phase 1 -- is used to scan the
+stack and decide where unwinding should stop (but no unwinding occurs). The
+cleanup -- phase 2 -- does the unwinding and also runs any cleanup code.
+
+To use libunwind, each function must have a personality function and a Language
+Specific Data Area (LSDA).  The LSDA has the unique information for each
+function to tell the personality function where a function is executing, its
+current stack frame, and what handlers should be checked.  Theoretically, the
+LSDA can contain any information but conventionally it is a table with entries
+representing regions of the function and what has to be done there during
+unwinding. These regions are bracketed by the instruction pointer. If the
+instruction pointer is within a region's start/end, then the function is currently
+executing in that region. Regions are used to mark out the scopes of objects
+with destructors and try blocks.
+
+% Libunwind actually does very little, it simply moves down the stack from
+% function to function. Most of the actions are implemented by the personality
+% function which libunwind calls on every function. Since this is shared across
+% many functions or even every function in a language it will need a bit more
+% information.
+
+The GCC compilation flag @-fexceptions@ causes the generation of an LSDA and
+attaches its personality function. \PAB{to what is it attached?}  However, this
+flag only handles the cleanup attribute
+\begin{cfa}
+void clean_up( int * var ) { ... }
+int avar __attribute__(( cleanup(clean_up) ));
+\end{cfa}
+which is used on a variable and specifies a function, \eg @clean_up@, run when
+the variable goes out of scope. The function is passed a pointer to the object
+so it can be used to mimic destructors. However, this feature cannot be used to
+mimic @try@ statements.
+
+\subsection{Personality Functions}
+Personality functions have a complex interface specified by libunwind.  This
+section covers some of the important parts of the interface.
+
+A personality function performs four tasks, although not all have to be
+present.
+\begin{lstlisting}[language=C,{moredelim=**[is][\color{red}]{@}{@}}]
+typedef _Unwind_Reason_Code (*@_Unwind_Personality_Fn@) (
+	int version,
+	_Unwind_Action @action@,
+	_Unwind_Exception_Class @exception_class@,
+	_Unwind_Exception * @exception@,
+	struct _Unwind_Context * @context@ );
 \end{lstlisting}
-
-The return value, the reason code, is an enumeration of possible messages
+The @action@ argument is a bitmask of possible actions:
+\begin{enumerate}
+\item
+@_UA_SEARCH_PHASE@ specifies a search phase and tells the personality function
+to check for handlers.  If there is a handler in a stack frame, as defined by
+the language, the personality function returns @_URC_HANDLER_FOUND@; otherwise
+it returns @_URC_CONTINUE_UNWIND@.
+
+\item
+@_UA_CLEANUP_PHASE@ specifies a cleanup phase, where the entire frame is
+unwound and all cleanup code is run. The personality function does whatever
+cleanup the language defines (such as running destructors/finalizers) and then
+generally returns @_URC_CONTINUE_UNWIND@.
+
+\item
+\begin{sloppypar}
+@_UA_HANDLER_FRAME@ specifies a cleanup phase on a function frame that found a
+handler. The personality function must prepare to return to normal code
+execution and return @_URC_INSTALL_CONTEXT@.
+\end{sloppypar}
+
+\item
+@_UA_FORCE_UNWIND@ specifies a forced unwind call. Forced unwind only performs
+the cleanup phase and uses a different means to decide when to stop
+\see{\VRef{s:ForcedUnwind}}.
+\end{enumerate}
+
+The @exception_class@ argument is a copy of the
+\lstinline[language=C]|exception|'s @exception_class@ field.
+
+The \lstinline[language=C]|exception| argument is a pointer to the user
+provided storage object. It has two public fields, the exception class, which
+is actually just a number, identifying the exception handling mechanism that
+created it, and the cleanup function. The cleanup function is called if
+required by the exception.
+
+The @context@ argument is a pointer to an opaque type passed to helper
+functions called inside the personality function.
+
+The return value, @_Unwind_Reason_Code@, is an enumeration of possible messages
 that can be passed several places in libunwind. It includes a number of
 messages for special cases (some of which should never be used by the
@@ -167,138 +266,90 @@
 personality function should always return @_URC_CONTINUE_UNWIND@.
 
-The @version@ argument is the verson of the implementation that is
-calling the personality function. At this point it appears to always be 1 and
-it will likely stay that way until a new version of the API is updated.
-
-The @action@ argument is set of flags that tell the personality
-function when it is being called and what it must do on this invocation.
-The flags are as follows:
-\begin{itemize}
-\item@_UA_SEARCH_PHASE@: This flag is set whenever the personality
-function is called during the search phase. The personality function should
-decide if unwinding will stop in this function or not. If it does then the
-personality function should return @_URC_HANDLER_FOUND@.
-\item@_UA_CLEANUP_PHASE@: This flag is set whenever the personality
-function is called during the cleanup phase. If no other flags are set this
-means the entire frame will be unwound and all cleanup code should be run.
-\item@_UA_HANDLER_FRAME@: This flag is set during the cleanup phase
-on the function frame that found the handler. The personality function must
-prepare to return to normal code execution and return
-@_URC_INSTALL_CONTEXT@.
-\item@_UA_FORCE_UNWIND@: This flag is set if the personality function
-is called through a forced unwind call. Forced unwind only performs the
-cleanup phase and uses a different means to decide when to stop. See its
-section below.
-\end{itemize}
-
-The @exception_class@ argument is a copy of the @exception@'s
-@exception_class@ field.
-
-The @exception@ argument is a pointer to the user provided storage
-object. It has two public fields, the exception class which is actually just
-a number that identifies the exception handling mechanism that created it and
-the other is the clean-up function. The clean-up function is called if the
-exception needs to 
-
-The @context@ argument is a pointer to an opaque type. This is passed
-to the many helper functions that can be called inside the personality
-function.
-
 \subsection{Raise Exception}
-This could be considered the central function of libunwind. It preforms the
-two staged unwinding the library is built around and most of the rest of the
-interface of libunwind is here to support it. It's signature is as follows:
-
-\begin{lstlisting}
+Raising an exception is the central function of libunwind and it performs a
+two-staged unwinding.
+\begin{cfa}
 _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
+\end{cfa}
+First, the function begins the search phase, calling the personality function
+of the most recent stack frame. It continues to call personality functions
+traversing the stack from newest to oldest until a function finds a handler or
+the end of the stack is reached. In the latter case, raise exception returns
+@_URC_END_OF_STACK@.
+
+Second, when a handler is matched, raise exception continues onto the cleanup
+phase.
+Once again, it calls the personality functions of each stack frame from newest
+to oldest. This pass stops at the stack frame containing the matching handler.
+If that personality function does not install a handler, it is an error.
+
+If an error is encountered, raise exception returns either
+@_URC_FATAL_PHASE1_ERROR@ or @_URC_FATAL_PHASE2_ERROR@ depending on when the
+error occurred.
+
+\subsection{Forced Unwind}
+\label{s:ForcedUnwind}
+Forced Unwind is the other central function in libunwind.
+\begin{cfa}
+_Unwind_Reason_Code _Unwind_ForcedUnwind( _Unwind_Exception *,
+	_Unwind_Stop_Fn, void *);
+\end{cfa}
+It also unwinds the stack but it does not use the search phase. Instead, another
+function, the stop function, is used to decide when to stop unwinding.  The exception is the
+same as the one passed to raise exception. The extra arguments are the stop
+function and the stop parameter. The stop function has a similar interface as a
+personality function, except it is also passed the stop parameter.
+\begin{lstlisting}[language=C,{moredelim=**[is][\color{red}]{@}{@}}]
+typedef _Unwind_Reason_Code (*@_Unwind_Stop_Fn@)(
+	int version,
+	_Unwind_Action @action@,
+	_Unwind_Exception_Class @exception_class@,
+	_Unwind_Exception * @exception@,
+	struct _Unwind_Context * @context@, void * @stop_parameter@);
 \end{lstlisting}
 
-When called the function begins the search phase, calling the personality
-function of the most recent stack frame. It will continue to call personality
-functions traversing the stack new-to-old until a function finds a handler or
-the end of the stack is reached. In the latter case raise exception will
-return with @_URC_END_OF_STACK@.
-
-Once a handler has been found raise exception continues onto the the cleanup
-phase. Once again it will call the personality functins of each stack frame
-from newest to oldest. This pass will stop at the stack frame that found the
-handler last time, if that personality function does not install the handler
-it is an error.
-
-If an error is encountered raise exception will return either
-@_URC_FATAL_PHASE1_ERROR@ or @_URC_FATAL_PHASE2_ERROR@ depending
-on when the error occured.
-
-\subsection{Forced Unwind}
-This is the second big function in libunwind. It also unwinds a stack but it
-does not use the search phase. Instead another function, the stop function,
-is used to decide when to stop.
-
-\begin{lstlisting}
-_Unwind_Reason_Code _Unwind_ForcedUnwind(
-    _Unwind_Exception *, _Unwind_Stop_Fn, void *);
-\end{lstlisting}
-
-The exception is the same as the one passed to raise exception. The extra
-arguments are the stop function and the stop parameter. The stop function has
-a similar interface as a personality function, except it is also passed the
-stop parameter.
-
-\begin{lstlisting}
-typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(
-    int version,
-    _Unwind_Action action,
-    _Unwind_Exception_Class exception_class,
-    _Unwind_Exception * exception,
-    struct _Unwind_Context * context,
-    void * stop_parameter);
-\end{lstlisting}
-
 The stop function is called at every stack frame before the personality
-function is called and then once more once after all frames of the stack have
-been unwound.
-
-Each time it is called the stop function should return @_URC_NO_REASON@
-or transfer control directly to other code outside of libunwind. The
-framework does not provide any assistance here.
-
-Its arguments are the same as the paired personality function.
-The actions @_UA_CLEANUP_PHASE@ and @_UA_FORCE_UNWIND@ are always
-set when it is called. By the official standard that is all but both GCC and
-Clang add an extra action on the last call at the end of the stack:
-@_UA_END_OF_STACK@.
+function is called and then once more after all frames of the stack are
+unwound.
+
+Each time it is called, the stop function should return @_URC_NO_REASON@ or
+transfer control directly to other code outside of libunwind. The framework
+does not provide any assistance here.
+
+\begin{sloppypar}
+Its arguments are the same as the paired personality function.  The actions
+@_UA_CLEANUP_PHASE@ and @_UA_FORCE_UNWIND@ are always set when it is
+called. Beyond the libunwind standard, both GCC and Clang add an extra action
+on the last call at the end of the stack: @_UA_END_OF_STACK@.
+\end{sloppypar}
 
 \section{Exception Context}
 % Should I have another independent section?
 % There are only two things in it, top_resume and current_exception. How it is
-% stored changes depending on wheither or not the thread-library is linked.
-
-The exception context is a piece of global storage used to maintain data
-across different exception operations and to communicate between different
-components.
-
-Each stack has its own exception context. In a purely sequental program, using
-only core Cforall, there is only one stack and the context is global. However
-if the library @libcfathread@ is linked then there can be multiple
-stacks so they will each need their own.
-
-To handle this code always gets the exception context from the function
-@this_exception_context@. The main exception handling code is in
-@libcfa@ and that library also defines the function as a weak symbol
-so it acts as a default. Meanwhile in @libcfathread@ the function is
-defined as a strong symbol that replaces it when the libraries are linked
-together.
-
-The version of the function defined in @libcfa@ is very simple. It
-returns a pointer to a global static variable. With only one stack this
-global instance is associated with the only stack.
-
-The version of the function defined in @libcfathread@ has to handle
-more as there are multiple stacks. The exception context is included as
-part of the per-stack data stored as part of coroutines. In the cold data
-section, stored at the base of each stack, is the exception context for that
-stack. The @this_exception_context@ uses the concurrency library to get
-the current coroutine and through it the cold data section and the exception
-context.
+% stored changes depending on whether or not the thread-library is linked.
+
+The exception context is global storage used to maintain data across different
+exception operations and to communicate among different components.
+
+Each stack must have its own exception context. In a sequential \CFA program,
+there is only one stack with a single global exception-context. However, when
+the library @libcfathread@ is linked, there are multiple stacks where each
+needs its own exception context.
+
+General access to the exception context is provided by function
+@this_exception_context@. For sequential execution, this function is defined as
+a weak symbol in the \CFA system-library, @libcfa@. When a \CFA program is
+concurrent, it links with @libcfathread@, where this function is defined with a
+strong symbol replacing the sequential version.
+
+% The version of the function defined in @libcfa@ is very simple. It returns a
+% pointer to a global static variable. With only one stack this global instance
+% is associated with the only stack.
+
+For coroutines, @this_exception_context@ accesses the exception context stored
+at the base of the stack. For threads, @this_exception_context@ uses the
+concurrency library to access the current stack of the thread or coroutine
+being executed by the thread, and then accesses the exception context stored at
+the base of this stack.
 
 \section{Termination}
@@ -306,133 +357,132 @@
 % catches. Talk about GCC nested functions.
 
-Termination exceptions use libunwind quite heavily because it matches the
-intended use from \CPP exceptions very closely. The main complication is that
-since the \CFA compiler works by translating to C code it cannot generate the
-assembly to form the LSDA for try blocks or destructors.
+Termination exceptions use libunwind heavily because it matches the intended
+use from \CC exceptions closely. The main complication for \CFA is that the
+compiler generates C code, making it very difficult to generate the assembly to
+form the LSDA for try blocks or destructors.
 
 \subsection{Memory Management}
-The first step of termination is to copy the exception into memory managed by
-the exception system. Currently the system just uses malloc, without reserved
-memory or and ``small allocation" optimizations. The exception handling
-mechanism manages memory for the exception as well as memory for libunwind
-and the system's own per-exception storage.
-
-Exceptions are stored in variable sized block. The first component is a fixed
-sized data structure that contains the information for libunwind and the
-exception system. The second component is a blob of memory that is big enough
-to store the exception. Macros with pointer arthritic and type cast are
-used to move between the components or go from the embedded
+The first step of a termination raise is to copy the exception into memory
+managed by the exception system. Currently, the system uses @malloc@, rather
+than reserved memory or the stack top. The exception handling mechanism manages
+memory for the exception as well as memory for libunwind and the system's own
+per-exception storage.
+
+Exceptions are stored in variable-sized blocks. \PAB{Show a memory layout
+figure.} The first component is a fixed sized data structure that contains the
+information for libunwind and the exception system. The second component is an
+area of memory big enough to store the exception. Macros with pointer arithmetic
+and type cast are used to move between the components or go from the embedded
 @_Unwind_Exception@ to the entire node.
 
-All of these nodes are strung together in a linked list. One linked list per
-stack, with the head stored in the exception context. Within each linked list
-the most recently thrown exception is at the head and the older exceptions
-are further down the list. This list format allows exceptions to be thrown
-while a different exception is being handled. Only the exception at the head
-of the list is currently being handled, the other will wait for the
-exceptions before them to be removed.
-
-The virtual members in the exception's virtual table. The size of the
-exception, the copy function and the free function are all in the virtual
-table so they are decided per-exception type. The size and copy function are
-used right away when the exception is copied in to managed memory. After the
-exception is handled the free function is used to clean up the exception and
-then the entire node is passed to free.
-
-\subsection{Try Statements \& Catch Clauses}
-The try statements with termination handlers have a pretty complex conversion
-to compensate for the lack of assembly generation. Libunwind requires an LSDA
-(Language Specific Data Area) and personality function for a function to
-unwind across it. The LSDA in particular is hard to generate at the level of
-C which is what the \CFA compiler outputs so a work-around is used.
-
-This work around is a function called @__cfaehm_try_terminate@ in the
-standard library. The contents of a try block and the termination handlers
-are converted into functions. These are then passed to the try terminate
-function and it calls them. This puts the try statements in their own
-functions so that no function has to deal with both termination handlers and
-destructors.
-
-This function has some custom embedded assembly that defines its personality
-function and LSDA. This is hand coded in C which is why there is only one
-version of it, the compiler has no capability to generate it. The personality
-function is structured so that it may be expanded, but really it only handles
-this one function. Notably it does not handle any destructors so the function
-is constructed so that it does need to run it.
+All of these nodes are linked together in a list, one list per stack, with the
+list head stored in the exception context. Within each linked list, the most
+recently thrown exception is at the head followed by older thrown
+exceptions. This format allows exceptions to be thrown, while a different
+exception is being handled. The exception at the head of the list is currently
+being handled, while other exceptions wait for the exceptions before them to be
+removed.
+
+The virtual members in the exception's virtual table provide the size of the
+exception, the copy function, and the free function, so they are specific to an
+exception type. The size and copy function are used immediately to copy an
+exception into managed memory. After the exception is handled the free function
+is used to clean up the exception and then the entire node is passed to free.
+
+\subsection{Try Statements and Catch Clauses}
+The try statement with termination handlers is complex because it must
+compensate for the lack of assembly-code generated from \CFA. Libunwind
+requires an LSDA and personality function for control to unwind across a
+function. The LSDA in particular is hard to mimic in generated C code.
+
+The workaround is a function called @__cfaehm_try_terminate@ in the standard
+library. The contents of a try block and the termination handlers are converted
+into functions. These are then passed to the try terminate function and it
+calls them. This approach puts a try statement in its own functions so that no
+function has to deal with both termination handlers and destructors. \PAB{I do
+not understand the previous sentence.}
+
+This function has some custom embedded assembly that defines \emph{its}
+personality function and LSDA. The assembly is created with handcrafted C @asm@
+statements, which is why there is only one version of it. The personality
+function is structured so that it can be expanded, but currently it only
+handles this one function.  Notably, it does not handle any destructors so the
+function is constructed so that it does not need to run any. \PAB{I do not
+understand the previous sentence.}
 
 The three functions passed to try terminate are:
-\begin{itemize}
-\item The try function: This function is the try block, all the code inside
-the try block is placed inside the try function. It takes no parameters and
-has no return value. This function is called during regular execution to run
-the try block.
-\item The match function: This function decides if this try statement should
-handle any given termination exception. It takes a pointer to the exception
-and returns 0 if the exception is not handled here. Otherwise the return value
-is the id of the handler that should handle the exception. It is called
-during the search phase.
-It is constructed from the conditional part of each handler. It runs each
-check in turn, first checking to see if the object
-\item The catch function: This function handles the exception. It takes a
-pointer to the exception and the handler's id and returns nothing. It is
-called after the clean-up phase.
-It is constructed by stitching together the bodies of each handler
-\end{itemize}
-All three are created with GCC nested functions. GCC nested functions can be
-used to create closures, functions that can refer to the state of other
-functions on the stack. This allows the functions to refer to the main
-function and all the variables in scope.
-
-These nested functions and all other functions besides
-@__cfaehm_try_terminate@ in \CFA use the GCC personality function and
-the @-fexceptions@ flag to generate the LSDA. This allows destructors
-to be implemented with the cleanup attribute.
+\begin{description}
+\item[try function:] This function is the try block, all the code inside the
+try block is placed inside the try function. It takes no parameters and has no
+return value. This function is called during regular execution to run the try
+block.
+
+\item[match function:] This function is called during the search phase and
+decides if a catch clause matches the termination exception.  It is constructed
+from the conditional part of each handler and runs each check, top to bottom,
+in turn, first checking to see if the exception type matches and then if the
+condition is true. It takes a pointer to the exception and returns 0 if the
+exception is not handled here. Otherwise the return value is the id of the
+handler that matches the exception.
+
+\item[handler function:] This function handles the exception. It takes a
+pointer to the exception and the handler's id and returns nothing. It is called
+after the cleanup phase.  It is constructed by stitching together the bodies of
+each handler and dispatches to the selected handler.
+\end{description}
+All three functions are created with GCC nested functions. GCC nested functions
+can be used to create closures, functions that can refer to the state of other
+functions on the stack. This approach allows the functions to refer to all the
+variables in scope for the function containing the @try@ statement.  These
+nested functions and all other functions besides @__cfaehm_try_terminate@ in
+\CFA use the GCC personality function and the @-fexceptions@ flag to generate
+the LSDA. This allows destructors to be implemented with the cleanup attribute.
 
 \section{Resumption}
 % The stack-local data, the linked list of nodes.
 
-Resumption uses a list of nodes for its stack traversal. The head of the list
-is stored in the exception context. The nodes in the list just have a pointer
+Resumption is simple to implement because there is no stack unwinding. The
+resumption raise uses a list of nodes for its stack traversal. The head of the
+list is stored in the exception context. The nodes in the list have a pointer
 to the next node and a pointer to the handler function.
 
-The on a resumption throw the this list is traversed. At each node the
-handler function is called and is passed the exception by pointer. It returns
-true if the exception was handled and false otherwise.
-
-The handler function does both the matching and catching. It tries each
-the condition of @catchResume@ in order, top-to-bottom and until it
-finds a handler that matches. If no handler matches then the function returns
-false. Otherwise the matching handler is run, if it completes successfully
-the function returns true. Rethrows, through the @throwResume;@
-statement, cause the function to return true.
+A resumption raise traverses this list. At each node the handler function is
+called, passing the exception by pointer. It returns true if the exception is
+handled and false otherwise.
+
+The handler function does both the matching and handling. It computes the
+condition of each @catchResume@ in top-to-bottom order, until it finds a
+handler that matches. If no handler matches then the function returns
+false. Otherwise the matching handler is run; if it completes successfully, the
+function returns true. Reresume, through the @throwResume;@ statement, causes
+the function to return true.
 
 % Recursive Resumption Stuff:
-Blocking out part of the stack is accomplished by updating the front of the
-list as the search continues. Before the handler at a node is called the head
-of the list is updated to the next node of the current node. After the search
-is complete, successful or not, the head of the list is reset.
-
-This means the current handler and every handler that has already been
-checked are not on the list while a handler is run. If a resumption is thrown
-during the handling of another resumption the active handlers and all the
-other handler checked up to this point will not be checked again.
+Search skipping \see{\VPageref{p:searchskip}}, which ignores parts of the stack
+already examined, is accomplished by updating the front of the list as the
+search continues. Before the handler at a node is called the head of the list
+is updated to the next node of the current node. After the search is complete,
+successful or not, the head of the list is reset.
+
+This mechanism means the current handler and every handler that has already
+been checked are not on the list while a handler is run. If a resumption is
+thrown during the handling of another resumption the active handlers and all
+the other handlers checked up to this point are not checked again.
 
 This structure also supports new handler added while the resumption is being
 handled. These are added to the front of the list, pointing back along the
-stack -- the first one will point over all the checked handlers -- and the
-ordering is maintained.
-
-\subsection{Libunwind Compatibility}
-Resumption does not use libunwind for two simple reasons. The first is that
-it does not have to unwind anything so would never need to use the clean-up
-phase. Still the search phase could be used to make it free to enter or exit
-a try statement with resumption handlers in the same way termination handlers
-are for the same trade off in the cost of the throw. This is where the second
-reason comes in, there is no way to return from a search without installing
-a handler or raising an error.
-
-Although work arounds could be created none seemed to be worth it for the
-prototype. This implementation has no difference in behaviour and is much
-simpler.
+stack -- the first one points over all the checked handlers -- and the ordering
+is maintained.
+
+\label{p:zero-cost}
+Note, the resumption implementation has a cost for entering/exiting a @try@
+statement with @catchResume@ clauses, whereas a @try@ statement with @catch@
+clauses has zero-cost entry/exit. While resumption does not need the stack
+unwinding and cleanup provided by libunwind, it could use the search phase to
+provide zero-cost enter/exit using the LSDA. Unfortunately, there is no way
+to return from a libunwind search without installing a handler or raising an
+error.  Although workarounds might be possible, they are beyond the scope of
+this thesis. The current resumption implementation has simplicity in its
+favour.
 % Seriously, just compare the size of the two chapters and then consider
 % that unwind is required knowledge for that chapter.
@@ -440,13 +490,12 @@
 \section{Finally}
 % Uses destructors and GCC nested functions.
-Finally clauses are a simple decomposition to some of the existing features.
-The code in the block is placed into a GCC nested function with a unique name,
-no arguments or return values. This nested function is then set as the
-clean-up function of an empty object that is declared at the beginning of a
-block placed around the contexts of the try statement.
+A finally clause is placed into a GCC nested-function with a unique name, and no
+arguments or return values. This nested function is then set as the cleanup
+function of an empty object that is declared at the beginning of a block placed
+around the context of the associated @try@ statement.
 
 The rest is handled by GCC. The try block and all handlers are inside the
-block. When they are complete control exits the block and the empty object
-is cleaned up, which runs the function that contains the finally code.
+block. At completion, control exits the block and the empty object is cleaned
+up, which runs the function that contains the finally code.
 
 \section{Cancellation}
@@ -454,43 +503,44 @@
 
 Cancellation also uses libunwind to do its stack traversal and unwinding,
-however it uses a different primary function @_Unwind_ForcedUnwind@.
-Details of its interface can be found in the unwind section.
-
-The first step of cancellation is to find the stack was cancelled and which
-type of stack it is. Luckily the threads library stores the main thread
-pointer and the current thread pointer and every thread stores a pointer to
+however it uses a different primary function @_Unwind_ForcedUnwind@.  Details
+of its interface can be found in \VRef{s:ForcedUnwind}.
+
+The first step of cancellation is to find the cancelled stack and its type:
+coroutine or thread. Fortunately, the thread library stores the main thread
+pointer and the current thread pointer, and every thread stores a pointer to
 its main coroutine and the coroutine it is currently executing.
 
-So if the the current thread's main and current coroutine do not match, it is
-a coroutine cancellation. Otherwise if the main and current thread do not
-match, it is a thread cancellation. Otherwise it is a main thread
-cancellation.
-
-However if the threading library is not linked then execution must be on the
-main stack as that is the only one that exists. So the entire check is skipped
-using the linker and weak symbols. Instead the main thread cancellation is
-unconditionally preformed.
-
-Regardless of how they are choosen afterwords the stop function and the stop
-parameter are passed to the forced unwind functon. The general pattern of all
-three stop functions is the same, they continue unwinding until the end of
-stack when they do there primary work.
-
-Main stack cancellation it is very simple. The ``transfer" is just an abort,
-the program stops executing.
-
-The coroutine cancellation stores the exception on the coroutine and then
-does a coroutine context switch. The rest is handled inside resume. Every time
-control returns from a resumed thread there is a check to see if it is
-cancelled. If it is the exception is retrieved and the CoroutineCancelled
-exception is constructed and loaded. It is then thrown as a regular exception
-with the default handler coming from the context of the resumption call.
-
-The thread cancellation stores the exception on the thread's main stack and
-then returns to the scheduler. The rest is handled by the joiner. The wait
-for the joined thread to finish works the same but after that it checks
-to see if there was a cancellation. If there was the exception is retrieved
-and the ThreadCancelled exception is constructed. The default handler is
-passed in as a function pointer. If it is null (as it is for the
-auto-generated joins on destructor call) it a default is used that simply
-calls abort; which gives the required handling on implicate join.
+The first check is if the current thread's main and current coroutine do not
+match, implying a coroutine cancellation; otherwise, if the main and current
+thread do not match, it is a thread cancellation. Otherwise it is a main thread
+cancellation. \PAB{Previous sentence does not make sense.}
+
+However, if the threading library is not linked, the sequential execution is on
+the main stack. Hence, the entire check is skipped because the weak-symbol
+function is loaded. Therefore, a main thread cancellation is unconditionally
+performed.
+
+Regardless of how the stack is chosen, the stop function and parameter are
+passed to the forced-unwind function. The general pattern of all three stop
+functions is the same: they continue unwinding until the end of stack when they
+do their primary work.
+
+For main stack cancellation, the transfer is just a program abort.
+
+For coroutine cancellation, the exception is stored on the coroutine's stack,
+and the coroutine context switches to its last resumer. The rest is handled on
+the backside of the resume, which checks if the resumed coroutine is
+cancelled. If cancelled, the exception is retrieved from the resumed coroutine,
+and a @CoroutineCancelled@ exception is constructed and loaded with the
+cancelled exception. It is then resumed as a regular exception with the default
+handler coming from the context of the resumption call.
+
+For thread cancellation, the exception is stored on the thread's main stack and
+then context switched to the scheduler. The rest is handled by the thread
+joiner. When the join is complete, the joiner checks if the joined thread is
+cancelled. If cancelled, the exception is retrieved from the joined thread, and
+a @ThreadCancelled@ exception is constructed and loaded with the cancelled
+exception. The default handler is passed in as a function pointer. If it is
+null (as it is for the auto-generated joins on destructor call), the default is
+used, which is a program abort.
+%; which gives the required handling on implicit join.
Index: doc/theses/andrew_beach_MMath/thesis-frontpgs.tex
===================================================================
--- doc/theses/andrew_beach_MMath/thesis-frontpgs.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/thesis-frontpgs.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -36,5 +36,5 @@
 
         A thesis \\
-        presented to the University of Waterloo \\ 
+        presented to the University of Waterloo \\
         in fulfillment of the \\
         thesis requirement for the degree of \\
@@ -64,5 +64,5 @@
 \cleardoublepage
 
- 
+
 %----------------------------------------------------------------------
 % EXAMINING COMMITTEE (Required for Ph.D. theses only)
@@ -71,15 +71,16 @@
 \begin{center}\textbf{Examining Committee Membership}\end{center}
   \noindent
-The following served on the Examining Committee for this thesis. The decision of the Examining Committee is by majority vote.
-  \bigskip
-  
-  \noindent
-\begin{tabbing}
-Internal-External Member: \=  \kill % using longest text to define tab length
-External Examiner: \>  Bruce Bruce \\ 
+The following served on the Examining Committee for this thesis. The decision
+of the Examining Committee is by majority vote.
+  \bigskip
+
+  \noindent
+\begin{tabbing}
+Internal-External Member: \=  \kill % using longest text to define tab length
+External Examiner: \>  Bruce Bruce \\
 \> Professor, Dept. of Philosophy of Zoology, University of Wallamaloo \\
-\end{tabbing} 
-  \bigskip
-  
+\end{tabbing}
+  \bigskip
+
   \noindent
 \begin{tabbing}
@@ -91,5 +92,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
   \begin{tabbing}
@@ -99,5 +100,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -107,5 +108,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -123,8 +124,10 @@
   % December 13th, 2006.  It is designed for an electronic thesis.
   \noindent
-I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
-
-  \bigskip
-  
+I hereby declare that I am the sole author of this thesis. This is a true copy
+of the thesis, including any required final revisions, as accepted by my
+examiners.
+
+  \bigskip
+
   \noindent
 I understand that my thesis may be made electronically available to the public.
Index: doc/theses/andrew_beach_MMath/thesis.tex
===================================================================
--- doc/theses/andrew_beach_MMath/thesis.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/thesis.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -45,5 +45,5 @@
 % FRONT MATERIAL
 %----------------------------------------------------------------------
-\input{thesis-frontpgs} 
+\input{thesis-frontpgs}
 
 %----------------------------------------------------------------------
@@ -65,5 +65,5 @@
 A \gls{computer} could compute $\pi$ all day long. In fact, subsets of digits
 of $\pi$'s decimal approximation would make a good source for psuedo-random
-vectors, \gls{rvec} . 
+vectors, \gls{rvec} .
 
 %----------------------------------------------------------------------
@@ -96,41 +96,41 @@
 
 \begin{itemize}
-\item A well-prepared PDF should be 
+\item A well-prepared PDF should be
   \begin{enumerate}
     \item Of reasonable size, {\it i.e.} photos cropped and compressed.
-    \item Scalable, to allow enlargment of text and drawings. 
-  \end{enumerate} 
+    \item Scalable, to allow enlargement of text and drawings.
+  \end{enumerate}
 \item Photos must be bit maps, and so are not scaleable by definition. TIFF and
 BMP are uncompressed formats, while JPEG is compressed. Most photos can be
 compressed without losing their illustrative value.
-\item Drawings that you make should be scalable vector graphics, \emph{not} 
+\item Drawings that you make should be scalable vector graphics, \emph{not}
 bit maps. Some scalable vector file formats are: EPS, SVG, PNG, WMF. These can
-all be converted into PNG or PDF, that pdflatex recognizes. Your drawing 
-package probably can export to one of these formats directly. Otherwise, a 
-common procedure is to print-to-file through a Postscript printer driver to 
-create a PS file, then convert that to EPS (encapsulated PS, which has a 
-bounding box to describe its exact size rather than a whole page). 
+all be converted into PNG or PDF, that pdflatex recognizes. Your drawing
+package probably can export to one of these formats directly. Otherwise, a
+common procedure is to print-to-file through a Postscript printer driver to
+create a PS file, then convert that to EPS (encapsulated PS, which has a
+bounding box to describe its exact size rather than a whole page).
 Programs such as GSView (a Ghostscript GUI) can create both EPS and PDF from
 PS files. Appendix~\ref{AppendixA} shows how to generate properly sized Matlab
 plots and save them as PDF.
 \item It's important to crop your photos and draw your figures to the size that
-you want to appear in your thesis. Scaling photos with the 
-includegraphics command will cause loss of resolution. And scaling down 
+you want to appear in your thesis. Scaling photos with the
+includegraphics command will cause loss of resolution. And scaling down
 drawings may cause any text annotations to become too small.
 \end{itemize}
- 
+
 For more information on \LaTeX\, see the uWaterloo Skills for the
-Academic Workplace \href{https://uwaterloo.ca/information-systems-technology/services/electronic-thesis-preparation-and-submission-support/ethesis-guide/creating-pdf-version-your-thesis/creating-pdf-files-using-latex/latex-ethesis-and-large-documents}{course notes}. 
+Academic Workplace \href{https://uwaterloo.ca/information-systems-technology/services/electronic-thesis-preparation-and-submission-support/ethesis-guide/creating-pdf-version-your-thesis/creating-pdf-files-using-latex/latex-ethesis-and-large-documents}{course notes}.
 \footnote{
 Note that while it is possible to include hyperlinks to external documents,
-it is not wise to do so, since anything you can't control may change over time. 
-It \emph{would} be appropriate and necessary to provide external links to 
-additional resources for a multimedia ``enhanced'' thesis. 
-But also note that if the \package{hyperref} package is not included, 
-as for the print-optimized option in this thesis template, any \cmmd{href} 
+it is not wise to do so, since anything you can't control may change over time.
+It \emph{would} be appropriate and necessary to provide external links to
+additional resources for a multimedia ``enhanced'' thesis.
+But also note that if the \package{hyperref} package is not included,
+as for the print-optimized option in this thesis template, any \cmmd{href}
 commands in your logical document are no longer defined.
 A work-around employed by this thesis template is to define a dummy
-\cmmd{href} command (which does nothing) in the preamble of the document, 
-before the \package{hyperref} package is included. 
+\cmmd{href} command (which does nothing) in the preamble of the document,
+before the \package{hyperref} package is included.
 The dummy definition is then redifined by the
 \package{hyperref} package when it is included.
@@ -138,5 +138,5 @@
 
 The classic book by Leslie Lamport \cite{lamport.book}, author of \LaTeX , is
-worth a look too, and the many available add-on packages are described by 
+worth a look too, and the many available add-on packages are described by
 Goossens \textit{et al} \cite{goossens.book}.
 
@@ -180,7 +180,7 @@
 Export Setup button in the figure Property Editor.
 
-\section{From the Command Line} 
+\section{From the Command Line}
 All figure properties can also be manipulated from the command line. Here's an
-example: 
+example:
 \begin{verbatim}
 x=[0:0.1:pi];
Index: doc/theses/andrew_beach_MMath/unwinding.tex
===================================================================
--- doc/theses/andrew_beach_MMath/unwinding.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/unwinding.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -1,3 +1,3 @@
-\chapter{\texorpdfstring{Unwinding in \CFA}{Unwinding in Cforall}}
+\chapter{Unwinding in \CFA}
 
 Stack unwinding is the process of removing stack frames (activations) from the
@@ -110,5 +110,5 @@
 alternate transfers of control.
 
-\section{\texorpdfstring{\CFA Implementation}{Cforall Implementation}}
+\section{\CFA Implementation}
 
 To use libunwind, \CFA provides several wrappers, its own storage, personality
@@ -182,3 +182,3 @@
 control has returned to normal control flow.
 
-{\color{blue}PAB: Maybe a diagram would be helpful?}
+\PAB{Maybe a diagram would be helpful?}
Index: doc/theses/andrew_beach_MMath/uw-ethesis-frontpgs.tex
===================================================================
--- doc/theses/andrew_beach_MMath/uw-ethesis-frontpgs.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/uw-ethesis-frontpgs.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -13,22 +13,18 @@
         \vspace*{1.0cm}
 
-        \Huge
-        {\bf Exception Handling in \CFA}
+        {\Huge\bf Exception Handling in \CFA}
 
         \vspace*{1.0cm}
 
-        \normalsize
         by \\
 
         \vspace*{1.0cm}
 
-        \Large
-        Andrew James Beach \\
+        {\Large Andrew James Beach} \\
 
         \vspace*{3.0cm}
 
-        \normalsize
         A thesis \\
-        presented to the University of Waterloo \\ 
+        presented to the University of Waterloo \\
         in fulfillment of the \\
         thesis requirement for the degree of \\
@@ -43,31 +39,35 @@
         \vspace*{1.0cm}
 
-        \copyright\ Andrew James Beach \the\year \\
+        \copyright{} Andrew James Beach \the\year \\
         \end{center}
 \end{titlepage}
 
-% The rest of the front pages should contain no headers and be numbered using Roman numerals starting with `ii'
+% The rest of the front pages should contain no headers and be numbered using
+% Roman numerals starting with `ii'.
 \pagestyle{plain}
 \setcounter{page}{2}
 
-\cleardoublepage % Ends the current page and causes all figures and tables that have so far appeared in the input to be printed.
-% In a two-sided printing style, it also makes the next page a right-hand (odd-numbered) page, producing a blank page if necessary.
+\cleardoublepage % Ends the current page and causes all figures and tables
+% that have so far appeared in the input to be printed. In a two-sided
+% printing style, it also makes the next page a right-hand (odd-numbered)
+% page, producing a blank page if necessary.
 
-\begin{comment} 
+\begin{comment}
 % E X A M I N I N G   C O M M I T T E E (Required for Ph.D. theses only)
 % Remove or comment out the lines below to remove this page
 \begin{center}\textbf{Examining Committee Membership}\end{center}
   \noindent
-The following served on the Examining Committee for this thesis. The decision of the Examining Committee is by majority vote.
+The following served on the Examining Committee for this thesis.
+The decision of the Examining Committee is by majority vote.
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
 Internal-External Member: \=  \kill % using longest text to define tab length
-External Examiner: \>  Bruce Bruce \\ 
+External Examiner: \>  Bruce Bruce \\
 \> Professor, Dept. of Philosophy of Zoology, University of Wallamaloo \\
-\end{tabbing} 
+\end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -79,5 +79,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
   \begin{tabbing}
@@ -87,5 +87,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -95,5 +95,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -111,10 +111,12 @@
   % December 13th, 2006.  It is designed for an electronic thesis.
  \begin{center}\textbf{Author's Declaration}\end{center}
-  
+
  \noindent
-I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
+I hereby declare that I am the sole author of this thesis. This is a true copy
+of the thesis, including any required final revisions, as accepted by my
+examiners.
 
   \bigskip
-  
+
   \noindent
 I understand that my thesis may be made electronically available to the public.
Index: doc/theses/andrew_beach_MMath/uw-ethesis.tex
===================================================================
--- doc/theses/andrew_beach_MMath/uw-ethesis.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/andrew_beach_MMath/uw-ethesis.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -1,73 +1,95 @@
 %======================================================================
-% University of Waterloo Thesis Template for LaTeX 
-% Last Updated November, 2020 
-% by Stephen Carr, IST Client Services, 
+% University of Waterloo Thesis Template for LaTeX
+% Last Updated November, 2020
+% by Stephen Carr, IST Client Services,
 % University of Waterloo, 200 University Ave. W., Waterloo, Ontario, Canada
 % FOR ASSISTANCE, please send mail to request@uwaterloo.ca
 
 % DISCLAIMER
-% To the best of our knowledge, this template satisfies the current uWaterloo thesis requirements.
-% However, it is your responsibility to assure that you have met all requirements of the University and your particular department.
-
-% Many thanks for the feedback from many graduates who assisted the development of this template.
-% Also note that there are explanatory comments and tips throughout this template.
+% To the best of our knowledge, this template satisfies the current uWaterloo
+% thesis requirements. However, it is your responsibility to assure that you
+% have met all requirements of the University and your particular department.
+
+% Many thanks for the feedback from many graduates who assisted the
+% development of this template. Also note that there are explanatory comments
+% and tips throughout this template.
 %======================================================================
 % Some important notes on using this template and making it your own...
 
-% The University of Waterloo has required electronic thesis submission since October 2006. 
-% See the uWaterloo thesis regulations at
-% https://uwaterloo.ca/graduate-studies/thesis.
-% This thesis template is geared towards generating a PDF version optimized for viewing on an electronic display, including hyperlinks within the PDF.
-
-% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package configuration below. 
-% THIS INFORMATION GETS EMBEDDED IN THE PDF FINAL PDF DOCUMENT.
-% You can view the information if you view properties of the PDF document.
-
-% Many faculties/departments also require one or more printed copies. 
-% This template attempts to satisfy both types of output. 
+% The University of Waterloo has required electronic thesis submission since
+% October 2006. See the uWaterloo thesis regulations at:
+%   https://uwaterloo.ca/graduate-studies/thesis.
+% This thesis template is geared towards generating a PDF version optimized
+% for viewing on an electronic display, including hyperlinks within the PDF.
+
+% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package
+% configuration below. THIS INFORMATION GETS EMBEDDED IN THE FINAL PDF
+% DOCUMENT. You can view the information if you view properties of the PDF.
+
+% Many faculties/departments also require one or more printed copies.
+% This template attempts to satisfy both types of output.
 % See additional notes below.
-% It is based on the standard "book" document class which provides all necessary sectioning structures and allows multi-part theses.
-
-% If you are using this template in Overleaf (cloud-based collaboration service), then it is automatically processed and previewed for you as you edit.
-
-% For people who prefer to install their own LaTeX distributions on their own computers, and process the source files manually, the following notes provide the sequence of tasks:
- 
+% It is based on the standard "book" document class which provides all
+% necessary sectioning structures and allows multi-part theses.
+
+% If you are using this template in Overleaf (cloud-based collaboration
+% service), then it is automatically processed and previewed for you as you
+% edit.
+
+% For people who prefer to install their own LaTeX distributions on their own
+% computers, and process the source files manually, the following notes
+% provide the sequence of tasks:
+
 % E.g. to process a thesis called "mythesis.tex" based on this template, run:
 
 % pdflatex mythesis	-- first pass of the pdflatex processor
 % bibtex mythesis	-- generates bibliography from .bib data file(s)
-% makeindex         -- should be run only if an index is used 
-% pdflatex mythesis	-- fixes numbering in cross-references, bibliographic references, glossaries, index, etc.
-% pdflatex mythesis	-- it takes a couple of passes to completely process all cross-references
-
-% If you use the recommended LaTeX editor, Texmaker, you would open the mythesis.tex file, then click the PDFLaTeX button. Then run BibTeX (under the Tools menu).
-% Then click the PDFLaTeX button two more times. 
-% If you have an index as well,you'll need to run MakeIndex from the Tools menu as well, before running pdflatex
-% the last two times.
-
-% N.B. The "pdftex" program allows graphics in the following formats to be included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
-% Tip: Generate your figures and photos in the size you want them to appear in your thesis, rather than scaling them with \includegraphics options.
-% Tip: Any drawings you do should be in scalable vector graphic formats: SVG, PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in the final PDF as well.
+% makeindex         -- should be run only if an index is used
+% pdflatex mythesis	-- fixes numbering in cross-references, bibliographic
+%                      references, glossaries, index, etc.
+% pdflatex mythesis	-- it takes a couple of passes to completely process all
+%                      cross-references
+
+% If you use the recommended LaTeX editor, Texmaker, you would open the
+% mythesis.tex file, then click the PDFLaTeX button. Then run BibTeX (under
+% the Tools menu). Then click the PDFLaTeX button two more times.
+% If you have an index as well, you'll need to run MakeIndex from the Tools
+% menu as well, before running pdflatex the last two times.
+
+% N.B. The "pdftex" program allows graphics in the following formats to be
+% included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
+% Tip: Generate your figures and photos in the size you want them to appear
+% in your thesis, rather than scaling them with \includegraphics options.
+% Tip: Any drawings you do should be in scalable vector graphic formats: SVG,
+% PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in the
+% final PDF as well.
 % Tip: Photographs should be cropped and compressed so as not to be too large.
 
-% To create a PDF output that is optimized for double-sided printing: 
-% 1) comment-out the \documentclass statement in the preamble below, and un-comment the second \documentclass line.
-% 2) change the value assigned below to the boolean variable "PrintVersion" from " false" to "true".
-
-%======================================================================
+% To create a PDF output that is optimized for double-sided printing:
+% 1) comment-out the \documentclass statement in the preamble below, and
+%    un-comment the second \documentclass line.
+% 2) change the value assigned below to the boolean variable "PrintVersion"
+%    from "false" to "true".
+
+% ======================================================================
 %   D O C U M E N T   P R E A M B L E
-% Specify the document class, default style attributes, and page dimensions, etc.
+% Specify the document class, default style attributes, page dimensions, etc.
 % For hyperlinked PDF, suitable for viewing on a computer, use this:
 \documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
 
-% For PDF, suitable for double-sided printing, change the PrintVersion variable below to "true" and use this \documentclass line instead of the one above:
+% For PDF, suitable for double-sided printing, change the PrintVersion
+% variable below to "true" and use this \documentclass line instead of the
+% one above:
 %\documentclass[letterpaper,12pt,titlepage,openright,twoside,final]{book}
 
+\usepackage{etoolbox}
+
 % Some LaTeX commands I define for my own nomenclature.
-% If you have to, it's easier to make changes to nomenclature once here than in a million places throughout your thesis!
+% If you have to, it's easier to make changes to nomenclature once here than
+% in a million places throughout your thesis!
 \newcommand{\package}[1]{\textbf{#1}} % package names in bold text
-\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font 
-\newcommand{\href}[1]{#1} % does nothing, but defines the command so the print-optimized version will ignore \href tags (redefined by hyperref pkg).
-%\newcommand{\texorpdfstring}[2]{#1} % does nothing, but defines the command
+\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font
+\newcommand{\href}[1]{#1} % does nothing, but defines the command so the
+% print-optimized version will ignore \href tags (redefined by hyperref pkg).
 % Anything defined here may be redefined by packages added below...
 
@@ -76,28 +98,35 @@
 \newboolean{PrintVersion}
 \setboolean{PrintVersion}{false}
-% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for hard copies by overriding some options of the hyperref package, called below.
+% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for
+% hard copies by overriding some options of the hyperref package, called below.
 
 %\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
-\usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
-\usepackage[pdftex]{graphicx} % For including graphics N.B. pdftex graphics driver 
+% Lots of math symbols and environments
+\usepackage{amsmath,amssymb,amstext}
+% For including graphics N.B. pdftex graphics driver
+\usepackage[pdftex]{graphicx}
+% Removes large sections of the document.
+\usepackage{comment}
 
 % Hyperlinks make it very easy to navigate an electronic document.
-% In addition, this is where you should specify the thesis title and author as they appear in the properties of the PDF document.
+% In addition, this is where you should specify the thesis title and author as
+% they appear in the properties of the PDF document.
 % Use the "hyperref" package
 % N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE
 \usepackage[pdftex,pagebackref=true]{hyperref} % with basic options
 %\usepackage[pdftex,pagebackref=true]{hyperref}
-		% N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing.
+% N.B. pagebackref=true provides links back from the References to the body
+% text. This can cause trouble for printing.
 \hypersetup{
     plainpages=false,       % needed if Roman numbers in frontpages
-    unicode=false,          % non-Latin characters in Acrobat’s bookmarks
-    pdftoolbar=true,        % show Acrobat’s toolbar?
-    pdfmenubar=true,        % show Acrobat’s menu?
+    unicode=false,          % non-Latin characters in Acrobat's bookmarks
+    pdftoolbar=true,        % show Acrobat's toolbar?
+    pdfmenubar=true,        % show Acrobat's menu?
     pdffitwindow=false,     % window fit to page when opened
     pdfstartview={FitH},    % fits the width of the page to the window
-%    pdftitle={uWaterloo\ LaTeX\ Thesis\ Template},    % title: CHANGE THIS TEXT!
+%    pdftitle={uWaterloo\ LaTeX\ Thesis\ Template}, % title: CHANGE THIS TEXT!
 %    pdfauthor={Author},    % author: CHANGE THIS TEXT! and uncomment this line
 %    pdfsubject={Subject},  % subject: CHANGE THIS TEXT! and uncomment this line
-%    pdfkeywords={keyword1} {key2} {key3}, % list of keywords, and uncomment this line if desired
+%    pdfkeywords={keyword1} {key2} {key3}, % optional list of keywords
     pdfnewwindow=true,      % links in new window
     colorlinks=true,        % false: boxed links; true: colored links
@@ -107,5 +136,6 @@
     urlcolor=cyan           % color of external links
 }
-\ifthenelse{\boolean{PrintVersion}}{   % for improved print quality, change some hyperref options
+% for improved print quality, change some hyperref options
+\ifthenelse{\boolean{PrintVersion}}{
 \hypersetup{	% override some previously defined hyperref options
 %    colorlinks,%
@@ -116,37 +146,52 @@
 }{} % end of ifthenelse (no else)
 
-\usepackage[automake,toc,abbreviations]{glossaries-extra} % Exception to the rule of hyperref being the last add-on package
-% If glossaries-extra is not in your LaTeX distribution, get it from CTAN (http://ctan.org/pkg/glossaries-extra),
-% although it's supposed to be in both the TeX Live and MikTeX distributions. There are also documentation and 
-% installation instructions there.
+% Exception to the rule of hyperref being the last add-on package
+\usepackage[automake,toc,abbreviations]{glossaries-extra}
+% If glossaries-extra is not in your LaTeX distribution, get it from CTAN
+% (http://ctan.org/pkg/glossaries-extra), although it's supposed to be in
+% both the TeX Live and MikTeX distributions. There are also documentation
+% and installation instructions there.
 
 % Setting up the page margins...
-\setlength{\textheight}{9in}\setlength{\topmargin}{-0.45in}\setlength{\headsep}{0.25in}
-% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the
-% top, bottom, and outside page edges and a 1.125 in. (81pt) gutter margin (on binding side). 
-% While this is not an issue for electronic viewing, a PDF may be printed, and so we have the same page layout for both printed and electronic versions, we leave the gutter margin in.
-% Set margins to minimum permitted by uWaterloo thesis regulations:
+\setlength{\textheight}{9in}
+\setlength{\topmargin}{-0.45in}
+\setlength{\headsep}{0.25in}
+% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at
+% the top, bottom, and outside page edges and a 1.125 in. (81pt) gutter margin
+% (on binding side). While this is not an issue for electronic viewing, a PDF
+% may be printed, and so we have the same page layout for both printed and
+% electronic versions, we leave the gutter margin in. Set margins to minimum
+% permitted by uWaterloo thesis regulations:
 \setlength{\marginparwidth}{0pt} % width of margin notes
 % N.B. If margin notes are used, you must adjust \textwidth, \marginparwidth
 % and \marginparsep so that the space left between the margin notes and page
 % edge is less than 15 mm (0.6 in.)
-\setlength{\marginparsep}{0pt} % width of space between body text and margin notes
-\setlength{\evensidemargin}{0.125in} % Adds 1/8 in. to binding side of all
+% width of space between body text and margin notes
+\setlength{\marginparsep}{0pt}
+% Adds 1/8 in. to binding side of all
 % even-numbered pages when the "twoside" printing option is selected
-\setlength{\oddsidemargin}{0.125in} % Adds 1/8 in. to the left of all pages when "oneside" printing is selected, and to the left of all odd-numbered pages when "twoside" printing is selected
-\setlength{\textwidth}{6.375in} % assuming US letter paper (8.5 in. x 11 in.) and side margins as above
+\setlength{\evensidemargin}{0.125in}
+% Adds 1/8 in. to the left of all pages when "oneside" printing is selected,
+% and to the left of all odd-numbered pages when "twoside" printing is selected
+\setlength{\oddsidemargin}{0.125in}
+% assuming US letter paper (8.5 in. x 11 in.) and side margins as above
+\setlength{\textwidth}{6.375in}
 \raggedbottom
 
-% The following statement specifies the amount of space between paragraphs. Other reasonable specifications are \bigskipamount and \smallskipamount.
+% The following statement specifies the amount of space between paragraphs.
+% Other reasonable specifications are \bigskipamount and \smallskipamount.
 \setlength{\parskip}{\medskipamount}
 
-% The following statement controls the line spacing.  
-% The default spacing corresponds to good typographic conventions and only slight changes (e.g., perhaps "1.2"), if any, should be made.
+% The following statement controls the line spacing.
+% The default spacing corresponds to good typographic conventions and only
+% slight changes (e.g., perhaps "1.2"), if any, should be made.
 \renewcommand{\baselinestretch}{1} % this is the default line space setting
 
 % By default, each chapter will start on a recto (right-hand side) page.
-% We also force each section of the front pages to start on a recto page by inserting \cleardoublepage commands.
-% In many cases, this will require that the verso (left-hand) page be blank, and while it should be counted, a page number should not be printed.
-% The following statements ensure a page number is not printed on an otherwise blank verso page.
+% We also force each section of the front pages to start on a recto page by
+% inserting \cleardoublepage commands. In many cases, this will require that
+% the verso (left-hand) page be blank, and while it should be counted, a page
+% number should not be printed. The following statements ensure a page number
+% is not printed on an otherwise blank verso page.
 \let\origdoublepage\cleardoublepage
 \newcommand{\clearemptydoublepage}{%
@@ -154,19 +199,36 @@
 \let\cleardoublepage\clearemptydoublepage
 
-% Define Glossary terms (This is properly done here, in the preamble and could also be \input{} from a separate file...)
+% Define Glossary terms (This is properly done here, in the preamble and
+% could also be \input{} from a separate file...)
 \input{glossaries}
 \makeglossaries
 
-\usepackage{comment}
 % cfa macros used in the document
 %\usepackage{cfalab}
+% I'm going to bring this back eventually.
+\makeatletter
+% Combines all \CC* commands:
+\newrobustcmd*\Cpp[1][\xspace]{\cfalab@Cpp#1}
+\newcommand\cfalab@Cpp{C\kern-.1em\hbox{+\kern-.25em+}}
+% Optional arguments do not work with pdf string. (Some fix-up required.)
+\pdfstringdefDisableCommands{\def\Cpp{C++}}
+\makeatother
+
 \input{common}
-\CFAStyle						% CFA code-style for all languages
-\lstset{language=CFA,basicstyle=\linespread{0.9}\tt}	% CFA default lnaguage
+% CFA code-style for all languages
+\CFAStyle
+% CFA default language
+\lstset{language=CFA,basicstyle=\linespread{0.9}\tt}
+% Annotations from Peter:
+\newcommand{\PAB}[1]{{\color{blue}PAB: #1}}
+% Change the style of abbreviations:
+\renewcommand{\abbrevFont}{}
 
 %======================================================================
 %   L O G I C A L    D O C U M E N T
 % The logical document contains the main content of your thesis.
-% Being a large document, it is a good idea to divide your thesis into several files, each one containing one chapter or other significant chunk of content, so you can easily shuffle things around later if desired.
+% Being a large document, it is a good idea to divide your thesis into several
+% files, each one containing one chapter or other significant chunk of content,
+% so you can easily shuffle things around later if desired.
 %======================================================================
 \begin{document}
@@ -175,18 +237,20 @@
 % FRONT MATERIAL
 % title page,declaration, borrowers' page, abstract, acknowledgements,
-% dedication, table of contents, list of tables, list of figures, nomenclature, etc.
-%----------------------------------------------------------------------
-\input{uw-ethesis-frontpgs} 
+% dedication, table of contents, list of tables, list of figures,
+% nomenclature, etc.
+%----------------------------------------------------------------------
+\input{uw-ethesis-frontpgs}
 
 %----------------------------------------------------------------------
 % MAIN BODY
 % We suggest using a separate file for each chapter of your thesis.
-% Start each chapter file with the \chapter command.
-% Only use \documentclass or \begin{document} and \end{document} commands in this master document.
+% Start each chapter file with the \chapter command. Only use \documentclass,
+% \begin{document} and \end{document} commands in this master document.
 % Tip: Putting each sentence on a new line is a way to simplify later editing.
 %----------------------------------------------------------------------
 \input{existing}
 \input{features}
-\input{unwinding}
+\input{implement}
+%\input{unwinding}
 \input{future}
 
@@ -198,13 +262,19 @@
 % Bibliography
 
-% The following statement selects the style to use for references.  
-% It controls the sort order of the entries in the bibliography and also the formatting for the in-text labels.
+% The following statement selects the style to use for references.
+% It controls the sort order of the entries in the bibliography and also the
+% formatting for the in-text labels.
 \bibliographystyle{plain}
-% This specifies the location of the file containing the bibliographic information.  
-% It assumes you're using BibTeX to manage your references (if not, why not?).
-\cleardoublepage % This is needed if the "book" document class is used, to place the anchor in the correct page, because the bibliography will start on its own page.
-% Use \clearpage instead if the document class uses the "oneside" argument
-\phantomsection  % With hyperref package, enables hyperlinking from the table of contents to bibliography             
-% The following statement causes the title "References" to be used for the bibliography section:
+% This specifies the location of the file containing the bibliographic
+% information. It assumes you're using BibTeX to manage your references (if
+% not, why not?).
+\cleardoublepage % This is needed if the "book" document class is used, to
+% place the anchor in the correct page, because the bibliography will start
+% on its own page.
+% Use \clearpage instead if the document class uses the "oneside" argument.
+\phantomsection  % With hyperref package, enables hyperlinking from the table
+% of contents to bibliography.
+% The following statement causes the title "References" to be used for the
+% bibliography section:
 \renewcommand*{\bibname}{References}
 
@@ -213,9 +283,11 @@
 
 \bibliography{uw-ethesis,pl}
-% Tip: You can create multiple .bib files to organize your references. 
-% Just list them all in the \bibliogaphy command, separated by commas (no spaces).
-
-% The following statement causes the specified references to be added to the bibliography even if they were not cited in the text. 
-% The asterisk is a wildcard that causes all entries in the bibliographic database to be included (optional).
+% Tip: You can create multiple .bib files to organize your references. Just
+% list them all in the \bibliography command, separated by commas (no spaces).
+
+% The following statement causes the specified references to be added to the
+% bibliography even if they were not cited in the text. The asterisk is a
+% wildcard that causes all entries in the bibliographic database to be
+% included (optional).
 % \nocite{*}
 %----------------------------------------------------------------------
@@ -225,11 +297,14 @@
 % The \appendix statement indicates the beginning of the appendices.
 \appendix
-% Add an un-numbered title page before the appendices and a line in the Table of Contents
+% Add an un-numbered title page before the appendices and a line in the Table
+% of Contents
 % \chapter*{APPENDICES}
 % \addcontentsline{toc}{chapter}{APPENDICES}
-% Appendices are just more chapters, with different labeling (letters instead of numbers).
+% Appendices are just more chapters, with different labeling (letters instead
+% of numbers).
 % \input{appendix-matlab_plots.tex}
 
-% GLOSSARIES (Lists of definitions, abbreviations, symbols, etc. provided by the glossaries-extra package)
+% GLOSSARIES (Lists of definitions, abbreviations, symbols, etc.
+% provided by the glossaries-extra package)
 % -----------------------------
 \printglossaries
Index: doc/theses/fangren_yu_COOP_F20/Report.tex
===================================================================
--- doc/theses/fangren_yu_COOP_F20/Report.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/fangren_yu_COOP_F20/Report.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -102,5 +102,5 @@
 \CFA language, developed by the Programming Language Group at the University of Waterloo, has a long history, with the initial language design in 1992 by Glen Ditchfield~\cite{Ditchfield92} and the first proof-of-concept compiler built in 2003 by Richard Bilson~\cite{Bilson03}. Many new features have been added to the language over time, but the core of \CFA's type-system --- parametric functions introduced by the @forall@ clause (hence the name of the language) providing parametric overloading --- remains mostly unchanged.
 
-The current \CFA reference compiler, @cfa-cc@, is designed using the visitor pattern~\cite{vistorpattern} over an abstract syntax tree (AST), where multiple passes over the AST modify it for subsequent passes. @cfa-cc@ still includes many parts taken directly from the original Bilson implementation, which served as the starting point for this enhancement work to the type system. Unfortunately, the prior implementation did not provide the efficiency required for the language to be practical: a \CFA source file of approximately 1000 lines of code can take a multiple minutes to compile. The cause of the problem is that the old compiler used inefficient data structures and algorithms for expression resolution, which involved significant copying and redundant work.
+The current \CFA reference compiler, @cfa-cc@, is designed using the visitor pattern~\cite{vistorpattern} over an abstract syntax tree (AST), where multiple passes over the AST modify it for subsequent passes. @cfa-cc@ still includes many parts taken directly from the original Bilson implementation, which served as the starting point for this enhancement work to the type system. Unfortunately, the prior implementation did not provide the efficiency required for the language to be practical: a \CFA source file of approximately 1000 lines of code can take multiple minutes to compile. The cause of the problem is that the old compiler used inefficient data structures and algorithms for expression resolution, which involved significant copying and redundant work.
 
 This report presents a series of optimizations to the performance-critical parts of the resolver, with a major rework of the compiler data-structures using a functional-programming approach to reduce memory complexity. The improvements were suggested by running the compiler builds with a performance profiler against the \CFA standard-library source-code and a test suite to find the most underperforming components in the compiler algorithm.
@@ -122,5 +122,5 @@
 \end{itemize}
 
-The resolver algorithm, designed for overload resolution, uses a significant amount of reused, and hence copying, for the intermediate representations, especially in the following two places:
+The resolver algorithm, designed for overload resolution, allows a significant amount of code reuse, and hence copying, for the intermediate representations, especially in the following two places:
 \begin{itemize}
 \item
@@ -301,5 +301,5 @@
 forall( dtype T | sized( T ) )
 T * malloc( void ) { return (T *)malloc( sizeof(T) ); } // call C malloc
-int * i = malloc();  // type deduced from left-hand size $\Rightarrow$ no size argument or return cast
+int * i = malloc();  // type deduced from left-hand side $\(\Rightarrow\)$ no size argument or return cast
 \end{cfa}
 An unbound return-type is problematic in resolver complexity because a single match of a function call with an unbound return type may create multiple candidates. In the worst case, consider a function declared that returns any @otype@ (defined \VPageref{otype}):
@@ -432,6 +432,6 @@
 \begin{cfa}
 void f( int );
-double g$_1$( int );
-int g$_2$( long );
+double g$\(_1\)$( int );
+int g$\(_2\)$( long );
 f( g( 42 ) );
 \end{cfa}
Index: doc/theses/thierry_delisle_PhD/thesis/.gitignore
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/.gitignore	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
+++ doc/theses/thierry_delisle_PhD/thesis/.gitignore	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -0,0 +1,1 @@
+back_text/
Index: doc/theses/thierry_delisle_PhD/thesis/Makefile
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/Makefile	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/thierry_delisle_PhD/thesis/Makefile	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -32,4 +32,6 @@
 	emptytree \
 	fairness \
+	io_uring \
+	pivot_ring \
 	system \
 }
@@ -43,5 +45,5 @@
 ## Define the documents that need to be made.
 all: thesis.pdf
-thesis.pdf: ${TEXTS} ${FIGURES} ${PICTURES} glossary.tex local.bib
+thesis.pdf: ${TEXTS} ${FIGURES} ${PICTURES} thesis.tex glossary.tex local.bib
 
 DOCUMENT = thesis.pdf
@@ -105,4 +107,7 @@
 	sed -i 's/$@/${Build}\/$@/g' ${Build}/$@_t
 
+build/fairness.svg: fig/fairness.py | ${Build}
+	python3 fig/fairness.py build/fairness.svg
+
 ## pstex with inverted colors
 %.dark.pstex : fig/%.fig Makefile | ${Build}
Index: doc/theses/thierry_delisle_PhD/thesis/fig/io_uring.fig
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/fig/io_uring.fig	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
+++ doc/theses/thierry_delisle_PhD/thesis/fig/io_uring.fig	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -0,0 +1,95 @@
+#FIG 3.2  Produced by xfig version 3.2.7b
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 180 3240 2025 3510
+2 1 0 1 0 7 40 -1 -1 0.000 0 0 -1 0 0 2
+	 720 3240 720 3510
+2 1 0 1 0 7 40 -1 -1 0.000 0 0 -1 0 0 2
+	 450 3240 450 3510
+2 2 0 1 0 7 45 -1 20 0.000 0 0 -1 0 0 5
+	 180 3240 1260 3240 1260 3510 180 3510 180 3240
+2 1 0 1 0 7 40 -1 -1 0.000 0 0 -1 0 0 2
+	 990 3240 990 3510
+4 0 0 40 -1 0 12 0.0000 2 165 990 1035 3420 {\\small S3}\001
+4 0 0 40 -1 0 12 0.0000 2 165 990 765 3420 {\\small S2}\001
+4 0 0 40 -1 0 12 0.0000 2 165 990 225 3420 {\\small S0}\001
+4 0 0 40 -1 0 12 0.0000 2 165 990 495 3420 {\\small S1}\001
+-6
+6 1530 2610 3240 4140
+5 1 0 1 0 7 35 -1 -1 0.000 0 1 1 0 2455.714 3375.000 1890 2700 1575 3375 1890 4050
+	1 1 1.00 60.00 120.00
+1 3 0 1 0 7 40 -1 20 0.000 1 0.0000 2475 3375 315 315 2475 3375 2790 3375
+1 3 0 1 0 7 50 -1 20 0.000 1 0.0000 2475 3375 765 765 2475 3375 3240 3375
+2 1 0 1 0 7 45 -1 -1 0.000 0 0 -1 0 0 2
+	 2475 3375 2133 2690
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 2475 3375 1769 3093
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 2475 3375 1769 3661
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 2475 3375 2133 4057
+2 1 1 1 0 7 35 -1 0 4.000 0 0 -1 0 0 2
+	 2205 3375 2745 3375
+-6
+6 585 2250 1485 2610
+4 2 0 50 -1 0 12 0.0000 2 135 900 1485 2385 Submission\001
+4 2 0 50 -1 0 12 0.0000 2 165 360 1485 2580 Ring\001
+-6
+6 3600 2610 5265 4140
+5 1 0 1 0 7 35 -1 -1 0.000 0 1 1 0 4384.000 3375.000 4950 4050 5265 3375 4950 2700
+	1 1 1.00 60.00 120.00
+1 3 0 1 0 7 40 -1 20 0.000 1 3.1416 4365 3375 315 315 4365 3375 4050 3375
+1 3 0 1 0 7 50 -1 20 0.000 1 3.1416 4365 3375 765 765 4365 3375 3600 3375
+2 1 0 1 0 7 45 -1 -1 0.000 0 0 -1 0 0 2
+	 4365 3375 4707 4060
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 4365 3375 5071 3657
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 4365 3375 5071 3089
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 4365 3375 4707 2693
+2 1 1 1 0 7 35 -1 0 4.000 0 0 -1 0 0 2
+	 4635 3375 4095 3375
+-6
+6 5355 2250 6255 2610
+4 0 0 50 -1 0 12 0.0000 2 165 360 5355 2580 Ring\001
+4 0 0 50 -1 0 12 0.0000 2 165 900 5355 2385 Completion\001
+-6
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 2925 2025 2550 2486
+2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 4275 2475 3825 2025
+2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 2751 4268 3066 4538
+2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 3780 4545 4275 4230
+2 1 1 1 0 7 55 -1 -1 4.000 0 0 -1 0 0 2
+	 0 3375 6255 3375
+4 0 0 35 -1 0 12 0.0000 2 165 1170 1845 3060 {\\small \\&S2}\001
+4 0 0 35 -1 0 12 0.0000 2 165 1170 1755 3420 {\\small \\&S3}\001
+4 0 0 35 -1 0 12 0.0000 2 165 1170 1890 3735 {\\small \\&S0}\001
+4 0 0 50 -1 0 12 0.0000 6 135 360 2790 2565 Push\001
+4 0 0 50 -1 0 12 0.0000 6 165 270 2880 4230 Pop\001
+4 0 0 50 -1 0 12 0.0000 6 135 360 2025 4275 Head\001
+4 0 0 50 -1 0 12 0.0000 6 135 360 2025 2565 Tail\001
+4 0 0 35 -1 0 12 0.0000 2 165 990 4635 3060 {\\small C0}\001
+4 0 0 35 -1 0 12 0.0000 2 165 990 4815 3420 {\\small C1}\001
+4 0 0 35 -1 0 12 0.0000 2 165 990 4635 3780 {\\small C2}\001
+4 0 0 50 -1 0 12 0.0000 4 135 360 4725 4275 Tail\001
+4 0 0 50 -1 0 12 0.0000 6 135 360 4590 2565 Head\001
+4 0 0 50 -1 0 12 0.0000 2 135 990 5535 3285 Kernel Line\001
+4 1 0 50 -1 0 12 0.0000 2 180 1350 3375 4815 {\\Large Kernel}\001
+4 1 0 50 -1 0 12 0.0000 2 180 1800 3375 1845 {\\Large Application}\001
+4 0 0 50 -1 0 12 0.0000 6 165 270 3690 2565 Pop\001
+4 0 0 50 -1 0 12 0.0000 4 135 360 3465 4230 Push\001
+4 0 0 50 -1 0 12 0.0000 2 135 90 0 3285 S\001
Index: doc/theses/thierry_delisle_PhD/thesis/fig/pivot_ring.fig
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/fig/pivot_ring.fig	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
+++ doc/theses/thierry_delisle_PhD/thesis/fig/pivot_ring.fig	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -0,0 +1,30 @@
+#FIG 3.2  Produced by xfig version 3.2.7b
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 1 0 7 35 -1 -1 0.000 0 1 1 0 2455.714 3375.000 1890 2700 1575 3375 1890 4050
+	1 1 1.00 60.00 120.00
+6 225 2475 1665 2835
+4 2 0 50 -1 0 12 0.0000 6 165 1440 1665 2805 \\lstinline|sqe|s\001
+4 2 0 50 -1 0 12 0.0000 6 135 810 1665 2610 Submitted\001
+-6
+6 180 3825 1620 4185
+4 2 0 50 -1 0 12 0.0000 6 135 810 1620 3960 Allocated\001
+4 2 0 50 -1 0 12 0.0000 6 165 1440 1620 4155 \\lstinline|sqe|s\001
+-6
+1 3 0 1 0 7 40 -1 20 0.000 1 0.0000 2475 3375 315 315 2475 3375 2790 3375
+1 3 0 1 0 7 50 -1 20 0.000 1 0.0000 2475 3375 765 765 2475 3375 3240 3375
+2 1 0 1 0 7 45 -1 -1 0.000 0 0 -1 0 0 2
+	 2475 3375 2133 2690
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 2475 3375 1710 3375
+2 1 0 1 0 7 45 -1 -1 4.000 0 0 -1 0 0 2
+	 2475 3375 2133 4057
+4 0 0 50 -1 0 12 0.0000 6 135 360 2025 2565 Tail\001
+4 2 0 50 -1 0 12 0.0000 6 135 810 1485 3420 Partition\001
+4 0 0 50 -1 0 12 0.0000 6 135 360 2025 4320 Head\001
Index: doc/theses/thierry_delisle_PhD/thesis/local.bib
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/local.bib	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/thierry_delisle_PhD/thesis/local.bib	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -512,4 +512,11 @@
 }
 
+@manual{MAN:bsd/kqueue,
+  title = {KQUEUE(2) - FreeBSD System Calls Manual},
+  url   = {https://www.freebsd.org/cgi/man.cgi?query=kqueue},
+  year  = {2020},
+  month = {may}
+}
+
 % Apple's MAC OS X
 @manual{MAN:apple/scheduler,
@@ -577,4 +584,57 @@
 
 % --------------------------------------------------
+% Man Pages
+@manual{MAN:open,
+  key        = "open",
+  title      = "open(2) Linux User's Manual",
+  year       = "2020",
+  month      = "February",
+}
+
+@manual{MAN:accept,
+  key        = "accept",
+  title      = "accept(2) Linux User's Manual",
+  year       = "2019",
+  month      = "March",
+}
+
+@manual{MAN:select,
+  key        = "select",
+  title      = "select(2) Linux User's Manual",
+  year       = "2019",
+  month      = "March",
+}
+
+@manual{MAN:poll,
+  key        = "poll",
+  title      = "poll(2) Linux User's Manual",
+  year       = "2019",
+  month      = "July",
+}
+
+@manual{MAN:epoll,
+  key        = "epoll",
+  title      = "epoll(7) Linux User's Manual",
+  year       = "2019",
+  month      = "March",
+}
+
+@manual{MAN:aio,
+  key        = "aio",
+  title      = "aio(7) Linux User's Manual",
+  year       = "2019",
+  month      = "March",
+}
+
+@misc{MAN:io_uring,
+  title   = {Efficient IO with io\_uring},
+  author  = {Axboe, Jens},
+  year    = "2019",
+  month   = "March",
+  version = {0.4},
+  howpublished = {\url{https://kernel.dk/io_uring.pdf}}
+}
+
+% --------------------------------------------------
 % Wikipedia Entries
 @misc{wiki:taskparallel,
@@ -617,2 +677,10 @@
   note = "[Online; accessed 2-January-2021]"
 }
+
+@misc{wiki:future,
+  author = "{Wikipedia contributors}",
+  title = "Futures and promises --- {W}ikipedia{,} The Free Encyclopedia",
+  year = "2020",
+  url = "https://en.wikipedia.org/wiki/Futures_and_promises",
+  note = "[Online; accessed 9-February-2021]"
+}
Index: doc/theses/thierry_delisle_PhD/thesis/text/core.tex
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/text/core.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/thierry_delisle_PhD/thesis/text/core.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -49,8 +49,8 @@
 
 \section{Design}
-In general, a na\"{i}ve \glsxtrshort{fifo} ready-queue does not scale with increased parallelism from \glspl{hthrd}, resulting in decreased performance. The problem is adding/removing \glspl{thrd} is a single point of contention. As shown in the evaluation sections, most production schedulers do scale when adding \glspl{hthrd}. The common solution to the single point of contention is to shard the ready-queue so each \gls{hthrd} can access the ready-queue without contention, increasing performance though lack of contention.
+In general, a na\"{i}ve \glsxtrshort{fifo} ready-queue does not scale with increased parallelism from \glspl{hthrd}, resulting in decreased performance. The problem is adding/removing \glspl{thrd} is a single point of contention. As shown in the evaluation sections, most production schedulers do scale when adding \glspl{hthrd}. The common solution to the single point of contention is to shard the ready-queue so each \gls{hthrd} can access the ready-queue without contention, increasing performance.
 
 \subsection{Sharding} \label{sec:sharding}
-An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm presents a queue with a relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each \emph{cell} of the array has a timestamp for the last operation and a pointer to a linked-list with a lock and each node in the list is marked with a timestamp indicating when it is added to the list. A push operation is done by picking a random cell, acquiring the list lock, and pushing to the list. If the cell is locked, the operation is simply retried on another random cell until a lock is acquired. A pop operation is done in a similar fashion except two random cells are picked. If both cells are unlocked with non-empty lists, the operation pops the node with the oldest cell timestamp. If one of the cells is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two new random cells and tries again.
+An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm presents a queue with a relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each \emph{cell} of the array has a timestamp for the last operation and a pointer to a linked-list with a lock. Each node in the list is marked with a timestamp indicating when it is added to the list. A push operation is done by picking a random cell, acquiring the list lock, and pushing to the list. If the cell is locked, the operation is simply retried on another random cell until a lock is acquired. A pop operation is done in a similar fashion except two random cells are picked. If both cells are unlocked with non-empty lists, the operation pops the node with the oldest timestamp. If one of the cells is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two new random cells and tries again.
 
 \begin{figure}
@@ -100,10 +100,10 @@
 \paragraph{Local Information} Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but each \gls{hthrd} keeps its own independent copy. While this approach can offer good scalability \emph{and} low latency, the liveliness and discovery of the information can become a problem. This case is made worst in systems with few processors where even blind random picks can find \glspl{thrd} in a few tries.
 
-I built a prototype of these approaches and none of these techniques offer satisfying performance when few threads are present. All of these approach hit the same 2 problems. First, randomly picking sub-queues is very fast but means any improvement to the hit rate can easily be countered by a slow-down in look-up speed when there are empty lists. Second, the array is already as sharded to avoid contention bottlenecks, so any denser data structure tends to become a bottleneck. In all cases, these factors meant the best cases scenario, \ie many threads, would get worst throughput, and the worst-case scenario, few threads, would get a better hit rate, but an equivalent poor throughput. As a result I tried an entirely different approach.
+I built a prototype of these approaches and none of these techniques offer satisfying performance when few threads are present. All of these approaches hit the same 2 problems. First, randomly picking sub-queues is very fast. That speed means any improvement to the hit rate can easily be countered by a slow-down in look-up speed, whether or not there are empty lists. Second, the array is already sharded to avoid contention bottlenecks, so any denser data structure tends to become a bottleneck. In all cases, these factors meant the best-case scenario, \ie many threads, would get worse throughput, and the worst-case scenario, few threads, would get a better hit rate, but an equivalently poor throughput. As a result I tried an entirely different approach.
 
 \subsection{Dynamic Entropy}\cit{https://xkcd.com/2318/}
-In the worst-case scenario there are only few \glspl{thrd} ready to run, or more precisely given $P$ \glspl{proc}\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}, $T$ \glspl{thrd} and $\epsilon$ a very small number, than the worst case scenario can be represented by $\epsilon \ll P$, than $T = P + \epsilon$. It is important to note in this case that fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' on page \pageref{q:LinuxCFS}. In this context, it is possible to use a purely internal-locality based approach and still meet the fairness requirements. This approach simply has each \gls{proc} running a single \gls{thrd} repeatedly. Or from the shared ready-queue viewpoint, each \gls{proc} pushes to a given sub-queue and then popes from the \emph{same} subqueue. In cases where $T \gg P$, the scheduler should also achieves similar performance without affecting the fairness guarantees.
+In the worst-case scenario there are only a few \glspl{thrd} ready to run, or more precisely given $P$ \glspl{proc}\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}, $T$ \glspl{thrd} and $\epsilon$ a very small number, then the worst-case scenario can be represented by $T = P + \epsilon$, with $\epsilon \ll P$. It is important to note in this case that fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' on page \pageref{q:LinuxCFS}. In this context, it is possible to use a purely internal-locality based approach and still meet the fairness requirements. This approach simply has each \gls{proc} running a single \gls{thrd} repeatedly. Or from the shared ready-queue viewpoint, each \gls{proc} pushes to a given sub-queue and then pops from the \emph{same} subqueue. The challenge is for the scheduler to achieve good performance in both the $T = P + \epsilon$ case and the $T \gg P$ case, without affecting the fairness guarantees in the latter.
 
-To handle this case, I use a pseudo random-number generator, \glsxtrshort{prng} in a novel way. When the scheduler uses a \glsxtrshort{prng} instance per \gls{proc} exclusively, the random-number seed effectively starts an encoding that produces a list of all accessed subqueues, from latest to oldest. The novel approach is to be able to ``replay'' the \glsxtrshort{prng} backwards and there exist \glsxtrshort{prng}s that are fast, compact \emph{and} can be run forward and backwards. Linear congruential generators~\cite{wiki:lcg} are an example of \glsxtrshort{prng}s that match these requirements.
+To handle this case, I use a \glsxtrshort{prng}\todo{Fix missing long form} in a novel way. There exist \glsxtrshort{prng}s that are fast, compact and can be run forward \emph{and} backwards.  Linear congruential generators~\cite{wiki:lcg} are an example of such \glsxtrshort{prng}s. The novel approach is to use the ability to run backwards to ``replay'' the \glsxtrshort{prng}. When the scheduler uses an exclusive \glsxtrshort{prng} instance per \gls{proc}, the random-number seed effectively starts an encoding that produces a list of all accessed subqueues, from latest to oldest. Replaying the \glsxtrshort{prng} identifies cells that were accessed recently and which probably still have data cached.
 
 The algorithm works as follows:
Index: doc/theses/thierry_delisle_PhD/thesis/text/intro.tex
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/text/intro.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/thierry_delisle_PhD/thesis/text/intro.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -7,3 +7,3 @@
 While previous work on the concurrent package of \CFA focused on features and interfaces, this thesis focuses on performance, introducing \glsxtrshort{api} changes only when required by performance considerations. More specifically, this thesis concentrates on scheduling and \glsxtrshort{io}. Prior to this work, the \CFA runtime used a strictly \glsxtrshort{fifo} \gls{rQ}.
 
-This work exclusively concentrates on Linux as it's operating system since the existing \CFA runtime and compiler does not already support other operating systems. Furthermore, as \CFA is yet to be released, supporting version of Linux older that the latest version is not a goal of this work.
+This work exclusively concentrates on Linux as its operating system since the existing \CFA runtime and compiler do not already support other operating systems. Furthermore, as \CFA is yet to be released, supporting versions of Linux older than the latest version is not a goal of this work.
Index: doc/theses/thierry_delisle_PhD/thesis/text/io.tex
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/text/io.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/thierry_delisle_PhD/thesis/text/io.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -1,13 +1,28 @@
-\chapter{User Level \glsxtrshort{io}}
-As mentionned in Section~\ref{prev:io}, User-Level \glsxtrshort{io} requires multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc} using asynchronous \glsxtrshort{io} operations. Various operating systems offer various forms of asynchronous operations and as mentioned in Chapter~\ref{intro}, this work is exclusively focuesd on Linux.
+\chapter{User Level \io}
+As mentioned in Section~\ref{prev:io}, User-Level \io requires multiplexing the \io operations of many \glspl{thrd} onto fewer \glspl{proc} using asynchronous \io operations. Various operating systems offer various forms of asynchronous operations and as mentioned in Chapter~\ref{intro}, this work is exclusively focused on Linux.
 
-\section{Existing options}
-Since \glsxtrshort{io} operations are generally handled by the
+\section{Kernel Interface}
+Since this work fundamentally depends on operating system support, the first step of any design is to discuss the available interfaces and pick one (or more) as the foundations of the \io subsystem.
 
-\subsection{\lstinline|epoll|, \lstinline|poll| and \lstinline|select|}
+\subsection{\lstinline|O_NONBLOCK|}
+In Linux, files can be opened with the flag @O_NONBLOCK@~\cite{MAN:open} (or @SOCK_NONBLOCK@~\cite{MAN:accept}, the equivalent for sockets) to use the file descriptors in ``nonblocking mode''. In this mode, ``Neither the open() nor any subsequent \io operations on the [opened file descriptor] will cause the calling
+process to wait.'' This feature can be used as the foundation for the \io subsystem. However, for the subsystem to be able to block \glspl{thrd} until an operation completes, @O_NONBLOCK@ must be used in conjunction with a system call that monitors when a file descriptor becomes ready, \ie, the next \io operation on it will not cause the process to wait\footnote{In this context, ready means that \emph{some} operation can be performed without blocking. It does not mean that the last operation that returned \lstinline|EAGAIN| will succeed on the next try. A file that is ready to read but has only 1 byte available would be an example of this distinction.}.
 
-\subsection{Linux's AIO}
+There are three options to monitor file descriptors in Linux\footnote{For simplicity, this section omits to mention \lstinline|pselect| and \lstinline|ppoll|. The difference between these system calls and \lstinline|select| and \lstinline|poll| respectively is not relevant for this discussion.}: @select@~\cite{MAN:select}, @poll@~\cite{MAN:poll} and @epoll@~\cite{MAN:epoll}. All three of these options offer a system call that blocks a \gls{kthrd} until at least one of many file descriptors becomes ready. The group of file descriptors being waited on is often referred to as the \newterm{interest set}.
 
+\paragraph{\lstinline|select|} is the oldest of these options. It takes as an input a contiguous array of bits, where each bit represents a file descriptor of interest. On return, it modifies the set in place to identify which of the file descriptors changed status. This means that calling select in a loop requires re-initializing the array each time and the number of file descriptors supported has a hard limit. Another limit of @select@ is that once the call is started, the interest set can no longer be modified. Monitoring a new file descriptor generally requires aborting any in-progress call to @select@\footnote{Starting a new call to \lstinline|select| in this case is possible but requires a distinct kernel thread, and as a result is not an acceptable multiplexing solution when the interest set is large and highly dynamic unless the number of parallel calls to select can be strictly bounded.}.
 
+\paragraph{\lstinline|poll|} is an improvement over select, which removes the hard limit on the number of file descriptors and the need to re-initialize the input on every call. It works using an array of structures as an input rather than an array of bits, thus allowing a more compact input for small interest sets. Like @select@, @poll@ suffers from the limitation that the interest set cannot be changed while the call is blocked.
+
+\paragraph{\lstinline|epoll|} further improves on these two functions, by allowing the interest set to be dynamically added to and removed from while a \gls{kthrd} is blocked on a call to @epoll@. This is done by creating an \emph{epoll instance} with a persistent interest set that is used across multiple calls. This advantage significantly reduces synchronization overhead on the part of the caller (in this case the \io subsystem) since the interest set can be modified when adding or removing file descriptors without having to synchronize with other \glspl{kthrd} potentially calling @epoll@.
+
+However, all three of these system calls suffer from generality problems to some extent. The man page for @O_NONBLOCK@ mentions that ``[@O_NONBLOCK@] has no effect for regular files and block devices'', which means none of these three system calls are viable multiplexing strategies for these types of \io operations. Furthermore, @epoll@ has been shown to have some problems with pipes and ttys\cit{Peter's examples in some fashion}. Finally, none of these are useful solutions for multiplexing \io operations that do not have a corresponding file descriptor and can be awkward for operations using multiple file descriptors.
+
+\subsection{The POSIX asynchronous I/O (AIO)}
+An alternative to using @O_NONBLOCK@ is to use the AIO interface. Its interface lets programmers enqueue operations to be performed asynchronously by the kernel. Completions of these operations can be communicated in various ways, either by sending a Linux signal, spawning a new \gls{kthrd} or by polling for completion of one or more operations. For the purpose of multiplexing operations, spawning a new \gls{kthrd} is counter-productive but a related solution is discussed in Section~\ref{io:morethreads}. Since using interrupt handlers can also lead to fairly complicated interactions between subsystems, I will concentrate on the different polling methods. AIO only supports read and write operations to file descriptors and those do not have the same limitation as @O_NONBLOCK@, \ie, the file descriptors can be regular files and block devices. It also supports batching more than one of these operations in a single system call.
+
+AIO offers two different approaches to polling. @aio_error@ can be used as a spinning form of polling, returning @EINPROGRESS@ until the operation is completed, and @aio_suspend@ can be used similarly to @select@, @poll@ or @epoll@, to wait until one or more requests have completed. For the purpose of \io multiplexing, @aio_suspend@ is the intended interface. Even if AIO requests can be submitted concurrently, @aio_suspend@ suffers from the same limitation as @select@ and @poll@, \ie, the interest set cannot be dynamically changed while a call to @aio_suspend@ is in progress. Unlike @select@ and @poll@ however, it also suffers from the limitation that it does not specify which requests have completed, meaning programmers then have to poll each request in the interest set using @aio_error@ to identify which requests have completed. This means that, like @select@ and @poll@ but not @epoll@, the time needed to examine polling results increases based on the total number of requests monitored, not the number of completed requests.
+
+AIO does not seem to be a particularly popular interface, which I believe is in part due to this less than ideal polling interface. Linus Torvalds talks about this interface as follows:
 
 \begin{displayquote}
@@ -30,17 +45,96 @@
 in
 ``some kind of arbitrary \textit{queue up asynchronous system call} model''.
-This description is actually quite close to the interface of the interface described in the next section.
+This description is actually quite close to the interface described in the next section.
 
-\subsection{\texttt{io\_uring}}
-A very recent addition to Linux, @io_uring@\cit{io\_uring} is a framework that aims to solve many of the problems listed with the above mentioned solutions.
+\subsection{\lstinline|io_uring|}
+A very recent addition to Linux, @io_uring@\cite{MAN:io_uring} is a framework that aims to solve many of the problems listed with the above mentioned interfaces. Like AIO, it represents \io operations as entries added on a queue. But like @epoll@, new requests can be submitted while a blocking call waiting for requests to complete is already in progress. The @io_uring@ interface uses two ring buffers (referred to simply as rings) as its core, a submit ring to which programmers push \io requests and a completion buffer which programmers poll for completion.
+
+One of the big advantages over the interfaces listed above is that it also supports a much wider range of operations. In addition to supporting reads and writes to any file descriptor like AIO, it supports other operations like @open@, @close@, @fsync@, @accept@, @connect@, @send@, @recv@, @splice@, \etc.
+
+On top of these, @io_uring@ adds many ``bells and whistles'' like avoiding copies between the kernel and user-space with shared memory, allowing different mechanisms to communicate with device drivers and supporting chains of requests, \ie, requests that automatically trigger followup requests on completion.
 
 \subsection{Extra Kernel Threads}\label{io:morethreads}
-Finally, if the operating system does not offer any satisfying forms of asynchronous \glsxtrshort{io} operations, a solution is to fake it by creating a pool of \glspl{kthrd} and delegating operations to them in order to avoid blocking \glspl{proc}.
+Finally, if the operating system does not offer any satisfying forms of asynchronous \io operations, a solution is to fake it by creating a pool of \glspl{kthrd} and delegating operations to them in order to avoid blocking \glspl{proc}. This is a compromise on multiplexing. In the worst case, where all \glspl{thrd} are consistently blocking on \io, it devolves into 1-to-1 threading. However, regardless of the frequency of \io operations, it achieves the fundamental goal of not blocking \glspl{proc} when \glspl{thrd} are ready to run. This approach is used by languages like Go\cit{Go} and frameworks like libuv\cit{libuv}, since it has the advantage that it can easily be used across multiple operating systems. This advantage is especially relevant for languages like Go, which offer a homogeneous \glsxtrshort{api} across all platforms. This is in contrast to C, which has a very limited standard \glsxtrshort{api} for \io, \eg, the C standard library has no networking.
 
 \subsection{Discussion}
+These options effectively fall into two broad camps of solutions: waiting for \io to be ready versus waiting for \io to be completed. All operating systems that support asynchronous \io must offer an interface along one of these lines, but the details can vary drastically. For example, FreeBSD offers @kqueue@~\cite{MAN:bsd/kqueue} which behaves similarly to @epoll@ but with some small quality of life improvements, while Windows (Win32)~\cit{https://docs.microsoft.com/en-us/windows/win32/fileio/synchronous-and-asynchronous-i-o} offers ``overlapped I/O'' which handles submissions similarly to @O_NONBLOCK@, with extra flags on the synchronous system call, but waits for completion events, similarly to @io_uring@.
 
+For this project, I have chosen to use @io_uring@, in large part due to its generality. While @epoll@ has been shown to be a good solution to socket \io (\cite{DBLP:journals/pomacs/KarstenB20}), @io_uring@'s transparent support for files, pipes and more complex operations, like @splice@ and @tee@, makes it a better choice as the foundation for a general \io subsystem.
 
 \section{Event-Engine}
 
+The event engine's responsibility is to use the kernel interface to multiplex many \io operations onto few \glspl{kthrd}. In concrete terms, this means that \glspl{thrd} enter the engine through an interface, the event engine then starts the operation and parks the calling \glspl{thrd}, returning control to the \gls{proc}. The parked \glspl{thrd} are then rescheduled by the event engine once the desired operation has completed.
+
+\subsection{\lstinline|io_uring| in depth}
+Before going into details on the design of the event engine, I will present some more details on the usage of @io_uring@ which are important for the design of the engine.
+
+\begin{figure}
+	\centering
+	\input{io_uring.pstex_t}
+	\caption[Overview of \lstinline|io_uring|]{Overview of \lstinline|io_uring| \smallskip\newline Two ring buffers are used to communicate with the kernel, one for completions~(right) and one for submissions~(left). The completion ring contains entries, \newterm{CQE}s: Completion Queue Entries, that are produced by the kernel when an operation completes and then consumed by the application. On the other hand, the application produces \newterm{SQE}s: Submit Queue Entries, which it appends to the submission ring for the kernel to consume. Unlike the completion ring, the submission ring does not contain the entries directly, it indexes into the SQE array (denoted \emph{S}) instead.}
+	\label{fig:iouring}
+\end{figure}
+
+Figure~\ref{fig:iouring} shows an overview of an @io_uring@ instance. Multiple @io_uring@ instances can be created, in which case they each have a copy of the data structures in the figure. New \io operations are submitted to the kernel following 4 steps which use the components shown in the figure.
+
+\paragraph{First} an @sqe@ must be allocated from the pre-allocated array (denoted \emph{S} in Figure~\ref{fig:iouring}). This array is created at the same time as the @io_uring@ instance, is in kernel-locked memory, which means it is both visible by the kernel and the application, and has a fixed size determined at creation. How these entries are allocated is not important for the functioning of @io_uring@, the only requirement is that no entry is reused before the kernel has consumed it.
+
+\paragraph{Secondly} the @sqe@ must be filled according to the desired operation. This step is straightforward, the only detail worth mentioning is that @sqe@s have a @user_data@ field that must be filled in order to match submission and completion entries.
+
+\paragraph{Thirdly} the @sqe@ must be submitted to the submission ring, this requires appending the index of the @sqe@ to the ring following regular ring buffer steps: \lstinline|{ buffer[head] = item; head++ }|. Since the head is visible to the kernel, some memory barriers may be required to prevent the compiler from reordering these operations. Since the submission ring is a regular ring buffer, more than one @sqe@ can be added at once and the head can be updated only after the entire batch has been updated.
+
+\paragraph{Finally} the kernel must be notified of the change to the ring using the system call @io_uring_enter@. The number of elements appended to the submission ring is passed as a parameter and the number of elements consumed is returned. The @io_uring@ instance can be constructed so that this step is not required, but this requires elevated privilege and early versions of @io_uring@ had additional restrictions.
+
+The completion side is simpler, applications call @io_uring_enter@ with the flag @IORING_ENTER_GETEVENTS@ to wait on a desired number of operations to complete. The same call can be used to both submit @sqe@s and wait for operations to complete. When operations do complete the kernel appends a @cqe@ to the completion ring and advances the head of the ring. Each @cqe@ contains the result of the operation as well as a copy of the @user_data@ field of the @sqe@ that triggered the operation. It is not necessary to call @io_uring_enter@ to get new events, the kernel can directly modify the completion ring, the system call is only needed if the application wants to block waiting on operations to complete.
+
+The @io_uring_enter@ system call is protected by a lock inside the kernel. This means that concurrent calls to @io_uring_enter@ using the same instance are possible, but there can be no performance gained from parallel calls to @io_uring_enter@. It is possible to do the first three submission steps in parallel, however, doing so requires careful synchronization.
+
+@io_uring@ also introduces some constraints on the number of operations that can be ``in flight'' at the same time. Obviously, @sqe@s are allocated from a fixed-size array, meaning that there is a hard limit to how many @sqe@s can be submitted at once. In addition, the @io_uring_enter@ system call can fail because ``The  kernel [...] ran out of resources to handle [a request]'' or ``The application is attempting to overcommit the number of requests it can  have  pending.''. This requirement means that it can be required to handle bursts of \io requests by holding back some of the requests so they can be submitted at a later time.
+
+\subsection{Multiplexing \io: Submission}
+The submission side is the most complicated aspect of @io_uring@ and from the design decisions made in the submission side, the completion side effectively follows.
+
+While it is possible to do the first steps of submission in parallel, the duration of the system call scales with the number of entries submitted. The consequence of this is that how much parallelism can be used to prepare submissions for the next system call is limited. Beyond this limit, the length of the system call will be the throughput limiting factor. I have concluded from early experiments that preparing submissions seems to take about as long as the system call itself, which means that with a single @io_uring@ instance, there is no benefit in terms of \io throughput to having more than two \glspl{hthrd}. Therefore the design of the submission engine must manage multiple instances of @io_uring@ running in parallel, effectively sharding @io_uring@ instances. Similarly to scheduling, this sharding can be done privately, \ie, one instance per \gls{proc}, or in decoupled pools, \ie, a pool of \glspl{proc} use a pool of @io_uring@ instances without one-to-one coupling between any given instance and any given \gls{proc}.
+
+\subsubsection{Pool of Instances}
+One approach is to have multiple shared instances. \Glspl{thrd} attempting \io operations pick one of the available instances and submit operations to that instance. Since the completion will be sent to the same instance, all instances with pending operations must be polled continuously\footnote{As will be described in Chapter~\ref{practice}, this does not translate into constant cpu usage.}. Since there is no coupling between \glspl{proc} and @io_uring@ instances in this approach, \glspl{thrd} running on more than one \gls{proc} can attempt to submit to the same instance concurrently. Since @io_uring@ effectively sets the amount of sharding needed to avoid contention on its internal locks, performance in this approach is based on two aspects: the synchronization needed to submit does not induce more contention than @io_uring@ already does and the scheme to route \io requests to specific @io_uring@ instances does not introduce contention. This second aspect has an oversized importance because it comes into play before the sharding of instances, and as such, all \glspl{hthrd} can contend on the routing algorithm.
+
+Allocation in this scheme can be handled fairly easily. Free @sqe@s, \ie, @sqe@s that aren't currently being used to represent a request, can be written to safely and have a field called @user_data@ which the kernel only reads to copy to @cqe@s. Allocation also requires no ordering guarantee as all free @sqe@s are interchangeable. This requires a simple concurrent bag. The only added complexity is that the number of @sqe@s is fixed, which means allocation can fail. This failure needs to be pushed up to the routing algorithm, \glspl{thrd} attempting \io operations must not be directed to @io_uring@ instances without any available @sqe@s. Ideally, the routing algorithm would block operations up-front if none of the instances have available @sqe@s.
+
+Once an @sqe@ is allocated, \glspl{thrd} can fill it normally, they simply need to keep track of the @sqe@ index and which instance it belongs to.
+
+Once an @sqe@ is filled in, what needs to happen is that the @sqe@ must be added to the submission ring buffer, an operation that is not thread-safe by itself, and the kernel must be notified using the @io_uring_enter@ system call. The submission ring buffer is the same size as the pre-allocated @sqe@ buffer, therefore pushing to the ring buffer cannot fail\footnote{This is because it is invalid to have the same \lstinline|sqe| multiple times in the ring buffer.}. However, as mentioned, the system call itself can fail with the expectation that it will be retried once some of the already submitted operations complete. Since multiple @sqe@s can be submitted to the kernel at once, it is important to strike a balance between batching and latency. Operations that are ready to be submitted should be batched together in few system calls, but at the same time, operations should not be left pending for long periods of time before being submitted. This can be handled by either designating one of the submitting \glspl{thrd} as being responsible for the system call for the current batch of @sqe@s or by having some other party regularly submitting all ready @sqe@s, \eg, the poller \gls{thrd} mentioned later in this section.
+
+In the case of designating a \gls{thrd}, ideally, when multiple \glspl{thrd} attempt to submit operations to the same @io_uring@ instance, all requests would be batched together and one of the \glspl{thrd} would do the system call on behalf of the others, referred to as the \newterm{submitter}. In practice however, it is important that the \io requests are not left pending indefinitely and as such, it may be required to have a current submitter and a next submitter. Indeed, as long as there is a ``next'' submitter, \glspl{thrd} submitting new \io requests can move on, knowing that some future system call will include their request. Once the system call is done, the submitter must also free @sqe@s so that the allocator can reuse them.
+
+Finally, the completion side is much simpler since the @io_uring@ system call enforces a natural synchronization point. Polling simply needs to regularly do the system call, go through the produced @cqe@s and communicate the result back to the originating \glspl{thrd}. Since @cqe@s only own a signed 32 bit result, in addition to the copy of the @user_data@ field, all that is needed to communicate the result is a simple future~\cite{wiki:future}. If the submission side does not designate submitters, polling can also submit all @sqe@s as it is polling events. A simple approach to polling is to allocate a \gls{thrd} per @io_uring@ instance and simply let the poller \glspl{thrd} poll their respective instances when scheduled. This design is especially convenient for reasons explained in Chapter~\ref{practice}.
+
+With this pool of instances approach, the big advantage is that it is fairly flexible. It does not impose restrictions on what \glspl{thrd} submitting \io operations can and cannot do between allocations and submissions. It also can gracefully handle running out of resources, @sqe@s or the kernel returning @EBUSY@. The down side to this is that many of the steps used for submitting need complex synchronization to work properly. The routing and allocation algorithm needs to keep track of which ring instances have available @sqe@s, block incoming requests if no instance is available, prevent barging if \glspl{thrd} are already queued up waiting for @sqe@s and handle @sqe@s being freed. The submission side needs to safely append @sqe@s to the ring buffer, make sure no @sqe@ is dropped or left pending forever, notify the allocation side when @sqe@s can be reused and handle the kernel returning @EBUSY@. Sharding the @io_uring@ instances should alleviate much of the contention caused by this, but all this synchronization may still have non-zero cost.
+
+\subsubsection{Private Instances}
+Another approach is to simply create one ring instance per \gls{proc}. This alleviates the need for synchronization on the submissions, requiring only that \glspl{thrd} are not interrupted in between two submission steps. This is effectively the same requirement as using @thread_local@ variables. Since @sqe@s that are allocated must be submitted to the same ring, on the same \gls{proc}, this effectively forces the application to submit @sqe@s in allocation order\footnote{The actual requirement is that \glspl{thrd} cannot context switch between allocation and submission. This requirement means that from the subsystem's point of view, the allocation and submission are sequential. To remove this requirement, a \gls{thrd} would need the ability to ``yield to a specific \gls{proc}'', \ie, park with the promise that it will be run next on a specific \gls{proc}, the \gls{proc} attached to the correct ring. This is not a current or planned feature of \CFA.}, greatly simplifying both allocation and submission. In this design, allocation and submission form a partitioned ring buffer as shown in Figure~\ref{fig:pring}. Once added to the ring buffer, the attached \gls{proc} has a significant amount of flexibility with regards to when to do the system call. Possible options are: when the \gls{proc} runs out of \glspl{thrd} to run, after running a given number of \glspl{thrd}, etc.
+
+\begin{figure}
+	\centering
+	\input{pivot_ring.pstex_t}
+	\caption[Partitioned ring buffer]{Partitioned ring buffer \smallskip\newline Allocated sqes are appended to the first partition. When submitting, the partition is simply advanced to include all the sqes that should be submitted. The kernel considers the partition as the head of the ring.}
+	\label{fig:pring}
+\end{figure}
+
+This approach has the advantage that it does not require much of the synchronization needed in the shared approach. This comes at the cost that \glspl{thrd} submitting \io operations have less flexibility, they cannot park or yield, and several exceptional cases are handled poorly. Instances running out of @sqe@s cannot run \glspl{thrd} wanting to do \io operations, in such a case the \gls{thrd} needs to be moved to a different \gls{proc}, the only current way of achieving this would be to @yield()@ hoping to be scheduled on a different \gls{proc}, which is not guaranteed. Another problematic case is that \glspl{thrd} that do not park for long periods of time will delay the submission of any @sqe@ not already submitted. This issue is similar to the fairness issues of schedulers that use work-stealing, mentioned in the previous chapter.
+
+
 
 \section{Interface}
+Finally, the last important part of the \io subsystem is its interface. There are multiple approaches that can be offered to programmers, each with advantages and disadvantages. The new \io subsystem can replace the C runtime's API or extend it. And in the latter case the interface can go from very similar to vastly different. The following sections discuss some useful options using @read@ as an example. The standard Linux interface for C is :
+
+@ssize_t read(int fd, void *buf, size_t count);@.
+
+\subsection{Replacement}
+Replacing the C \glsxtrshort{api}
+
+\subsection{Synchronous Extension}
+
+\subsection{Asynchronous Extension}
+
+\subsection{Interface directly to \lstinline|io_uring|}
Index: doc/theses/thierry_delisle_PhD/thesis/text/runtime.tex
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/text/runtime.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/thierry_delisle_PhD/thesis/text/runtime.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -11,5 +11,5 @@
 
 \section{Clusters}
-\CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} are only be scheduled onto \glspl{proc} in the same cluster and scheduling is done independently of other clusters. Figure~\ref{fig:system} shows an overview of the \CFA runtime, which allows programmers to tightly control parallelism. It also opens the door to handling effects like NUMA, by pining clusters to a specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}.
+\CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} are only scheduled onto \glspl{proc} in the same cluster and scheduling is done independently of other clusters. Figure~\ref{fig:system} shows an overview of the \CFA runtime, which allows programmers to tightly control parallelism. It also opens the door to handling effects like NUMA, by pining clusters to a specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}.
 
 \begin{figure}
@@ -25,12 +25,13 @@
 
 \section{\glsxtrshort{io}}\label{prev:io}
-Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. %\CFA being built on C, this means that,
-While all I/O operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model~\cite{pthreads}. Using these 1:1 threading operations in an M:N threading model means I/O operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. It also means deadlock can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows:
+Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. While all \glsxtrshort{io} operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model~\cite{pthreads}. Using these 1:1 threading operations in an M:N threading model means \glsxtrshort{io} operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. It also means deadlock can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows:
+
 \begin{quote}
 Given a simple network program with 2 \glspl{thrd} and a single \gls{proc}, one \gls{thrd} sends network requests to a server and the other \gls{thrd} waits for a response from the server. If the second \gls{thrd} races ahead, it may wait for responses to requests that have not been sent yet. In theory, this should not be a problem, even if the second \gls{thrd} waits, because the first \gls{thrd} is still ready to run and should be able to get CPU time to send the request. With M:N threading, while the first \gls{thrd} is ready, the lone \gls{proc} \emph{cannot} run the first \gls{thrd} if it is blocked in the \glsxtrshort{io} operation of the second \gls{thrd}. If this happen, the system is in a synchronization deadlock\footnote{In this example, the deadlocked could be resolved if the server sends unprompted messages to the client. However, this solution is not general and may not be appropriate even in this simple case.}.
 \end{quote}
-Therefore, one of the objective of this work is to introduce \emph{User-Level \glsxtrshort{io}}, like \glslink{uthrding}{User-Level \emph{Threading}} blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations, which entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple I/O operations in parallel. This requirement cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its blocking duration. Executing I/O operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} does not block.
 
-\section{Interoperating with C}
+Therefore, one of the objectives of this work is to introduce \emph{User-Level \glsxtrshort{io}}, which, like \glslink{uthrding}{User-Level \emph{Threading}}, blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations, which entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple \glsxtrshort{io} operations in parallel. This requirement cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its blocking duration. Executing \glsxtrshort{io} operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} does not block.
+
+\section{Interoperating with \texttt{C}}
 While \glsxtrshort{io} operations are the classical example of operations that block \glspl{kthrd}, the non-blocking challenge extends to all blocking system-calls. The POSIX standard states~\cite[\S~2.9.1]{POSIX17}:
 \begin{quote}
@@ -44,5 +45,5 @@
 \begin{enumerate}
 	\item Precisely identifying blocking C calls is difficult.
-	\item Introducing new code can have a significant impact on general performance.
+	\item Introducing control points code can have a significant impact on general performance.
 \end{enumerate}
-Because of these consequences, this work does not attempt to ``sandbox'' calls to C. Therefore, it is possible for an unidentified library calls to block a \gls{kthrd} leading to deadlocks in \CFA's M:N threading model, which would not occur in a traditional 1:1 threading model. Currently, all M:N thread systems interacting with UNIX without sandboxing suffer from this problem but manage to work very well in the majority of applications. Therefore, a complete solution to this problem is outside the scope of this thesis.
+Because of these consequences, this work does not attempt to ``sandbox'' calls to C. Therefore, it is possible calls from an unidentified library will block a \gls{kthrd} leading to deadlocks in \CFA's M:N threading model, which would not occur in a traditional 1:1 threading model. Currently, all M:N thread systems interacting with UNIX without sandboxing suffer from this problem but manage to work very well in the majority of applications. Therefore, a complete solution to this problem is outside the scope of this thesis.
Index: doc/theses/thierry_delisle_PhD/thesis/thesis.tex
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/thesis.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/theses/thierry_delisle_PhD/thesis/thesis.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -81,4 +81,5 @@
 %\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
 \usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
+\usepackage{xcolor}
 \usepackage{graphicx} % For including graphics
 
@@ -120,5 +121,14 @@
 % although it's supposed to be in both the TeX Live and MikTeX distributions. There are also documentation and
 % installation instructions there.
-\renewcommand*{\glstextformat}[1]{\textsf{#1}}
+\makeatletter
+\newcommand*{\glsplainhyperlink}[2]{%
+  \colorlet{currenttext}{.}% store current text color
+  \colorlet{currentlink}{\@linkcolor}% store current link color
+  \hypersetup{linkcolor=currenttext}% set link color
+  \hyperlink{#1}{#2}%
+  \hypersetup{linkcolor=currentlink}% reset to default
+}
+\let\@glslink\glsplainhyperlink
+\makeatother
 
 \usepackage{csquotes}
@@ -200,4 +210,6 @@
 \makeindex
 
+\newcommand\io{\glsxtrshort{io}}%
+
 %======================================================================
 %   L O G I C A L    D O C U M E N T -- the content of your thesis
@@ -232,6 +244,6 @@
 \part{Design}
 \input{text/core.tex}
+\input{text/io.tex}
 \input{text/practice.tex}
-\input{text/io.tex}
 \part{Evaluation}
 \label{Evaluation}
Index: doc/user/figures/Cdecl.fig
===================================================================
--- doc/user/figures/Cdecl.fig	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/user/figures/Cdecl.fig	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -19,9 +19,9 @@
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2850 1200 3600 1200 3600 1350 2850 1350 2850 1200
-4 1 0 50 -1 4 10 0.0000 2 120 90 2925 1325 0\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 3075 1325 1\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 3225 1325 2\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 3375 1325 3\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 3525 1325 4\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 2925 1325 0\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 3075 1325 1\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 3225 1325 2\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 3375 1325 3\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 3525 1325 4\001
 -6
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
@@ -55,9 +55,9 @@
 	1 1 1.00 45.00 60.00
 	 2550 1275 2850 1275
-4 1 0 50 -1 4 10 0.0000 2 120 90 1350 1650 0\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 1500 1650 1\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 1650 1650 2\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 1800 1650 3\001
-4 1 0 50 -1 4 10 0.0000 2 120 90 1950 1650 4\001
-4 1 0 50 -1 4 10 0.0000 2 90 90 1200 1325 x\001
-4 1 0 50 -1 4 10 0.0000 2 90 90 2400 1325 x\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 1350 1650 0\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 1500 1650 1\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 1650 1650 2\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 1800 1650 3\001
+4 1 0 50 -1 4 11 0.0000 2 120 90 1950 1650 4\001
+4 1 0 50 -1 4 11 0.0000 2 90 90 1200 1325 x\001
+4 1 0 50 -1 4 11 0.0000 2 90 90 2400 1325 x\001
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ doc/user/user.tex	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Mon Oct  5 08:57:29 2020
-%% Update Count     : 3998
+%% Last Modified On : Mon Feb  8 21:53:31 2021
+%% Update Count     : 4327
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -37,4 +37,24 @@
 \usepackage{mathptmx}                                   % better math font with "times"
 \usepackage[usenames]{color}
+\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
+\usepackage{breakurl}
+
+\renewcommand\footnoterule{\kern -3pt\rule{0.3\linewidth}{0.15pt}\kern 2pt}
+
+\usepackage[pagewise]{lineno}
+\renewcommand{\linenumberfont}{\scriptsize\sffamily}
+\usepackage[firstpage]{draftwatermark}
+\SetWatermarkLightness{0.9}
+
+% Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
+% removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
+% AFTER HYPERREF.
+\renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
+
+\setlength{\topmargin}{-0.45in}							% move running title into header
+\setlength{\headsep}{0.25in}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 \newcommand{\CFALatin}{}
 % inline code ©...© (copyright symbol) emacs: C-q M-)
@@ -46,27 +66,8 @@
 % math escape $...$ (dollar symbol)
 \input{common}                                          % common CFA document macros
-\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
-\usepackage{breakurl}
-
-\renewcommand\footnoterule{\kern -3pt\rule{0.3\linewidth}{0.15pt}\kern 2pt}
-
-\usepackage[pagewise]{lineno}
-\renewcommand{\linenumberfont}{\scriptsize\sffamily}
-\usepackage[firstpage]{draftwatermark}
-\SetWatermarkLightness{0.9}
-
-% Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
-% removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
-% AFTER HYPERREF.
-\renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
-
-\setlength{\topmargin}{-0.45in}							% move running title into header
-\setlength{\headsep}{0.25in}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
 \CFAStyle												% use default CFA format-style
+\lstset{language=CFA}									% CFA default language
 \lstnewenvironment{C++}[1][]                            % use C++ style
-{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}}
+{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{@}{@},#1}}
 {}
 
@@ -81,4 +82,5 @@
 \newcommand{\Emph}[2][red]{{\color{#1}\textbf{\emph{#2}}}}
 \newcommand{\R}[1]{\Textbf{#1}}
+\newcommand{\RC}[1]{\Textbf{\LstBasicStyle{#1}}}
 \newcommand{\B}[1]{{\Textbf[blue]{#1}}}
 \newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
@@ -104,6 +106,7 @@
 \author{
 \huge \CFA Team \medskip \\
-\Large Andrew Beach, Richard Bilson, Peter A. Buhr, Thierry Delisle, \smallskip \\
-\Large Glen Ditchfield, Rodolfo G. Esteves, Aaron Moss, Rob Schluntz
+\Large Andrew Beach, Richard Bilson, Michael Brooks, Peter A. Buhr, Thierry Delisle, \smallskip \\
+\Large Glen Ditchfield, Rodolfo G. Esteves, Aaron Moss, Colby Parsons, Rob Schluntz, \smallskip \\
+\Large Fangren Yu, Mubeen Zulfiqar
 }% author
 
@@ -144,8 +147,8 @@
 \section{Introduction}
 
-\CFA{}\index{cforall@\CFA}\footnote{Pronounced ``\Index*{C-for-all}'', and written \CFA, CFA, or \CFL.} is a modern general-purpose programming-language, designed as an evolutionary step forward for the C programming language.
+\CFA{}\index{cforall@\CFA}\footnote{Pronounced ``\Index*{C-for-all}'', and written \CFA, CFA, or \CFL.} is a modern general-purpose concurrent programming-language, designed as an evolutionary step forward for the C programming language.
 The syntax of \CFA builds from C and should look immediately familiar to C/\Index*[C++]{\CC{}} programmers.
 % Any language feature that is not described here can be assumed to be using the standard \Celeven syntax.
-\CFA adds many modern programming-language features that directly lead to increased \emph{\Index{safety}} and \emph{\Index{productivity}}, while maintaining interoperability with existing C programs and achieving similar performance.
+\CFA adds many modern features that directly lead to increased \emph{\Index{safety}} and \emph{\Index{productivity}}, while maintaining interoperability with existing C programs and achieving similar performance.
 Like C, \CFA is a statically typed, procedural (non-\Index{object-oriented}) language with a low-overhead runtime, meaning there is no global \Index{garbage-collection}, but \Index{regional garbage-collection}\index{garbage-collection!regional} is possible.
 The primary new features include polymorphic routines and types, exceptions, concurrency, and modules.
@@ -157,4 +160,5 @@
 instead, a programmer evolves a legacy program into \CFA by incrementally incorporating \CFA features.
 As well, new programs can be written in \CFA using a combination of C and \CFA features.
+In many ways, \CFA is to C as \Index{Scala}~\cite{Scala} is to Java, providing a vehicle for new typing and control-flow capabilities on top of a highly popular programming language allowing immediate dissemination.
 
 \Index*[C++]{\CC{}}~\cite{c++:v1} had a similar goal 30 years ago, allowing object-oriented programming to be incrementally added to C.
@@ -165,35 +169,35 @@
 For example, the following programs compare the C, \CFA, and \CC I/O mechanisms, where the programs output the same result.
 \begin{center}
-\begin{tabular}{@{}l@{\hspace{1.5em}}l@{\hspace{1.5em}}l@{}}
-\multicolumn{1}{c@{\hspace{1.5em}}}{\textbf{C}}	& \multicolumn{1}{c}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{\CC}}	\\
-\begin{cfa}
-#include <stdio.h>§\indexc{stdio.h}§
+\begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
+\multicolumn{1}{c@{\hspace{1em}}}{\textbf{C}}	& \multicolumn{1}{c}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{\CC}}	\\
+\begin{cfa}
+#include <stdio.h>$\indexc{stdio.h}$
 
 int main( void ) {
 	int x = 0, y = 1, z = 2;
-	®printf( "%d %d %d\n", x, y, z );®
+	@printf( "%d %d %d\n", x, y, z );@
 }
 \end{cfa}
 &
 \begin{cfa}
-#include <fstream>§\indexc{fstream}§
+#include <fstream>$\indexc{fstream}$
 
 int main( void ) {
 	int x = 0, y = 1, z = 2;
-	®sout | x | y | z;®§\indexc{sout}§
+	@sout | x | y | z;@$\indexc{sout}$
 }
 \end{cfa}
 &
 \begin{cfa}
-#include <iostream>§\indexc{iostream}§
+#include <iostream>$\indexc{iostream}$
 using namespace std;
 int main() {
 	int x = 0, y = 1, z = 2;
-	®cout<<x<<" "<<y<<" "<<z<<endl;®
+	@cout<<x<<" "<<y<<" "<<z<<endl;@
 }
 \end{cfa}
 \end{tabular}
 \end{center}
-While the \CFA I/O looks similar to the \Index*[C++]{\CC{}} output style, there are important differences, such as automatic spacing between variables as in \Index*{Python} (see~\VRef{s:IOLibrary}).
+While \CFA I/O \see{\VRef{s:StreamIOLibrary}} looks similar to \Index*[C++]{\CC{}}, there are important differences, such as automatic spacing between variables and an implicit newline at the end of the expression list, similar to \Index*{Python}~\cite{Python}.
 
 
@@ -210,18 +214,18 @@
 \section{Why fix C?}
 
-The C programming language is a foundational technology for modern computing with millions of lines of code implementing everything from hobby projects to commercial operating-systems.
+The C programming language is a foundational technology for modern computing with billions of lines of code implementing everything from hobby projects to commercial operating-systems.
 This installation base and the programmers producing it represent a massive software-engineering investment spanning decades and likely to continue for decades more.
 Even with all its problems, C continues to be popular because it allows writing software at virtually any level in a computer system without restriction.
-For system programming, where direct access to hardware, storage management, and real-time issues are a requirement, C is usually the only language of choice.
-The TIOBE index~\cite{TIOBE} for February 2020 ranks the top six most \emph{popular} programming languages as \Index*{Java} 17.4\%, C 16.8\%, Python 9.3\%, \Index*[C++]{\CC{}} 6.2\%, \Csharp 5.9\%, Visual Basic 5.9\% = 61.5\%, where the next 50 languages are less than 2\% each, with a long tail.
+For system programming, where direct access to hardware, storage management, and real-time issues are a requirement, C is the only language of choice.
+The TIOBE index~\cite{TIOBE} for February 2021 ranks the top six most \emph{popular} programming languages as C 17.4\%, \Index*{Java} 12\%, Python 12\%, \Index*[C++]{\CC{}} 7.6\%, \Csharp 4\%, Visual Basic 3.8\% = 56.8\%, where the next 50 languages are less than 2\% each, with a long tail.
 The top 4 rankings over the past 35 years are:
 \begin{center}
 \setlength{\tabcolsep}{10pt}
 \begin{tabular}{@{}rcccccccc@{}}
-		& 2020	& 2015	& 2010	& 2005	& 2000	& 1995	& 1990	& 1985	\\ \hline
-Java	& 1		& 2		& 1		& 2		& 3		& -		& -		& -		\\
-\R{C}	& \R{2} & \R{1} & \R{2} & \R{1} & \R{1} & \R{2} & \R{1} & \R{1} \\
-Python	& 3		& 7		& 6		& 6		& 22	& 21	& -		& -		\\
-\CC		& 4		& 4		& 4		& 3		& 2		& 1		& 2		& 12	\\
+		& 2021	& 2016	& 2011	& 2006	& 2001	& 1996	& 1991	& 1986	\\ \hline
+\R{C}	& \R{1} & \R{2} & \R{2} & \R{1} & \R{1} & \R{1} & \R{1} & \R{1} \\
+Java	& 2		& 1		& 1		& 2		& 3		& 28	& -		& -		\\
+Python	& 3		& 5		& 6		& 7		& 23	& 13	& -		& -		\\
+\CC		& 4		& 3		& 3		& 3		& 2		& 2		& 2		& 8		\\
 \end{tabular}
 \end{center}
@@ -232,5 +236,5 @@
 As stated, the goal of the \CFA project is to engineer modern language-features into C in an evolutionary rather than revolutionary way.
 \CC~\cite{C++14,C++} is an example of a similar project;
-however, it largely extended the C language, and did not address most of C's existing problems.\footnote{%
+however, it largely extended the C language, and did not address many of C's existing problems.\footnote{%
 Two important existing problems addressed were changing the type of character literals from ©int© to ©char© and enumerator from ©int© to the type of its enumerators.}
 \Index*{Fortran}~\cite{Fortran08}, \Index*{Ada}~\cite{Ada12}, and \Index*{Cobol}~\cite{Cobol14} are examples of programming languages that took an evolutionary approach, where modern language-features (\eg objects, concurrency) are added and problems fixed within the framework of the existing language.
@@ -241,8 +245,6 @@
 
 The result of this project is a language that is largely backwards compatible with \Index*[C11]{\Celeven{}}~\cite{C11}, but fixes many of the well known C problems while adding modern language-features.
-To achieve these goals required a significant engineering exercise, where we had to ``think inside the existing C box''.
-Without these significant extension to C, it is unable to cope with the needs of modern programming problems and programmers;
-as a result, it will fade into disuse.
-Considering the large body of existing C code and programmers, there is significant impetus to ensure C is transformed into a modern programming language.
+Achieving these goals required a significant engineering exercise, \ie ``thinking \emph{inside} the C box''.
+Considering the large body of existing C code and programmers, there is significant impetus to ensure C is transformed into a modern language.
 While \Index*[C11]{\Celeven{}} made a few simple extensions to the language, nothing was added to address existing problems in the language or to augment the language with modern language-features.
 While some may argue that modern language-features may make C complex and inefficient, it is clear a language without modern capabilities is insufficient for the advanced programming problems existing today.
@@ -251,17 +253,16 @@
 \section{History}
 
-The \CFA project started with \Index*{Dave Till}\index{Till, Dave}'s \Index*{K-W C}~\cite{Buhr94a,Till89}, which extended C with new declaration syntax, multiple return values from routines, and advanced assignment capabilities using the notion of tuples.
-(See~\cite{Werther96} for similar work in \Index*[C++]{\CC{}}.)
+The \CFA project started with \Index*{Dave Till}\index{Till, Dave}'s \Index*{K-W C}~\cite{Buhr94a,Till89}, which extended C with new declaration syntax, multiple return values from routines, and advanced assignment capabilities using the notion of tuples \see{\cite{Werther96} for similar work in \Index*[C++]{\CC{}}}.
 The first \CFA implementation of these extensions was by \Index*{Rodolfo Esteves}\index{Esteves, Rodolfo}~\cite{Esteves04}.
 
 The signature feature of \CFA is \emph{\Index{overload}able} \Index{parametric-polymorphic} functions~\cite{forceone:impl,Cormack90,Duggan96} with functions generalized using a ©forall© clause (giving the language its name):
 \begin{cfa}
-®forall( otype T )® T identity( T val ) { return val; }
-int forty_two = identity( 42 ); §\C{// T is bound to int, forty\_two == 42}§
+@forall( otype T )@ T identity( T val ) { return val; }
+int forty_two = identity( 42 ); $\C{// T is bound to int, forty\_two == 42}$
 \end{cfa}
 % extending the C type system with parametric polymorphism and overloading, as opposed to the \Index*[C++]{\CC{}} approach of object-oriented extensions.
 \CFA{}\hspace{1pt}'s polymorphism was originally formalized by \Index*{Glen Ditchfield}\index{Ditchfield, Glen}~\cite{Ditchfield92}, and first implemented by \Index*{Richard Bilson}\index{Bilson, Richard}~\cite{Bilson03}.
 However, at that time, there was little interesting in extending C, so work did not continue.
-As the saying goes, ``\Index*{What goes around, comes around.}'', and there is now renewed interest in the C programming language because of legacy code-bases, so the \CFA project has been restarted.
+As the saying goes, ``\Index*{What goes around, comes around.}'', and there is now renewed interest in the C programming language because of the legacy code-base, so the \CFA project was restarted in 2015.
 
 
@@ -273,5 +274,5 @@
 This feature allows \CFA programmers to take advantage of the existing panoply of C libraries to access thousands of external software features.
 Language developers often state that adequate \Index{library support} takes more work than designing and implementing the language itself.
-Fortunately, \CFA, like \Index*[C++]{\CC{}}, starts with immediate access to all exiting C libraries, and in many cases, can easily wrap library routines with simpler and safer interfaces, at very low cost.
+Fortunately, \CFA, like \Index*[C++]{\CC{}}, starts with immediate access to all existing C libraries, and in many cases, can easily wrap library routines with simpler and safer interfaces, at zero or very low cost.
 Hence, \CFA begins by leveraging the large repository of C libraries, and than allows programmers to incrementally augment their C programs with modern \Index{backward-compatible} features.
 
@@ -286,5 +287,5 @@
 
 double key = 5.0, vals[10] = { /* 10 sorted floating values */ };
-double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp ); §\C{// search sorted array}§
+double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp ); $\C{// search sorted array}$
 \end{cfa}
 which can be augmented simply with a polymorphic, type-safe, \CFA-overloaded wrappers:
@@ -295,8 +296,8 @@
 
 forall( otype T | { int ?<?( T, T ); } ) unsigned int bsearch( T key, const T * arr, size_t size ) {
-	T * result = bsearch( key, arr, size ); §\C{// call first version}§
-	return result ? result - arr : size; } §\C{// pointer subtraction includes sizeof(T)}§
-
-double * val = bsearch( 5.0, vals, 10 ); §\C{// selection based on return type}§
+	T * result = bsearch( key, arr, size ); $\C{// call first version}$
+	return result ? result - arr : size; } $\C{// pointer subtraction includes sizeof(T)}$
+
+double * val = bsearch( 5.0, vals, 10 ); $\C{// selection based on return type}$
 int posn = bsearch( 5.0, vals, 10 );
 \end{cfa}
@@ -310,5 +311,5 @@
 \begin{cfa}
 forall( dtype T | sized(T) ) T * malloc( void ) { return (T *)malloc( sizeof(T) ); }
-int * ip = malloc(); §\C{// select type and size from left-hand side}§
+int * ip = malloc(); $\C{// select type and size from left-hand side}$
 double * dp = malloc();
 struct S {...} * sp = malloc();
@@ -319,24 +320,23 @@
 However, it is necessary to differentiate between C and \CFA code because of name \Index{overload}ing, as for \CC.
 For example, the C math-library provides the following routines for computing the absolute value of the basic types: ©abs©, ©labs©, ©llabs©, ©fabs©, ©fabsf©, ©fabsl©, ©cabsf©, ©cabs©, and ©cabsl©.
-Whereas, \CFA wraps each of these routines into ones with the overloaded name ©abs©:
-\begin{cfa}
-char ®abs®( char );
-extern "C" { int ®abs®( int ); } §\C{// use default C routine for int}§
-long int ®abs®( long int );
-long long int ®abs®( long long int );
-float ®abs®( float );
-double ®abs®( double );
-long double ®abs®( long double );
-float _Complex ®abs®( float _Complex );
-double _Complex ®abs®( double _Complex );
-long double _Complex ®abs®( long double _Complex );
-\end{cfa}
-The problem is the name clash between the library routine ©abs© and the \CFA names ©abs©.
-Hence, names appearing in an ©extern "C"© block have \newterm*{C linkage}.
-Then overloading polymorphism uses a mechanism called \newterm{name mangling}\index{mangling!name} to create unique names that are different from C names, which are not mangled.
-Hence, there is the same need, as in \CC, to know if a name is a C or \CFA name, so it can be correctly formed.
-There is no way around this problem, other than C's approach of creating unique names for each pairing of operation and types.
-
-This example strongly illustrates a core idea in \CFA: \emph{the \Index{power of a name}}.
+Whereas, \CFA wraps each of these routines into one overloaded name ©abs©:
+\begin{cfa}
+char @abs@( char );
+extern "C" { int @abs@( int ); } $\C{// use default C routine for int}$
+long int @abs@( long int );
+long long int @abs@( long long int );
+float @abs@( float );
+double @abs@( double );
+long double @abs@( long double );
+float _Complex @abs@( float _Complex );
+double _Complex @abs@( double _Complex );
+long double _Complex @abs@( long double _Complex );
+\end{cfa}
+The problem is a \Index{name clash} between the C name ©abs© and the \CFA names ©abs©, resulting in two name linkages\index{C linkage}: ©extern "C"© and ©extern "Cforall"© (default).
+Overloaded names must use \newterm{name mangling}\index{mangling!name} to create unique names that are different from unmangled C names.
+Hence, there is the same need as in \CC to know if a name is a C or \CFA name, so it can be correctly formed.
+The only way around this problem is C's approach of creating unique names for each pairing of operation and type.
+
+This example illustrates a core idea in \CFA: \emph{the \Index{power of a name}}.
 The name ``©abs©'' evokes the notion of absolute value, and many mathematical types provide the notion of absolute value.
 Hence, knowing the name ©abs© is sufficient to apply it to any type where it is applicable.
@@ -344,12 +344,14 @@
 
 
-\section[Compiling a CFA Program]{Compiling a \CFA Program}
+\section{\CFA Compilation}
 
 The command ©cfa© is used to compile a \CFA program and is based on the \Index{GNU} \Indexc{gcc} command, \eg:
 \begin{cfa}
-cfa§\indexc{cfa}\index{compilation!cfa@©cfa©}§ [ gcc-options ] [ C/§\CFA{}§ source-files ] [ assembler/loader files ]
-\end{cfa}
-\CFA programs having the following ©gcc© flags turned on:
-\begin{description}
+cfa$\indexc{cfa}\index{compilation!cfa@©cfa©}$ [ gcc/$\CFA{}$-options ] [ C/$\CFA{}$ source-files ] [ assembler/loader files ]
+\end{cfa}
+There is no ordering among options (flags) and files, unless an option has an argument, which must appear immediately after the option, with or without a space separating the option and its argument.
+
+\CFA has the following ©gcc© flags turned on:
+\begin{description}[topsep=0pt]
 \item
 \Indexc{-std=gnu11}\index{compilation option!-std=gnu11@{©-std=gnu11©}}
@@ -359,15 +361,20 @@
 Use the traditional GNU semantics for inline routines in C11 mode, which allows inline routines in header files.
 \end{description}
-The following new \CFA options are available:
-\begin{description}
+
+\CFA has the following new options:
+\begin{description}[topsep=0pt]
 \item
 \Indexc{-CFA}\index{compilation option!-CFA@©-CFA©}
-Only the C preprocessor and the \CFA translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \CFA translator.
+Only the C preprocessor (flag ©-E©) and the \CFA translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \CFA translator.
 The generated code starts with the standard \CFA \Index{prelude}.
+
+\item
+\Indexc{-XCFA}\index{compilation option!-XCFA@©-XCFA©}
+Pass next flag as-is to the ©cfa-cpp© translator (see details below).
 
 \item
 \Indexc{-debug}\index{compilation option!-debug@©-debug©}
 The program is linked with the debugging version of the runtime system.
-The debug version performs runtime checks to help during the debugging phase of a \CFA program, but can substantially slow program execution.
+The debug version performs runtime checks to aid the debugging phase of a \CFA program, but can substantially slow program execution.
 The runtime checks should only be removed after the program is completely debugged.
 \textbf{This option is the default.}
@@ -399,5 +406,5 @@
 \item
 \Indexc{-no-include-stdhdr}\index{compilation option!-no-include-stdhdr@©-no-include-stdhdr©}
-Do not supply ©extern "C"© wrappers for \Celeven standard include files (see~\VRef{s:StandardHeaders}).
+Do not supply ©extern "C"© wrappers for \Celeven standard include files \see{\VRef{s:StandardHeaders}}.
 \textbf{This option is \emph{not} the default.}
 \end{comment}
@@ -430,7 +437,7 @@
 \begin{cfa}
 #ifndef __CFORALL__
-#include <stdio.h>§\indexc{stdio.h}§ §\C{// C header file}§
+#include <stdio.h>$\indexc{stdio.h}$ $\C{// C header file}$
 #else
-#include <fstream>§\indexc{fstream}§ §\C{// \CFA header file}§
+#include <fstream>$\indexc{fstream}$ $\C{// \CFA header file}$
 #endif
 \end{cfa}
@@ -438,11 +445,20 @@
 
 The \CFA translator has multiple steps.
-The following flags control how the tranlator works, the stages run, and printing within a stage.
+The following flags control how the translator works, the stages run, and printing within a stage.
 The majority of these flags are used by \CFA developers, but some are occasionally useful to programmers.
+Each option must be escaped with \Indexc{-XCFA}\index{translator option!-XCFA@{©-XCFA©}} to direct it to the compiler step, similar to the ©-Xlinker© flag for the linker, \eg:
+\begin{lstlisting}[language=sh]
+cfa $test$.cfa -CFA -XCFA -p # print translated code without printing the standard prelude
+cfa $test$.cfa -XCFA -P -XCFA parse -XCFA -n # show program parse without prelude
+\end{lstlisting}
 \begin{description}[topsep=5pt,itemsep=0pt,parsep=0pt]
 \item
-\Indexc{-h}\index{translator option!-h@{©-h©}}, \Indexc{--help}\index{translator option!--help@{©--help©}} \, print help message
-\item
-\Indexc{-l}\index{translator option!-l@{©-l©}}, \Indexc{--libcfa}\index{translator option!--libcfa@{©--libcfa©}} \, generate libcfa.c
+\Indexc{-c}\index{translator option!-c@{©-c©}}, \Indexc{--colors}\index{translator option!--colors@{©--colors©}} \, diagnostic color: ©never©, ©always©, \lstinline[deletekeywords=auto]{auto}
+\item
+\Indexc{-g}\index{translator option!-g@{©-g©}}, \Indexc{--gdb}\index{translator option!--gdb@{©--gdb©}} \, wait for gdb to attach
+\item
+\Indexc{-h}\index{translator option!-h@{©-h©}}, \Indexc{--help}\index{translator option!--help@{©--help©}} \, print translator help message
+\item
+\Indexc{-l}\index{translator option!-l@{©-l©}}, \Indexc{--libcfa}\index{translator option!--libcfa@{©--libcfa©}} \, generate ©libcfa.c©
 \item
 \Indexc{-L}\index{translator option!-L@{©-L©}}, \Indexc{--linemarks}\index{translator option!--linemarks@{©--linemarks©}} \, generate line marks
@@ -454,55 +470,53 @@
 \Indexc{-n}\index{translator option!-n@{©-n©}}, \Indexc{--no-prelude}\index{translator option!--no-prelude@{©--no-prelude©}} \, do not read prelude
 \item
-\Indexc{-p}\index{translator option!-p@{©-p©}}, \Indexc{--prototypes}\index{translator option!--prototypes@{©--prototypes©}} \, generate prototypes for prelude functions
+\Indexc{-p}\index{translator option!-p@{©-p©}}, \Indexc{--prototypes}\index{translator option!--prototypes@{©--prototypes©}} \, do not generate prelude prototypes $\Rightarrow$ prelude not printed
+\item
+\Indexc{-d}\index{translator option!-d@{©-d©}}, \Indexc{--deterministic-out}\index{translator option!--deterministic-out@{©--deterministic-out©}} \, only print deterministic output
 \item
 \Indexc{-P}\index{translator option!-P@{©-P©}}, \Indexc{--print}\index{translator option!--print@{©--print©}} \, one of:
 \begin{description}[topsep=0pt,itemsep=0pt,parsep=0pt]
 \item
+\Indexc{ascodegen}\index{translator option!-P@{©-P©}!©ascodegen©}\index{translator option!--print@{©-print©}!©ascodegen©} \, as codegen rather than AST
+\item
+\Indexc{asterr}\index{translator option!-P@{©-P©}!©asterr©}\index{translator option!--print@{©-print©}!©asterr©} \, AST on error
+\item
+\Indexc{declstats}\index{translator option!-P@{©-P©}!©declstats©}\index{translator option!--print@{©-print©}!©declstats©} \, code property statistics
+\item
+\Indexc{parse}\index{translator option!-P@{©-P©}!©parse©}\index{translator option!--print@{©-print©}!©parse©} \, yacc (parsing) debug information
+\item
+\Indexc{pretty}\index{translator option!-P@{©-P©}!©pretty©}\index{translator option!--print@{©-print©}!©pretty©} \, prettyprint for ©ascodegen© flag
+\item
+\Indexc{rproto}\index{translator option!-P@{©-P©}!©rproto©}\index{translator option!--print@{©-print©}!©rproto©} \, resolver-proto instance
+\item
+\Indexc{rsteps}\index{translator option!-P@{©-P©}!©rsteps©}\index{translator option!--print@{©-print©}!©rsteps©} \, resolver steps
+\item
+\Indexc{tree}\index{translator option!-P@{©-P©}!©tree©}\index{translator option!--print@{©-print©}!©tree©} \, parse tree
+\item
+\Indexc{ast}\index{translator option!-P@{©-P©}!©ast©}\index{translator option!--print@{©-print©}!©ast©} \, AST after parsing
+\item
+\Indexc{symevt}\index{translator option!-P@{©-P©}!©symevt©}\index{translator option!--print@{©-print©}!©symevt©} \, symbol table events
+\item
 \Indexc{altexpr}\index{translator option!-P@{©-P©}!©altexpr©}\index{translator option!--print@{©-print©}!©altexpr©} \, alternatives for expressions
 \item
-\Indexc{ascodegen}\index{translator option!-P@{©-P©}!©ascodegen©}\index{translator option!--print@{©-print©}!©ascodegen©} \, as codegen rather than AST
-\item
-\Indexc{ast}\index{translator option!-P@{©-P©}!©ast©}\index{translator option!--print@{©-print©}!©ast©} \, AST after parsing
-\item
 \Indexc{astdecl}\index{translator option!-P@{©-P©}!©astdecl©}\index{translator option!--print@{©-print©}!©astdecl©} \, AST after declaration validation pass
 \item
-\Indexc{asterr}\index{translator option!-P@{©-P©}!©asterr©}\index{translator option!--print@{©-print©}!©asterr©} \, AST on error
+\Indexc{resolver}\index{translator option!-P@{©-P©}!©resolver©}\index{translator option!--print@{©-print©}!©resolver©} \, before resolver step
 \item
 \Indexc{astexpr}\index{translator option!-P@{©-P©}!©astexpr©}\index{translator option!--print@{©-print©}!©altexpr©} \, AST after expression analysis
 \item
+\Indexc{ctordtor}\index{translator option!-P@{©-P©}!©ctordtor©}\index{translator option!--print@{©-print©}!©ctordtor©} \, after ctor/dtor are replaced
+\item
+\Indexc{tuple}\index{translator option!-P@{©-P©}!©tuple©}\index{translator option!--print@{©-print©}!©tuple©} \, after tuple expansion
+\item
 \Indexc{astgen}\index{translator option!-P@{©-P©}!©astgen©}\index{translator option!--print@{©-print©}!©astgen©} \, AST after instantiate generics
 \item
 \Indexc{box}\index{translator option!-P@{©-P©}!©box©}\index{translator option!--print@{©-print©}!©box©} \, before box step
 \item
-\Indexc{ctordtor}\index{translator option!-P@{©-P©}!©ctordtor©}\index{translator option!--print@{©-print©}!©ctordtor©} \, after ctor/dtor are replaced
-\item
 \Indexc{codegen}\index{translator option!-P@{©-P©}!©codegen©}\index{translator option!--print@{©-print©}!©codegen©} \, before code generation
-\item
-\Indexc{declstats}\index{translator option!-P@{©-P©}!©declstats©}\index{translator option!--print@{©-print©}!©declstats©} \, code property statistics
-\item
-\Indexc{parse}\index{translator option!-P@{©-P©}!©parse©}\index{translator option!--print@{©-print©}!©parse©} \, yacc (parsing) debug information
-\item
-\Indexc{pretty}\index{translator option!-P@{©-P©}!©pretty©}\index{translator option!--print@{©-print©}!©pretty©} \, prettyprint for ascodegen flag
-\item
-\Indexc{resolver}\index{translator option!-P@{©-P©}!©resolver©}\index{translator option!--print@{©-print©}!©resolver©} \, before resolver step
-\item
-\Indexc{rproto}\index{translator option!-P@{©-P©}!©rproto©}\index{translator option!--print@{©-print©}!©rproto©} \, resolver-proto instance
-\item
-\Indexc{rsteps}\index{translator option!-P@{©-P©}!©rsteps©}\index{translator option!--print@{©-print©}!©rsteps©} \, resolver steps
-\item
-\Indexc{symevt}\index{translator option!-P@{©-P©}!©symevt©}\index{translator option!--print@{©-print©}!©symevt©} \, symbol table events
-\item
-\Indexc{tree}\index{translator option!-P@{©-P©}!©tree©}\index{translator option!--print@{©-print©}!©tree©} \, parse tree
-\item
-\Indexc{tuple}\index{translator option!-P@{©-P©}!©tuple©}\index{translator option!--print@{©-print©}!©tuple©} \, after tuple expansion
 \end{description}
 \item
 \Indexc{--prelude-dir} <directory> \, prelude directory for debug/nodebug
 \item
-\Indexc{-S}\index{translator option!-S@{©-S©}!©counters,heap,time,all,none©}, \Indexc{--statistics}\index{translator option!--statistics@{©--statistics©}!©counters,heap,time,all,none©} <option-list> \, enable profiling information:
-\begin{description}[topsep=0pt,itemsep=0pt,parsep=0pt]
-\item
-\Indexc{counters,heap,time,all,none}
-\end{description}
+\Indexc{-S}\index{translator option!-S@{©-S©}!©counters,heap,time,all,none©}, \Indexc{--statistics}\index{translator option!--statistics@{©--statistics©}!©counters,heap,time,all,none©} <option-list> \, enable profiling information: ©counters©, ©heap©, ©time©, ©all©, ©none©
 \item
 \Indexc{-t}\index{translator option!-t@{©-t©}}, \Indexc{--tree}\index{translator option!--tree@{©--tree©}} build in tree
@@ -513,13 +527,13 @@
 \label{s:BackquoteIdentifiers}
 
-\CFA introduces several new keywords (see \VRef{s:CFAKeywords}) that can clash with existing C variable-names in legacy code.
+\CFA introduces several new keywords \see{\VRef{s:CFAKeywords}} that can clash with existing C variable-names in legacy code.
 Keyword clashes are accommodated by syntactic transformations using the \CFA backquote escape-mechanism:
 \begin{cfa}
-int ®``®otype = 3; §\C{// make keyword an identifier}§
-double ®``®forall = 3.5;
+int @``@otype = 3; $\C{// make keyword an identifier}$
+double @``@forall = 3.5;
 \end{cfa}
 
 Existing C programs with keyword clashes can be converted by enclosing keyword identifiers in backquotes, and eventually the identifier name can be changed to a non-keyword name.
-\VRef[Figure]{f:HeaderFileInterposition} shows how clashes in existing C header-files (see~\VRef{s:StandardHeaders}) can be handled using preprocessor \newterm{interposition}: ©#include_next© and ©-I filename©.
+\VRef[Figure]{f:HeaderFileInterposition} shows how clashes in existing C header-files \see{\VRef{s:StandardHeaders}} can be handled using preprocessor \newterm{interposition}: ©#include_next© and ©-I filename©.
 Several common C header-files with keyword clashes are fixed in the standard \CFA header-library, so there is a seamless programming-experience.
 
@@ -527,10 +541,10 @@
 \begin{cfa}
 // include file uses the CFA keyword "with".
-#if ! defined( with ) §\C{// nesting ?}§
-#define with ®``®with §\C{// make keyword an identifier}§
+#if ! defined( with )							$\C{// nesting ?}$
+#define with @``@with							$\C{// make keyword an identifier}$
 #define __CFA_BFD_H__
 #endif
-§{\color{red}\#\textbf{include\_next} <bfdlink.h>}§ §\C{// must have internal check for multiple expansion}§
-#if defined( with ) && defined( __CFA_BFD_H__ ) §\C{// reset only if set}§
+$\R{\#include\_next} <bfdlink.h>$				$\C{// must have internal check for multiple expansion}$
+#if defined( with ) && defined( __CFA_BFD_H__ )	$\C{// reset only if set}$
 #undef with
 #undef __CFA_BFD_H__
@@ -544,16 +558,16 @@
 \section{Constant Underscores}
 
-Numeric constants are extended to allow \Index{underscore}s\index{constant!underscore}, \eg:
-\begin{cfa}
-2®_®147®_®483®_®648; §\C{// decimal constant}§
-56®_®ul; §\C{// decimal unsigned long constant}§
-0®_®377; §\C{// octal constant}§
-0x®_®ff®_®ff; §\C{// hexadecimal constant}§
-0x®_®ef3d®_®aa5c; §\C{// hexadecimal constant}§
-3.141®_®592®_®654; §\C{// floating constant}§
-10®_®e®_®+1®_®00; §\C{// floating constant}§
-0x®_®ff®_®ff®_®p®_®3; §\C{// hexadecimal floating}§
-0x®_®1.ffff®_®ffff®_®p®_®128®_®l; §\C{// hexadecimal floating long constant}§
-L®_®§"\texttt{\textbackslash{x}}§®_®§\texttt{ff}§®_®§\texttt{ee}"§; §\C{// wide character constant}§
+Numeric constants are extended to allow \Index{underscore}s\index{constant!underscore} as a separator, \eg:
+\begin{cfa}
+2@_@147@_@483@_@648; $\C{// decimal constant}$
+56@_@ul; $\C{// decimal unsigned long constant}$
+0@_@377; $\C{// octal constant}$
+0x@_@ff@_@ff; $\C{// hexadecimal constant}$
+0x@_@ef3d@_@aa5c; $\C{// hexadecimal constant}$
+3.141@_@592@_@654; $\C{// floating constant}$
+10@_@e@_@+1@_@00; $\C{// floating constant}$
+0x@_@ff@_@ff@_@p@_@3; $\C{// hexadecimal floating}$
+0x@_@1.ffff@_@ffff@_@p@_@128@_@l; $\C{// hexadecimal floating long constant}$
+L@_@$"\texttt{\textbackslash{x}}$@_@$\texttt{ff}$@_@$\texttt{ee}"$; $\C{// wide character constant}$
 \end{cfa}
 The rules for placement of underscores are:
@@ -574,11 +588,12 @@
 It is significantly easier to read and enter long constants when they are broken up into smaller groupings (many cultures use comma and/or period among digits for the same purpose).
 This extension is backwards compatible, matches with the use of underscore in variable names, and appears in \Index*{Ada} and \Index*{Java} 8.
+\CC uses the single quote (©'©) as a separator, restricted within a sequence of digits, \eg ©0xaa©©'©©ff©, ©3.141©©'©©592E1©©'©©1©.
 
 
 \section{Exponentiation Operator}
 
-C, \CC, and Java (and many other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow(x,y)}, to perform the exponentiation operation.
-\CFA extends the basic operators with the exponentiation operator ©?®\®?©\index{?\\?@©?®\®?©} and ©?\=?©\index{?\\=?@©®\®=?©}, as in, ©x ®\® y© and ©x ®\®= y©, which means $x^y$ and $x \leftarrow x^y$.
-The priority of the exponentiation operator is between the cast and multiplicative operators, so that ©w * (int)x \ (int)y * z© is parenthesized as ©((w * (((int)x) \ ((int)y))) * z)©.
+C, \CC, and Java (and other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow(x,y)}, to perform the exponentiation operation.
+\CFA extends the basic operators with the exponentiation operator ©?©\R{©\\©}©?©\index{?\\?@©?@\@?©} and ©?©\R{©\\©}©=?©\index{?\\=?@©@\@=?©}, as in, ©x ©\R{©\\©}© y© and ©x ©\R{©\\©}©= y©, which means $x^y$ and $x \leftarrow x^y$.
+The priority of the exponentiation operator is between the cast and multiplicative operators, so that ©w * (int)x \ (int)y * z© is parenthesized as ©(w * (((int)x) \ ((int)y))) * z©.
 
 There are exponentiation operators for integral and floating types, including the builtin \Index{complex} types.
@@ -587,16 +602,17 @@
 Floating exponentiation\index{exponentiation!floating} is performed using \Index{logarithm}s\index{exponentiation!logarithm}, so the exponent cannot be negative.
 \begin{cfa}
-sout | 1 ®\® 0 | 1 ®\® 1 | 2 ®\® 8 | -4 ®\® 3 | 5 ®\® 3 | 5 ®\® 32 | 5L ®\® 32 | 5L ®\® 64 | -4 ®\® -3 | -4.0 ®\® -3 | 4.0 ®\® 2.1
-	   | (1.0f+2.0fi) ®\® (3.0f+2.0fi);
-1 1 256 -64 125 ®0® 3273344365508751233 ®0® ®0® -0.015625 18.3791736799526 0.264715-1.1922i
+sout | 1 @\@ 0 | 1 @\@ 1 | 2 @\@ 8 | -4 @\@ 3 | 5 @\@ 3 | 5 @\@ 32 | 5L @\@ 32 | 5L @\@ 64 | -4 @\@ -3 | -4.0 @\@ -3 | 4.0 @\@ 2.1
+	   | (1.0f+2.0fi) @\@ (3.0f+2.0fi);
+1 1 256 -64 125 @0@ 3273344365508751233 @0@ @0@ -0.015625 18.3791736799526 0.264715-1.1922i
 \end{cfa}
 Note, ©5 \ 32© and ©5L \ 64© overflow, and ©-4 \ -3© is a fraction but stored in an integer so all three computations generate an integral zero.
-Parenthesis are necessary for complex constants or the expression is parsed as ©1.0f+®(®2.0fi \ 3.0f®)®+2.0fi©.
+Because exponentiation has higher priority than ©+©, parentheses are necessary for exponentiation of \Index{complex constant}s or the expression is parsed as ©1.0f+©\R{©(©}©2.0fi \ 3.0f©\R{©)©}©+2.0fi©, requiring \R{©(©}©1.0f+2.0fi©\R{©)©}© \ ©\R{©(©}©3.0f+2.0fi©\R{©)©}.
+
 The exponentiation operator is available for all the basic types, but for user-defined types, only the integral-computation version is available.
 \begin{cfa}
-forall( otype OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } )
-OT ?®\®?( OT ep, unsigned int y );
-forall( otype OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } )
-OT ?®\®?( OT ep, unsigned long int y );
+forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); } )
+T ?@\@?( T ep, unsigned int y );
+forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); } )
+T ?@\@?( T ep, unsigned long int y );
 \end{cfa}
 The user type ©T© must define multiplication, one (©1©), and ©*©.
@@ -609,25 +625,27 @@
 
 %\subsection{\texorpdfstring{\protect\lstinline@if@/\protect\lstinline@while@ Statement}{if Statement}}
-\subsection{\texorpdfstring{\LstKeywordStyle{if}/\LstKeywordStyle{while} Statement}{if/while Statement}}
-
-The ©if©/©while© expression allows declarations, similar to ©for© declaration expression.
-(Does not make sense for ©do©-©while©.)
-\begin{cfa}
-if ( ®int x = f()® ) ... §\C{// x != 0}§
-if ( ®int x = f(), y = g()® ) ... §\C{// x != 0 \&\& y != 0}§
-if ( ®int x = f(), y = g(); x < y® ) ... §\C{// relational expression}§
-if ( ®struct S { int i; } x = { f() }; x.i < 4® ) §\C{// relational expression}§
-
-while ( ®int x = f()® ) ... §\C{// x != 0}§
-while ( ®int x = f(), y = g()® ) ... §\C{// x != 0 \&\& y != 0}§
-while ( ®int x = f(), y = g(); x < y® ) ... §\C{// relational expression}§
-while ( ®struct S { int i; } x = { f() }; x.i < 4® ) ... §\C{// relational expression}§
-\end{cfa}
-Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical ©&&© operator.\footnote{\CC only provides a single declaration always compared not equal to 0.}
-The scope of the declaration(s) is local to the @if@ statement but exist within both the ``then'' and ``else'' clauses.
+\subsection{\texorpdfstring{\LstKeywordStyle{if} / \LstKeywordStyle{while} Statement}{if / while Statement}}
+
+The ©if©/©while© expression allows declarations, similar to the ©for© declaration expression.\footnote{
+Declarations in the ©do©-©while© condition are not useful because they appear after the loop body.}
+\begin{cfa}
+if ( @int x = f()@ ) ... $\C{// x != 0}$
+if ( @int x = f(), y = g()@ ) ... $\C{// x != 0 \&\& y != 0}$
+if ( @int x = f(), y = g(); x < y@ ) ... $\C{// relational expression}$
+if ( @struct S { int i; } x = { f() }; x.i < 4@ ) $\C{// relational expression}$
+
+while ( @int x = f()@ ) ... $\C{// x != 0}$
+while ( @int x = f(), y = g()@ ) ... $\C{// x != 0 \&\& y != 0}$
+while ( @int x = f(), y = g(); x < y@ ) ... $\C{// relational expression}$
+while ( @struct S { int i; } x = { f() }; x.i < 4@ ) ... $\C{// relational expression}$
+\end{cfa}
+Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical ©&&© operator.
+The scope of the declaration(s) is local to the ©if© statement but exists within both the \emph{then} and \emph{else} clauses.
+\CC only provides a single declaration always compared ©!=© to 0.
 
 
 %\section{\texorpdfstring{\protect\lstinline@case@ Clause}{case Clause}}
 \subsection{\texorpdfstring{\LstKeywordStyle{case} Clause}{case Clause}}
+\label{s:caseClause}
 
 C restricts the ©case© clause of a ©switch© statement to a single value.
@@ -640,7 +658,7 @@
 \begin{cfa}
 switch ( i ) {
-  case ®1, 3, 5®:
+  case @1, 3, 5@:
 	...
-  case ®2, 4, 6®:
+  case @2, 4, 6@:
 	...
 }
@@ -670,7 +688,7 @@
 \begin{cfa}
 switch ( i ) {
-  case ®1~5:® §\C{// 1, 2, 3, 4, 5}§
+  case @1~5:@ $\C{// 1, 2, 3, 4, 5}$
 	...
-  case ®10~15:® §\C{// 10, 11, 12, 13, 14, 15}§
+  case @10~15:@ $\C{// 10, 11, 12, 13, 14, 15}$
 	...
 }
@@ -678,5 +696,5 @@
 Lists of subranges are also allowed.
 \begin{cfa}
-case ®1~5, 12~21, 35~42®:
+case @1~5, 12~21, 35~42@:
 \end{cfa}
 
@@ -722,9 +740,9 @@
 if ( argc == 3 ) {
 	// open output file
-	®// open input file
-®} else if ( argc == 2 ) {
-	®// open input file (duplicate)
-
-®} else {
+	@// open input file
+@} else if ( argc == 2 ) {
+	@// open input file (duplicate)
+
+@} else {
 	// usage message
 }
@@ -733,18 +751,18 @@
 \end{cquote}
 In this example, case 2 is always done if case 3 is done.
-This control flow is difficult to simulate with if statements or a ©switch© statement without fall-through as code must be duplicated or placed in a separate routine.
+This control flow is difficult to simulate with ©if© statements or a ©switch© statement without fall-through as code must be duplicated or placed in a separate routine.
 C also uses fall-through to handle multiple case-values resulting in the same action:
 \begin{cfa}
 switch ( i ) {
-  ®case 1: case 3: case 5:®	// odd values
+  @case 1: case 3: case 5:@	// odd values
 	// odd action
 	break;
-  ®case 2: case 4: case 6:®	// even values
+  @case 2: case 4: case 6:@	// even values
 	// even action
 	break;
 }
 \end{cfa}
-However, this situation is handled in other languages without fall-through by allowing a list of case values.
-While fall-through itself is not a problem, the problem occurs when fall-through is the default, as this semantics is unintuitive to many programmers and is different from virtually all other programming languages with a ©switch© statement.
+This situation is better handled without fall-through by allowing a list of case values \see{\VRef{s:caseClause}}.
+While fall-through itself is not a problem, the problem occurs when fall-through is the default, as this semantics is unintuitive to many programmers and is different from most programming languages with a ©switch© statement.
 Hence, default fall-through semantics results in a large number of programming errors as programmers often \emph{forget} the ©break© statement at the end of a ©case© clause, resulting in inadvertent fall-through.
 
@@ -756,5 +774,5 @@
 	if ( j < k ) {
 		...
-	  ®case 1:®		// transfer into "if" statement
+	  @case 1:@		// transfer into "if" statement
 		...
 	} // if
@@ -762,13 +780,13 @@
 	while ( j < 5 ) {
 		...
-	  ®case 3:®		// transfer into "while" statement
+	  @case 3:@		// transfer into "while" statement
 		...
 	} // while
 } // switch
 \end{cfa}
-The problem with this usage is branching into control structures, which is known to cause both comprehension and technical difficulties.
-The comprehension problem occurs from the inability to determine how control reaches a particular point due to the number of branches leading to it.
+This usage branches into control structures, which is known to cause both comprehension and technical difficulties.
+The comprehension problem results from the inability to determine how control reaches a particular point due to the number of branches leading to it.
 The technical problem results from the inability to ensure declaration and initialization of variables when blocks are not entered at the beginning.
-There are no positive arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
+There are few arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
 Nevertheless, C does have an idiom where this capability is used, known as ``\Index*{Duff's device}''~\cite{Duff83}:
 \begin{cfa}
@@ -794,5 +812,5 @@
 \item
 It is possible to place the ©default© clause anywhere in the list of labelled clauses for a ©switch© statement, rather than only at the end.
-Virtually all programming languages with a ©switch© statement require the ©default© clause to appear last in the case-clause list.
+Most programming languages with a ©switch© statement require the ©default© clause to appear last in the case-clause list.
 The logic for this semantics is that after checking all the ©case© clauses without success, the ©default© clause is selected;
 hence, physically placing the ©default© clause at the end of the ©case© clause list matches with this semantics.
@@ -803,18 +821,18 @@
 \begin{cfa}
 switch ( x ) {
-	®int y = 1;® §\C{// unreachable initialization}§
-	®x = 7;® §\C{// unreachable code without label/branch}§
+	@int y = 1;@ $\C{// unreachable initialization}$
+	@x = 7;@ $\C{// unreachable code without label/branch}$
   case 0: ...
 	...
-	®int z = 0;® §\C{// unreachable initialization, cannot appear after case}§
+	@int z = 0;@ $\C{// unreachable initialization, cannot appear after case}$
 	z = 2;
   case 1:
-	®x = z;® §\C{// without fall through, z is uninitialized}§
+	@x = z;@ $\C{// without fall through, z is uninitialized}$
 }
 \end{cfa}
 While the declaration of the local variable ©y© is useful with a scope across all ©case© clauses, the initialization for such a variable is defined to never be executed because control always transfers over it.
-Furthermore, any statements before the first ©case© clause can only be executed if labelled and transferred to using a ©goto©, either from outside or inside of the ©switch©, both of which are problematic.
-As well, the declaration of ©z© cannot occur after the ©case© because a label can only be attached to a statement, and without a fall through to case 3, ©z© is uninitialized.
-The key observation is that the ©switch© statement branches into control structure, \ie there are multiple entry points into its statement body.
+Furthermore, any statements before the first ©case© clause can only be executed if labelled and transferred to using a ©goto©, either from outside or inside of the ©switch©, where both are problematic.
+As well, the declaration of ©z© cannot occur after the ©case© because a label can only be attached to a statement, and without a fall-through to case 1, ©z© is uninitialized.
+The key observation is that the ©switch© statement branches into a control structure, \ie there are multiple entry points into its statement body.
 \end{enumerate}
 
@@ -842,19 +860,19 @@
 Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called ©choose©, with no implicit fall-through semantics and an explicit fall-through if the last statement of a case-clause ends with the new keyword ©fallthrough©/©fallthru©, \eg:
 \begin{cfa}
-®choose® ( i ) {
+@choose@ ( i ) {
   case 1:  case 2:  case 3:
 	...
-	®// implicit end of switch (break)
-  ®case 5:
+	@// implicit end of switch (break)
+  @case 5:
 	...
-	®fallthru®; §\C{// explicit fall through}§
+	@fallthru@; $\C{// explicit fall through}$
   case 7:
 	...
-	®break® §\C{// explicit end of switch (redundant)}§
+	@break@ $\C{// explicit end of switch (redundant)}$
   default:
 	j = 3;
 }
 \end{cfa}
-Like the ©switch© statement, the ©choose© statement retains the fall-through semantics for a list of ©case© clauses;
+Like the ©switch© statement, the ©choose© statement retains the fall-through semantics for a list of ©case© clauses.
 An implicit ©break© is applied only at the end of the \emph{statements} following a ©case© clause.
 An explicit ©fallthru© is retained because it is a C-idiom most C programmers expect, and its absence might discourage programmers from using the ©choose© statement.
@@ -872,13 +890,13 @@
 \begin{cfa}
 switch ( x ) {
-	®int i = 0;® §\C{// allowed only at start}§
+	@int i = 0;@ $\C{// allowed only at start}$
   case 0:
 	...
-	®int j = 0;® §\C{// disallowed}§
+	@int j = 0;@ $\C{// disallowed}$
   case 1:
 	{
-		®int k = 0;® §\C{// allowed at different nesting levels}§
+		@int k = 0;@ $\C{// allowed at different nesting levels}$
 		...
-	  ®case 2:® §\C{// disallow case in nested statements}§
+	  @case 2:@ $\C{// disallow case in nested statements}$
 	}
   ...
@@ -897,5 +915,5 @@
   case 3:
 	if ( ... ) {
-		... ®fallthru;® // goto case 4
+		... @fallthru;@ // goto case 4
 	} else {
 		...
@@ -912,9 +930,9 @@
 choose ( ... ) {
   case 3:
-	... ®fallthrough common;®
+	... @fallthrough common;@
   case 4:
-	... ®fallthrough common;®
-
-  ®common:® // below fallthrough
+	... @fallthrough common;@
+
+  @common:@ // below fallthrough
 			  // at case-clause level
 	...	// common code for cases 3/4
@@ -932,10 +950,10 @@
 		for ( ... ) {
 			// multi-level transfer
-			... ®fallthru common;®
+			... @fallthru common;@
 		}
 		...
 	}
 	...
-  ®common:® // below fallthrough
+  @common:@ // below fallthrough
 			  // at case-clause level
 \end{cfa}
@@ -948,50 +966,50 @@
 
 \begin{figure}
-\begin{tabular}{@{}l|l@{}}
-\multicolumn{1}{c|}{loop control} & \multicolumn{1}{c}{output} \\
+\begin{tabular}{@{}l@{\hspace{25pt}}|l@{}}
+\multicolumn{1}{@{}c@{\hspace{25pt}}|}{loop control} & \multicolumn{1}{c@{}}{output} \\
 \hline
-\begin{cfa}[xleftmargin=0pt]
-while ®()® { sout | "empty"; break; }
-do { sout | "empty"; break; } while ®()®;
-for ®()® { sout | "empty"; break; }
-for ( ®0® ) { sout | "A"; } sout | "zero";
-for ( ®1® ) { sout | "A"; }
-for ( ®10® ) { sout | "A"; }
-for ( ®= 10® ) { sout | "A"; }
-for ( ®1 ~= 10 ~ 2® ) { sout | "B"; }
-for ( ®10 -~= 1 ~ 2® ) { sout | "C"; }
-for ( ®0.5 ~ 5.5® ) { sout | "D"; }
-for ( ®5.5 -~ 0.5® ) { sout | "E"; }
-for ( ®i; 10® ) { sout | i; }
-for ( ®i; = 10® ) { sout | i; }
-for ( ®i; 1 ~= 10 ~ 2® ) { sout | i; }
-for ( ®i; 10 -~= 1 ~ 2® ) { sout | i; }
-for ( ®i; 0.5 ~ 5.5® ) { sout | i; }
-for ( ®i; 5.5 -~ 0.5® ) { sout | i; }
-for ( ®ui; 2u ~= 10u ~ 2u® ) { sout | ui; }
-for ( ®ui; 10u -~= 2u ~ 2u® ) { sout | ui; }
+\begin{cfa}
+while @()@ { sout | "empty"; break; }
+do { sout | "empty"; break; } while @()@;
+for @()@ { sout | "empty"; break; }
+for ( @0@ ) { sout | "A"; } sout | "zero";
+for ( @1@ ) { sout | "A"; }
+for ( @10@ ) { sout | "A"; }
+for ( @= 10@ ) { sout | "A"; }
+for ( @1 ~= 10 ~ 2@ ) { sout | "B"; }
+for ( @10 -~= 1 ~ 2@ ) { sout | "C"; }
+for ( @0.5 ~ 5.5@ ) { sout | "D"; }
+for ( @5.5 -~ 0.5@ ) { sout | "E"; }
+for ( @i; 10@ ) { sout | i; }
+for ( @i; = 10@ ) { sout | i; }
+for ( @i; 1 ~= 10 ~ 2@ ) { sout | i; }
+for ( @i; 10 -~= 1 ~ 2@ ) { sout | i; }
+for ( @i; 0.5 ~ 5.5@ ) { sout | i; }
+for ( @i; 5.5 -~ 0.5@ ) { sout | i; }
+for ( @ui; 2u ~= 10u ~ 2u@ ) { sout | ui; }
+for ( @ui; 10u -~= 2u ~ 2u@ ) { sout | ui; }
 enum { N = 10 };
-for ( ®N® ) { sout | "N"; }
-for ( ®i; N® ) { sout | i; }
-for ( ®i; N -~ 0® ) { sout | i; }
+for ( @N@ ) { sout | "N"; }
+for ( @i; N@ ) { sout | i; }
+for ( @i; N -~ 0@ ) { sout | i; }
 const int start = 3, comp = 10, inc = 2;
-for ( ®i; start ~ comp ~ inc + 1® ) { sout | i; }
-for ( i; 1 ~ ®@® ) { if ( i > 10 ) break; sout | i; }
-for ( i; 10 -~ ®@® ) { if ( i < 0 ) break; sout | i; }
-for ( i; 2 ~ ®@® ~ 2 ) { if ( i > 10 ) break; sout | i; }
-for ( i; 2.1 ~ ®@® ~ ®@® ) { if ( i > 10.5 ) break; sout | i; i += 1.7; }
-for ( i; 10 -~ ®@® ~ 2 ) { if ( i < 0 ) break; sout | i; }
-for ( i; 12.1 ~ ®@® ~ ®@® ) { if ( i < 2.5 ) break; sout | i; i -= 1.7; }
-for ( i; 5 ®:® j; -5 ~ @ ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 -~ @ ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 ~ @ ~ 2 ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 -~ @ ~ 2 ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 ~ @ ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 -~ @ ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 ~ @ ~ 2 ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 -~ @ ~ 2 ) { sout | i | j; }
-for ( i; 5 ®:® j; -5 -~ @ ~ 2 ®:® k; 1.5 ~ @ ) { sout | i | j | k; }
-for ( i; 5 ®:® j; -5 -~ @ ~ 2 ®:® k; 1.5 ~ @ ) { sout | i | j | k; }
-for ( i; 5 ®:® k; 1.5 ~ @ ®:® j; -5 -~ @ ~ 2 ) { sout | i | j | k; }
+for ( @i; start ~ comp ~ inc + 1@ ) { sout | i; }
+for ( i; 1 ~ $\R{@}$ ) { if ( i > 10 ) break; sout | i; }
+for ( i; 10 -~ $\R{@}$ ) { if ( i < 0 ) break; sout | i; }
+for ( i; 2 ~ $\R{@}$ ~ 2 ) { if ( i > 10 ) break; sout | i; }
+for ( i; 2.1 ~ $\R{@}$ ~ $\R{@}$ ) { if ( i > 10.5 ) break; sout | i; i += 1.7; }
+for ( i; 10 -~ $\R{@}$ ~ 2 ) { if ( i < 0 ) break; sout | i; }
+for ( i; 12.1 ~ $\R{@}$ ~ $\R{@}$ ) { if ( i < 2.5 ) break; sout | i; i -= 1.7; }
+for ( i; 5 @:@ j; -5 ~ $@$ ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 -~ $@$ ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 ~ $@$ ~ 2 ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 ~ $@$ ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 -~ $@$ ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 ~ $@$ ~ 2 ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 ) { sout | i | j; }
+for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 @:@ k; 1.5 ~ $@$ ) { sout | i | j | k; }
+for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 @:@ k; 1.5 ~ $@$ ) { sout | i | j | k; }
+for ( i; 5 @:@ k; 1.5 ~ $@$ @:@ j; -5 -~ $@$ ~ 2 ) { sout | i | j | k; }
 \end{cfa}
 &
@@ -1056,34 +1074,69 @@
 \subsection{Loop Control}
 
-The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
-\begin{itemize}
+Looping a fixed number of times, possibly with a loop index, occurs frequently.
+\CFA condenses simple looping to facilitate coding speed and safety.
+The ©for©/©while©/©do-while© loop-control is augmented as follows \see{examples in \VRef[Figure]{f:LoopControlExamples}}:
+\begin{itemize}[itemsep=0pt]
+\item
+©0© is the implicit start value;
+\item
+©1© is the implicit increment value.
+\item
+The up-to range uses operator ©+=© for increment;
+\item
+The down-to range uses operator ©-=© for decrement.
 \item
 The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M.
+\begin{cfa}
+for ( i; @5@ )					$\C[2.5in]{// typeof(5) i; 5 is comparison value}$
+for ( i; @1.5@~5.5~0.5 )		$\C{// typeof(1.5) i; 1.5 is start value}$
+\end{cfa}
 \item
 An empty conditional implies comparison value of ©1© (true).
-\item
-A comparison N is implicit up-to exclusive range [0,N©®)®©.
-\item
-A comparison ©=© N is implicit up-to inclusive range [0,N©®]®©.
-\item
-The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N©®)®©.
-\item
-The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N©®]®©.
-\item
-The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M©®)®©.
-\item
-The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M©®]®©.
-\item
-©0© is the implicit start value;
-\item
-©1© is the implicit increment value.
-\item
-The up-to range uses operator ©+=© for increment;
-\item
-The down-to range uses operator ©-=© for decrement.
+\begin{cfa}
+while ( $\R{/*empty*/}$ )		$\C{// while ( true )}$
+for ( $\R{/*empty*/}$ )			$\C{// for ( ; true; )}$
+do ... while ( $\R{/*empty*/}$ ) $\C{// do ... while ( true )}$
+\end{cfa}
+\item
+A comparison N is implicit up-to exclusive range [0,N\R{)}.
+\begin{cfa}
+for ( @5@ )						$\C{// for ( typeof(5) i; i < 5; i += 1 )}$
+\end{cfa}
+\item
+A comparison ©=© N is implicit up-to inclusive range [0,N\R{]}.
+\begin{cfa}
+for ( @=@5 )					$\C{// for ( typeof(5) i; i <= 5; i += 1 )}$
+\end{cfa}
+\item
+The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N\R{)}.
+\begin{cfa}
+for ( 1@~@5 )					$\C{// for ( typeof(1) i = 1; i < 5; i += 1 )}$
+\end{cfa}
+\item
+The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N\R{]}.
+\begin{cfa}
+for ( 1@~=@5 )					$\C{// for ( typeof(1) i = 1; i <= 5; i += 1 )}$
+\end{cfa}
+\item
+The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M\R{)}.
+\begin{cfa}
+for ( 1@-~@5 )					$\C{// for ( typeof(1) i = 5; i > 0; i -= 1 )}$
+\end{cfa}
+\item
+The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M\R{]}.
+\begin{cfa}
+for ( 1@-~=@5 )					$\C{// for ( typeof(1) i = 5; i >= 0; i -= 1 )}$
+\end{cfa}
 \item
 ©@© means put nothing in this field.
+\begin{cfa}
+for ( 1~$\R{@}$~2 )				$\C{// for ( typeof(1) i = 1; /*empty*/; i += 2 )}$
+\end{cfa}
 \item
 ©:© means start another index.
+\begin{cfa}
+for ( i; 5 @:@ j; 2~12~3 )		$\C{// for ( typeof(5) i = 0, j = 2; i < 5 \&\& j < 12; i += 1, j += 3 )}\CRT$
+\end{cfa}
 \end{itemize}
 
@@ -1104,27 +1157,27 @@
 \begin{lrbox}{\myboxA}
 \begin{cfa}[tabsize=3]
-®Compound:® {
-	®Try:® try {
-		®For:® for ( ... ) {
-			®While:® while ( ... ) {
-				®Do:® do {
-					®If:® if ( ... ) {
-						®Switch:® switch ( ... ) {
+@Compound:@ {
+	@Try:@ try {
+		@For:@ for ( ... ) {
+			@While:@ while ( ... ) {
+				@Do:@ do {
+					@If:@ if ( ... ) {
+						@Switch:@ switch ( ... ) {
 							case 3:
-								®break Compound®;
-								®break Try®;
-								®break For®;      /* or */  ®continue For®;
-								®break While®;  /* or */  ®continue While®;
-								®break Do®;      /* or */  ®continue Do®;
-								®break If®;
-								®break Switch®;
+								@break Compound@;
+								@break Try@;
+								@break For@;      /* or */  @continue For@;
+								@break While@;  /* or */  @continue While@;
+								@break Do@;      /* or */  @continue Do@;
+								@break If@;
+								@break Switch@;
 							} // switch
 						} else {
-							... ®break If®; ...	// terminate if
+							... @break If@; ...	// terminate if
 						} // if
 				} while ( ... ); // do
 			} // while
 		} // for
-	} ®finally® { // always executed
+	} @finally@ { // always executed
 	} // try
 } // compound
@@ -1136,34 +1189,34 @@
 {
 
-		®ForC:® for ( ... ) {
-			®WhileC:® while ( ... ) {
-				®DoC:® do {
+		@ForC:@ for ( ... ) {
+			@WhileC:@ while ( ... ) {
+				@DoC:@ do {
 					if ( ... ) {
 						switch ( ... ) {
 							case 3:
-								®goto Compound®;
-								®goto Try®;
-								®goto ForB®;      /* or */  ®goto ForC®;
-								®goto WhileB®;  /* or */  ®goto WhileC®;
-								®goto DoB®;      /* or */  ®goto DoC®;
-								®goto If®;
-								®goto Switch®;
-							} ®Switch:® ;
+								@goto Compound@;
+								@goto Try@;
+								@goto ForB@;      /* or */  @goto ForC@;
+								@goto WhileB@;  /* or */  @goto WhileC@;
+								@goto DoB@;      /* or */  @goto DoC@;
+								@goto If@;
+								@goto Switch@;
+							} @Switch:@ ;
 						} else {
-							... ®goto If®; ...	// terminate if
-						} ®If:®;
-				} while ( ... ); ®DoB:® ;
-			} ®WhileB:® ;
-		} ®ForB:® ;
-
-
-} ®Compound:® ;
+							... @goto If@; ...	// terminate if
+						} @If:@;
+				} while ( ... ); @DoB:@ ;
+			} @WhileB:@ ;
+		} @ForB:@ ;
+
+
+} @Compound:@ ;
 \end{cfa}
 \end{lrbox}
 
+\hspace*{-10pt}
 \subfloat[\CFA]{\label{f:CFibonacci}\usebox\myboxA}
 \hspace{2pt}
 \vrule
-\hspace{2pt}
 \subfloat[C]{\label{f:CFAFibonacciGen}\usebox\myboxB}
 \caption{Multi-level Exit}
@@ -1193,6 +1246,6 @@
 Grouping heterogeneous data into \newterm{aggregate}s (structure/union) is a common programming practice, and an aggregate can be further organized into more complex structures, such as arrays and containers:
 \begin{cfa}
-struct S { §\C{// aggregate}§
-	char c; §\C{// fields}§
+struct S { $\C{// aggregate}$
+	char c; $\C{// fields}$
 	int i;
 	double d;
@@ -1203,5 +1256,5 @@
 \begin{cfa}
 void f( S s ) {
-	®s.®c; ®s.®i; ®s.®d; §\C{// access containing fields}§
+	@s.@c; @s.@i; @s.@d; $\C{// access containing fields}$
 }
 \end{cfa}
@@ -1210,9 +1263,9 @@
 \begin{C++}
 struct S {
-	char c; §\C{// fields}§
+	char c; $\C{// fields}$
 	int i;
 	double d;
-	void f() { §\C{// implicit ``this'' aggregate}§
-		®this->®c; ®this->®i; ®this->®d; §\C{// access containing fields}§
+	void f() { $\C{// implicit ``this'' aggregate}$
+		@this->@c; @this->@i; @this->@d; $\C{// access containing fields}$
 	}
 }
@@ -1222,22 +1275,22 @@
 \begin{cfa}
 struct T { double m, n; };
-int S::f( T & t ) { §\C{// multiple aggregate parameters}§
-	c; i; d; §\C{\color{red}// this--{\textgreater}.c, this--{\textgreater}.i, this--{\textgreater}.d}§
-	®t.®m; ®t.®n; §\C{// must qualify}§
-}
-\end{cfa}
-
-To simplify the programmer experience, \CFA provides a ©with© statement (see Pascal~\cite[\S~4.F]{Pascal}) to elide aggregate qualification to fields by opening a scope containing the field identifiers.
+int S::f( T & t ) { $\C{// multiple aggregate parameters}$
+	c; i; d; $\C{\R{// this--{\textgreater}c, this--{\textgreater}i, this--{\textgreater}d}}$
+	@t.@m; @t.@n; $\C{// must qualify}$
+}
+\end{cfa}
+
+To simplify the programmer experience, \CFA provides a ©with© statement \see{Pascal~\cite[\S~4.F]{Pascal}} to elide aggregate qualification to fields by opening a scope containing the field identifiers.
 Hence, the qualified fields become variables with the side-effect that it is easier to optimizing field references in a block.
 \begin{cfa}
-void f( S & this ) ®with ( this )® { §\C{// with statement}§
-	c; i; d; §\C{\color{red}// this.c, this.i, this.d}§
+void f( S & this ) @with ( this )@ { $\C{// with statement}$
+	c; i; d; $\C{\R{// this.c, this.i, this.d}}$
 }
 \end{cfa}
 with the generality of opening multiple aggregate-parameters:
 \begin{cfa}
-void f( S & s, T & t ) ®with ( s, t )® { §\C{// multiple aggregate parameters}§
-	c; i; d; §\C{\color{red}// s.c, s.i, s.d}§
-	m; n; §\C{\color{red}// t.m, t.n}§
+void f( S & s, T & t ) @with ( s, t )@ { $\C{// multiple aggregate parameters}$
+	c; i; d; $\C{\R{// s.c, s.i, s.d}}$
+	m; n; $\C{\R{// t.m, t.n}}$
 }
 \end{cfa}
@@ -1245,6 +1298,6 @@
 In detail, the ©with© statement has the form:
 \begin{cfa}
-§\emph{with-statement}§:
-	'with' '(' §\emph{expression-list}§ ')' §\emph{compound-statement}§
+$\emph{with-statement}$:
+	'with' '(' $\emph{expression-list}$ ')' $\emph{compound-statement}$
 \end{cfa}
 and may appear as the body of a function or nested within a function body.
@@ -1258,14 +1311,14 @@
 The difference between parallel and nesting occurs for fields with the same name and type:
 \begin{cfa}
-struct S { int ®i®; int j; double m; } s, w;
-struct T { int ®i®; int k; int m; } t, w;
+struct S { int @i@; int j; double m; } s, w;
+struct T { int @i@; int k; int m; } t, w;
 with ( s, t ) {
-	j + k; §\C{// unambiguous, s.j + t.k}§
-	m = 5.0; §\C{// unambiguous, t.m = 5.0}§
-	m = 1; §\C{// unambiguous, s.m = 1}§
-	int a = m; §\C{// unambiguous, a = s.i }§
-	double b = m; §\C{// unambiguous, b = t.m}§
-	int c = s.i + t.i; §\C{// unambiguous, qualification}§
-	(double)m; §\C{// unambiguous, cast}§
+	j + k; $\C{// unambiguous, s.j + t.k}$
+	m = 5.0; $\C{// unambiguous, s.m = 5.0}$
+	m = 1; $\C{// unambiguous, t.m = 1}$
+	int a = m; $\C{// unambiguous, a = t.m}$
+	double b = m; $\C{// unambiguous, b = s.m}$
+	int c = s.i + t.i; $\C{// unambiguous, qualification}$
+	(double)m; $\C{// unambiguous, cast}$
 }
 \end{cfa}
@@ -1277,6 +1330,6 @@
 There is an interesting problem between parameters and the function-body ©with©, \eg:
 \begin{cfa}
-void ?{}( S & s, int i ) with ( s ) { §\C{// constructor}§
-	®s.i = i;®  j = 3;  m = 5.5; §\C{// initialize fields}§
+void ?{}( S & s, int i ) with ( s ) { $\C{// constructor}$
+	@s.i = i;@  j = 3;  m = 5.5; $\C{// initialize fields}$
 }
 \end{cfa}
@@ -1291,26 +1344,26 @@
 and implicitly opened \emph{after} a function-body open, to give them higher priority:
 \begin{cfa}
-void ?{}( S & s, int ®i® ) with ( s ) ®with( §\emph{\color{red}params}§ )® {
-	s.i = ®i®; j = 3; m = 5.5;
+void ?{}( S & s, int @i@ ) with ( s ) @with( $\emph{\R{params}}$ )@ {
+	s.i = @i@; j = 3; m = 5.5;
 }
 \end{cfa}
 Finally, a cast may be used to disambiguate among overload variables in a ©with© expression:
 \begin{cfa}
-with ( w ) { ... } §\C{// ambiguous, same name and no context}§
-with ( (S)w ) { ... } §\C{// unambiguous, cast}§
-\end{cfa}
-and ©with© expressions may be complex expressions with type reference (see Section~\ref{s:References}) to aggregate:
+with ( w ) { ... } $\C{// ambiguous, same name and no context}$
+with ( (S)w ) { ... } $\C{// unambiguous, cast}$
+\end{cfa}
+and ©with© expressions may be complex expressions with type reference \see{\VRef{s:References}} to aggregate:
 % \begin{cfa}
 % struct S { int i, j; } sv;
-% with ( sv ) { §\C{// implicit reference}§
+% with ( sv ) { $\C{// implicit reference}$
 % 	S & sr = sv;
-% 	with ( sr ) { §\C{// explicit reference}§
+% 	with ( sr ) { $\C{// explicit reference}$
 % 		S * sp = &sv;
-% 		with ( *sp ) { §\C{// computed reference}§
-% 			i = 3; j = 4; §\C{\color{red}// sp--{\textgreater}i, sp--{\textgreater}j}§
+% 		with ( *sp ) { $\C{// computed reference}$
+% 			i = 3; j = 4; $\C{\color{red}// sp--{\textgreater}i, sp--{\textgreater}j}$
 % 		}
-% 		i = 2; j = 3; §\C{\color{red}// sr.i, sr.j}§
+% 		i = 2; j = 3; $\C{\color{red}// sr.i, sr.j}$
 % 	}
-% 	i = 1; j = 2; §\C{\color{red}// sv.i, sv.j}§
+% 	i = 1; j = 2; $\C{\color{red}// sv.i, sv.j}$
 % }
 % \end{cfa}
@@ -1320,7 +1373,7 @@
 class C {
 	int i, j;
-	int mem() { §\C{\color{red}// implicit "this" parameter}§
-		i = 1; §\C{\color{red}// this->i}§
-		j = 2; §\C{\color{red}// this->j}§
+	int mem() { $\C{\R{// implicit "this" parameter}}$
+		i = 1; $\C{\R{// this->i}}$
+		j = 2; $\C{\R{// this->j}}$
 	}
 }
@@ -1329,16 +1382,16 @@
 \begin{cfa}
 struct S { int i, j; };
-int mem( S & ®this® ) { §\C{// explicit "this" parameter}§
-	®this.®i = 1; §\C{// "this" is not elided}§
-	®this.®j = 2;
+int mem( S & @this@ ) { $\C{// explicit "this" parameter}$
+	@this.@i = 1; $\C{// "this" is not elided}$
+	@this.@j = 2;
 }
 \end{cfa}
 but it is cumbersome having to write ``©this.©'' many times in a member.
 
-\CFA provides a ©with© clause/statement (see Pascal~\cite[\S~4.F]{Pascal}) to elided the "©this.©" by opening a scope containing field identifiers, changing the qualified fields into variables and giving an opportunity for optimizing qualified references.
-\begin{cfa}
-int mem( S & this ) ®with( this )® { §\C{// with clause}§
-	i = 1; §\C{\color{red}// this.i}§
-	j = 2; §\C{\color{red}// this.j}§
+\CFA provides a ©with© clause/statement \see{Pascal~\cite[\S~4.F]{Pascal}} to elide the "©this.©" by opening a scope containing field identifiers, changing the qualified fields into variables and giving an opportunity for optimizing qualified references.
+\begin{cfa}
+int mem( S & this ) @with( this )@ { $\C{// with clause}$
+	i = 1; $\C{\R{// this.i}}$
+	j = 2; $\C{\R{// this.j}}$
 }
 \end{cfa}
@@ -1346,5 +1399,5 @@
 \begin{cfa}
 struct T { double m, n; };
-int mem2( S & this1, T & this2 ) ®with( this1, this2 )® {
+int mem2( S & this1, T & this2 ) @with( this1, this2 )@ {
 	i = 1; j = 2;
 	m = 1.0; n = 2.0;
@@ -1357,11 +1410,11 @@
 	struct S1 { ... } s1;
 	struct S2 { ... } s2;
-	®with( s1 )® { §\C{// with statement}§
+	@with( s1 )@ { $\C{// with statement}$
 		// access fields of s1 without qualification
-		®with s2® { §\C{// nesting}§
+		@with s2@ { $\C{// nesting}$
 			// access fields of s1 and s2 without qualification
 		}
 	}
-	®with s1, s2® {
+	@with s1, s2@ {
 		// access unambiguous fields of s1 and s2 without qualification
 	}
@@ -1414,14 +1467,14 @@
 Non-local transfer can cause stack unwinding, \ie non-local routine termination, depending on the kind of raise.
 \begin{cfa}
-exception_t E {}; §\C{// exception type}§
+exception_t E {}; $\C{// exception type}$
 void f(...) {
-	... throw E{}; ... §\C{// termination}§
-	... throwResume E{}; ... §\C{// resumption}§
+	... throw E{}; ... $\C{// termination}$
+	... throwResume E{}; ... $\C{// resumption}$
 }
 try {
 	f(...);
-} catch( E e ; §boolean-predicate§ ) {		§\C{// termination handler}§
+} catch( E e ; $boolean-predicate$ ) {		$\C{// termination handler}$
 	// recover and continue
-} catchResume( E e ; §boolean-predicate§ ) { §\C{// resumption handler}§
+} catchResume( E e ; $boolean-predicate$ ) { $\C{// resumption handler}$
 	// repair and return
 } finally {
@@ -1430,5 +1483,5 @@
 \end{cfa}
 The kind of raise and handler match: ©throw© with ©catch© and ©throwResume© with ©catchResume©.
-Then the exception type must match along with any additonal predicate must be true.
+Then the exception type must match and any additional predicate must be true.
 The ©catch© and ©catchResume© handlers may appear in any oder.
 However, the ©finally© clause must appear at the end of the ©try© statement.
@@ -1483,6 +1536,6 @@
 For example, a routine returning a \Index{pointer} to an array of integers is defined and used in the following way:
 \begin{cfa}
-int ®(*®f®())[®5®]® {...}; §\C{// definition}§
- ... ®(*®f®())[®3®]® += 1; §\C{// usage}§
+int @(*@f@())[@5@]@ {...}; $\C{// definition}$
+ ... @(*@f@())[@3@]@ += 1; $\C{// usage}$
 \end{cfa}
 Essentially, the return type is wrapped around the routine name in successive layers (like an \Index{onion}).
@@ -1499,14 +1552,14 @@
 \begin{tabular}{@{}l@{\hspace{3em}}l@{}}
 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{C}}	\\
-\begin{cfa}
-ß[5] *ß ®int® x1;
-ß* [5]ß ®int® x2;
-ß[* [5] int]ß f®( int p )®;
+\begin{cfa}[moredelim={**[is][\color{blue}]{\#}{\#}}]
+#[5] *# @int@ x1;
+#* [5]# @int@ x2;
+#[* [5] int]# f@( int p )@;
 \end{cfa}
 &
-\begin{cfa}
-®int® ß*ß x1 ß[5]ß;
-®int® ß(*ßx2ß)[5]ß;
-ßint (*ßf®( int p )®ß)[5]ß;
+\begin{cfa}[moredelim={**[is][\color{blue}]{\#}{\#}}]
+@int@ #*# x1 #[5]#;
+@int@ #(*#x2#)[5]#;
+#int (*#f@( int p )@#)[5]#;
 \end{cfa}
 \end{tabular}
@@ -1520,9 +1573,9 @@
 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{C}}	\\
 \begin{cfa}
-®*® int x, y;
+@*@ int x, y;
 \end{cfa}
 &
 \begin{cfa}
-int ®*®x, ®*®y;
+int @*@x, @*@y;
 \end{cfa}
 \end{tabular}
@@ -1533,10 +1586,10 @@
 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{C}}	\\
 \begin{cfa}
-®*® int x;
+@*@ int x;
 int y;
 \end{cfa}
 &
 \begin{cfa}
-int ®*®x, y;
+int @*@x, y;
 
 \end{cfa}
@@ -1647,4 +1700,5 @@
 
 \section{Pointer / Reference}
+\label{s:PointerReference}
 
 C provides a \newterm{pointer type};
@@ -1673,7 +1727,7 @@
 &
 \begin{cfa}
-int * ®const® x = (int *)100
+int * @const@ x = (int *)100;
 *x = 3;			// implicit dereference
-int * ®const® y = (int *)104;
+int * @const@ y = (int *)104;
 *y = *x;			// implicit dereference
 \end{cfa}
@@ -1713,8 +1767,8 @@
 \begin{tabular}{@{}l@{\hspace{2em}}l@{}}
 \begin{cfa}
-int x, y, ®*® p1, ®*® p2, ®**® p3;
-p1 = ®&®x;     // p1 points to x
+int x, y, @*@ p1, @*@ p2, @**@ p3;
+p1 = @&@x;     // p1 points to x
 p2 = p1;     // p2 points to x
-p1 = ®&®y;     // p1 points to y
+p1 = @&@y;     // p1 points to y
 p3 = &p2;  // p3 points to p2
 \end{cfa}
@@ -1728,5 +1782,5 @@
 For example, \Index*{Algol68}~\cite{Algol68} infers pointer dereferencing to select the best meaning for each pointer usage
 \begin{cfa}
-p2 = p1 + x; §\C{// compiler infers *p2 = *p1 + x;}§
+p2 = p1 + x; $\C{// compiler infers *p2 = *p1 + x;}$
 \end{cfa}
 Algol68 infers the following dereferencing ©*p2 = *p1 + x©, because adding the arbitrary integer value in ©x© to the address of ©p1© and storing the resulting address into ©p2© is an unlikely operation.
@@ -1736,11 +1790,11 @@
 In C, objects of pointer type always manipulate the pointer object's address:
 \begin{cfa}
-p1 = p2; §\C{// p1 = p2\ \ rather than\ \ *p1 = *p2}§
-p2 = p1 + x; §\C{// p2 = p1 + x\ \ rather than\ \ *p2 = *p1 + x}§
+p1 = p2; $\C{// p1 = p2\ \ rather than\ \ *p1 = *p2}$
+p2 = p1 + x; $\C{// p2 = p1 + x\ \ rather than\ \ *p2 = *p1 + x}$
 \end{cfa}
 even though the assignment to ©p2© is likely incorrect, and the programmer probably meant:
 \begin{cfa}
-p1 = p2; §\C{// pointer address assignment}§
-®*®p2 = ®*®p1 + x; §\C{// pointed-to value assignment / operation}§
+p1 = p2; $\C{// pointer address assignment}$
+@*@p2 = @*@p1 + x; $\C{// pointed-to value assignment / operation}$
 \end{cfa}
 The C semantics work well for situations where manipulation of addresses is the primary meaning and data is rarely accessed, such as storage management (©malloc©/©free©).
@@ -1758,10 +1812,10 @@
 To support this common case, a reference type is introduced in \CFA, denoted by ©&©, which is the opposite dereference semantics to a pointer type, making the value at the pointed-to location the implicit semantics for dereferencing (similar but not the same as \CC \Index{reference type}s).
 \begin{cfa}
-int x, y, ®&® r1, ®&® r2, ®&&® r3;
-®&®r1 = &x; §\C{// r1 points to x}§
-®&®r2 = &r1; §\C{// r2 points to x}§
-®&®r1 = &y; §\C{// r1 points to y}§
-®&&®r3 = ®&®&r2; §\C{// r3 points to r2}§
-r2 = ((r1 + r2) * (r3 - r1)) / (r3 - 15); §\C{// implicit dereferencing}§
+int x, y, @&@ r1, @&@ r2, @&&@ r3;
+@&@r1 = &x; $\C{// r1 points to x}$
+@&@r2 = &r1; $\C{// r2 points to x}$
+@&@r1 = &y; $\C{// r1 points to y}$
+@&&@r3 = @&@&r2; $\C{// r3 points to r2}$
+r2 = ((r1 + r2) * (r3 - r1)) / (r3 - 15); $\C{// implicit dereferencing}$
 \end{cfa}
 Except for auto-dereferencing by the compiler, this reference example is the same as the previous pointer example.
@@ -1769,5 +1823,5 @@
 One way to conceptualize a reference is via a rewrite rule, where the compiler inserts a dereference operator before the reference variable for each reference qualifier in a declaration, so the previous example becomes:
 \begin{cfa}
-®*®r2 = ((®*®r1 + ®*®r2) ®*® (®**®r3 - ®*®r1)) / (®**®r3 - 15);
+@*@r2 = ((@*@r1 + @*@r2) @*@ (@**@r3 - @*@r1)) / (@**@r3 - 15);
 \end{cfa}
 When a reference operation appears beside a dereference operation, \eg ©&*©, they cancel out.
@@ -1778,9 +1832,9 @@
 For a \CFA reference type, the cancellation on the left-hand side of assignment leaves the reference as an address (\Index{lvalue}):
 \begin{cfa}
-(&®*®)r1 = &x; §\C{// (\&*) cancel giving address in r1 not variable pointed-to by r1}§
+(&@*@)r1 = &x; $\C{// (\&*) cancel giving address in r1 not variable pointed-to by r1}$
 \end{cfa}
 Similarly, the address of a reference can be obtained for assignment or computation (\Index{rvalue}):
 \begin{cfa}
-(&(&®*®)®*®)r3 = &(&®*®)r2; §\C{// (\&*) cancel giving address in r2, (\&(\&*)*) cancel giving address in r3}§
+(&(&@*@)@*@)r3 = &(&@*@)r2; $\C{// (\&*) cancel giving address in r2, (\&(\&*)*) cancel giving address in r3}$
 \end{cfa}
 Cancellation\index{cancellation!pointer/reference}\index{pointer!cancellation} works to arbitrary depth.
@@ -1790,11 +1844,11 @@
 int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
 		 &r1 = x,    &&r2 = r1,   &&&r3 = r2;
-***p3 = 3; §\C{// change x}§
-r3 = 3; §\C{// change x, ***r3}§
-**p3 = ...; §\C{// change p1}§
-&r3 = ...; §\C{// change r1, (\&*)**r3, 1 cancellation}§
-*p3 = ...; §\C{// change p2}§
-&&r3 = ...; §\C{// change r2, (\&(\&*)*)*r3, 2 cancellations}§
-&&&r3 = p3; §\C{// change r3 to p3, (\&(\&(\&*)*)*)r3, 3 cancellations}§
+***p3 = 3; $\C{// change x}$
+r3 = 3; $\C{// change x, ***r3}$
+**p3 = ...; $\C{// change p1}$
+&r3 = ...; $\C{// change r1, (\&*)**r3, 1 cancellation}$
+*p3 = ...; $\C{// change p2}$
+&&r3 = ...; $\C{// change r2, (\&(\&*)*)*r3, 2 cancellations}$
+&&&r3 = p3; $\C{// change r3 to p3, (\&(\&(\&*)*)*)r3, 3 cancellations}$
 \end{cfa}
 Furthermore, both types are equally performant, as the same amount of dereferencing occurs for both types.
@@ -1803,17 +1857,17 @@
 As for a pointer type, a reference type may have qualifiers:
 \begin{cfa}
-const int cx = 5; §\C{// cannot change cx;}§
-const int & cr = cx; §\C{// cannot change what cr points to}§
-®&®cr = &cx; §\C{// can change cr}§
-cr = 7; §\C{// error, cannot change cx}§
-int & const rc = x; §\C{// must be initialized}§
-®&®rc = &x; §\C{// error, cannot change rc}§
-const int & const crc = cx; §\C{// must be initialized}§
-crc = 7; §\C{// error, cannot change cx}§
-®&®crc = &cx; §\C{// error, cannot change crc}§
+const int cx = 5; $\C{// cannot change cx;}$
+const int & cr = cx; $\C{// cannot change what cr points to}$
+@&@cr = &cx; $\C{// can change cr}$
+cr = 7; $\C{// error, cannot change cx}$
+int & const rc = x; $\C{// must be initialized}$
+@&@rc = &x; $\C{// error, cannot change rc}$
+const int & const crc = cx; $\C{// must be initialized}$
+crc = 7; $\C{// error, cannot change cx}$
+@&@crc = &cx; $\C{// error, cannot change crc}$
 \end{cfa}
 Hence, for type ©& const©, there is no pointer assignment, so ©&rc = &x© is disallowed, and \emph{the address value cannot be the null pointer unless an arbitrary pointer is coerced\index{coercion} into the reference}:
 \begin{cfa}
-int & const cr = *0; §\C{// where 0 is the int * zero}§
+int & const cr = *0; $\C{// where 0 is the int * zero}$
 \end{cfa}
 Note, constant reference-types do not prevent \Index{addressing errors} because of explicit storage-management:
@@ -1822,20 +1876,20 @@
 cr = 5;
 free( &cr );
-cr = 7; §\C{// unsound pointer dereference}§
+cr = 7; $\C{// unsound pointer dereference}$
 \end{cfa}
 
 The position of the ©const© qualifier \emph{after} the pointer/reference qualifier causes confuse for C programmers.
 The ©const© qualifier cannot be moved before the pointer/reference qualifier for C style-declarations;
-\CFA-style declarations (see \VRef{s:AlternativeDeclarations}) attempt to address this issue:
+\CFA-style declarations \see{\VRef{s:AlternativeDeclarations}} attempt to address this issue:
 \begin{cquote}
 \begin{tabular}{@{}l@{\hspace{3em}}l@{}}
 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{C}}	\\
 \begin{cfa}
-®const® * ®const® * const int ccp;
-®const® & ®const® & const int ccr;
+@const@ * @const@ * const int ccp;
+@const@ & @const@ & const int ccr;
 \end{cfa}
 &
 \begin{cfa}
-const int * ®const® * ®const® ccp;
+const int * @const@ * @const@ ccp;
 
 \end{cfa}
@@ -1846,15 +1900,15 @@
 Finally, like pointers, references are usable and composable with other type operators and generators.
 \begin{cfa}
-int w, x, y, z, & ar[3] = { x, y, z }; §\C{// initialize array of references}§
-&ar[1] = &w; §\C{// change reference array element}§
-typeof( ar[1] ) p; §\C{// (gcc) is int, \ie the type of referenced object}§
-typeof( &ar[1] ) q; §\C{// (gcc) is int \&, \ie the type of reference}§
-sizeof( ar[1] ) == sizeof( int ); §\C{// is true, \ie the size of referenced object}§
-sizeof( &ar[1] ) == sizeof( int *) §\C{// is true, \ie the size of a reference}§
+int w, x, y, z, & ar[3] = { x, y, z }; $\C{// initialize array of references}$
+&ar[1] = &w; $\C{// change reference array element}$
+typeof( ar[1] ) p; $\C{// (gcc) is int, \ie the type of referenced object}$
+typeof( &ar[1] ) q; $\C{// (gcc) is int \&, \ie the type of reference}$
+sizeof( ar[1] ) == sizeof( int ); $\C{// is true, \ie the size of referenced object}$
+sizeof( &ar[1] ) == sizeof( int *) $\C{// is true, \ie the size of a reference}$
 \end{cfa}
 
 In contrast to \CFA reference types, \Index*[C++]{\CC{}}'s reference types are all ©const© references, preventing changes to the reference address, so only value assignment is possible, which eliminates half of the \Index{address duality}.
 Also, \CC does not allow \Index{array}s\index{array!reference} of reference\footnote{
-The reason for disallowing arrays of reference is unknown, but possibly comes from references being ethereal (like a textual macro), and hence, replaceable by the referant object.}
+The reason for disallowing arrays of reference is unknown, but possibly comes from references being ethereal (like a textual macro), and hence, replaceable by the referent object.}
 \Index*{Java}'s reference types to objects (all Java objects are on the heap) are like C pointers, which always manipulate the address, and there is no (bit-wise) object assignment, so objects are explicitly cloned by shallow or deep copying, which eliminates half of the address duality.
 
@@ -1868,7 +1922,7 @@
 Therefore, for pointer/reference initialization, the initializing value must be an address not a value.
 \begin{cfa}
-int * p = &x; §\C{// assign address of x}§
-®int * p = x;® §\C{// assign value of x}§
-int & r = x; §\C{// must have address of x}§
+int * p = &x; $\C{// assign address of x}$
+@int * p = x;@ $\C{// assign value of x}$
+int & r = x; $\C{// must have address of x}$
 \end{cfa}
 Like the previous example with C pointer-arithmetic, it is unlikely assigning the value of ©x© into a pointer is meaningful (again, a warning is usually given).
@@ -1879,6 +1933,6 @@
 Similarly, when a reference type is used for a parameter/return type, the call-site argument does not require a reference operator for the same reason.
 \begin{cfa}
-int & f( int & r ); §\C{// reference parameter and return}§
-z = f( x ) + f( y ); §\C{// reference operator added, temporaries needed for call results}§
+int & f( int & r ); $\C{// reference parameter and return}$
+z = f( x ) + f( y ); $\C{// reference operator added, temporaries needed for call results}$
 \end{cfa}
 Within routine ©f©, it is possible to change the argument by changing the corresponding parameter, and parameter ©r© can be locally reassigned within ©f©.
@@ -1893,8 +1947,8 @@
 When a pointer/reference parameter has a ©const© value (immutable), it is possible to pass literals and expressions.
 \begin{cfa}
-void f( ®const® int & cr );
-void g( ®const® int * cp );
-f( 3 );			  g( ®&®3 );
-f( x + y );		g( ®&®(x + y) );
+void f( @const@ int & cr );
+void g( @const@ int * cp );
+f( 3 );			  g( @&@3 );
+f( x + y );		g( @&@(x + y) );
 \end{cfa}
 Here, the compiler passes the address to the literal 3 or the temporary for the expression ©x + y©, knowing the argument cannot be changed through the parameter.
@@ -1907,6 +1961,6 @@
 void f( int & r );
 void g( int * p );
-f( 3 );			  g( ®&®3 ); §\C{// compiler implicit generates temporaries}§
-f( x + y );		g( ®&®(x + y) ); §\C{// compiler implicit generates temporaries}§
+f( 3 );			  g( @&@3 ); $\C{// compiler implicitly generates temporaries}$
+f( x + y );		g( @&@(x + y) ); $\C{// compiler implicitly generates temporaries}$
 \end{cfa}
 Essentially, there is an implicit \Index{rvalue} to \Index{lvalue} conversion in this case.\footnote{
@@ -1919,19 +1973,19 @@
 \begin{cfa}
 void f( int i );
-void (* fp)( int ); §\C{// routine pointer}§
-fp = f; §\C{// reference initialization}§
-fp = &f; §\C{// pointer initialization}§
-fp = *f; §\C{// reference initialization}§
-fp(3); §\C{// reference invocation}§
-(*fp)(3); §\C{// pointer invocation}§
+void (* fp)( int ); $\C{// routine pointer}$
+fp = f; $\C{// reference initialization}$
+fp = &f; $\C{// pointer initialization}$
+fp = *f; $\C{// reference initialization}$
+fp(3); $\C{// reference invocation}$
+(*fp)(3); $\C{// pointer invocation}$
 \end{cfa}
 While C's treatment of routine objects has similarity to inferring a reference type in initialization contexts, the examples are assignment not initialization, and all possible forms of assignment are possible (©f©, ©&f©, ©*f©) without regard for type.
 Instead, a routine object should be referenced by a ©const© reference:
 \begin{cfa}
-®const® void (®&® fr)( int ) = f; §\C{// routine reference}§
-fr = ... §\C{// error, cannot change code}§
-&fr = ...; §\C{// changing routine reference}§
-fr( 3 ); §\C{// reference call to f}§
-(*fr)(3); §\C{// error, incorrect type}§
+@const@ void (@&@ fr)( int ) = f; $\C{// routine reference}$
+fr = ... $\C{// error, cannot change code}$
+&fr = ...; $\C{// changing routine reference}$
+fr( 3 ); $\C{// reference call to f}$
+(*fr)(3); $\C{// error, incorrect type}$
 \end{cfa}
 because the value of the routine object is a routine literal, \ie the routine code is normally immutable during execution.\footnote{
@@ -1946,8 +2000,8 @@
 \begin{itemize}
 \item
-if ©R© is an \Index{rvalue} of type ©T &©$_1\cdots$ ©&©$_r$, where $r \ge 1$ references (©&© symbols), than ©&R© has type ©T ®*®&©$_{\color{red}2}\cdots$ ©&©$_{\color{red}r}$, \ie ©T© pointer with $r-1$ references (©&© symbols).
-
-\item
-if ©L© is an \Index{lvalue} of type ©T &©$_1\cdots$ ©&©$_l$, where $l \ge 0$ references (©&© symbols), than ©&L© has type ©T ®*®&©$_{\color{red}1}\cdots$ ©&©$_{\color{red}l}$, \ie ©T© pointer with $l$ references (©&© symbols).
+if ©R© is an \Index{rvalue} of type ©T &©$_1\cdots$ ©&©$_r$, where $r \ge 1$ references (©&© symbols), then ©&R© has type ©T ©\R{©*©}©&©\R{$_2$}$\cdots$ ©&©\R{$_r$}, \ie ©T© pointer with $r-1$ references (©&© symbols).
+
+\item
+if ©L© is an \Index{lvalue} of type ©T &©$_1\cdots$ ©&©$_l$, where $l \ge 0$ references (©&© symbols), then ©&L© has type ©T ©\R{©*©}©&©\R{$_1$}$\cdots$ ©&©\R{$_l$}, \ie ©T© pointer with $l$ references (©&© symbols).
 \end{itemize}
 The following example shows the first rule applied to different \Index{rvalue} contexts:
@@ -1955,9 +2009,9 @@
 int x, * px, ** ppx, *** pppx, **** ppppx;
 int & rx = x, && rrx = rx, &&& rrrx = rrx ;
-x = rrrx; §\C[2.0in]{// rrrx is an lvalue with type int \&\&\& (equivalent to x)}§
-px = &rrrx; §\C{// starting from rrrx, \&rrrx is an rvalue with type int *\&\&\& (\&x)}§
-ppx = &&rrrx; §\C{// starting from \&rrrx, \&\&rrrx is an rvalue with type int **\&\& (\&rx)}§
-pppx = &&&rrrx; §\C{// starting from \&\&rrrx, \&\&\&rrrx is an rvalue with type int ***\& (\&rrx)}§
-ppppx = &&&&rrrx; §\C{// starting from \&\&\&rrrx, \&\&\&\&rrrx is an rvalue with type int **** (\&rrrx)}§
+x = rrrx; $\C[2.0in]{// rrrx is an lvalue with type int \&\&\& (equivalent to x)}$
+px = &rrrx; $\C{// starting from rrrx, \&rrrx is an rvalue with type int *\&\&\& (\&x)}$
+ppx = &&rrrx; $\C{// starting from \&rrrx, \&\&rrrx is an rvalue with type int **\&\& (\&rx)}$
+pppx = &&&rrrx; $\C{// starting from \&\&rrrx, \&\&\&rrrx is an rvalue with type int ***\& (\&rrx)}$
+ppppx = &&&&rrrx; $\C{// starting from \&\&\&rrrx, \&\&\&\&rrrx is an rvalue with type int **** (\&rrrx)}$
 \end{cfa}
 The following example shows the second rule applied to different \Index{lvalue} contexts:
@@ -1965,8 +2019,8 @@
 int x, * px, ** ppx, *** pppx;
 int & rx = x, && rrx = rx, &&& rrrx = rrx ;
-rrrx = 2; §\C{// rrrx is an lvalue with type int \&\&\& (equivalent to x)}§
-&rrrx = px; §\C{// starting from rrrx, \&rrrx is an rvalue with type int *\&\&\& (rx)}§
-&&rrrx = ppx; §\C{// starting from \&rrrx, \&\&rrrx is an rvalue with type int **\&\& (rrx)}§
-&&&rrrx = pppx; §\C{// starting from \&\&rrrx, \&\&\&rrrx is an rvalue with type int ***\& (rrrx)}\CRT§
+rrrx = 2; $\C{// rrrx is an lvalue with type int \&\&\& (equivalent to x)}$
+&rrrx = px; $\C{// starting from rrrx, \&rrrx is an rvalue with type int *\&\&\& (rx)}$
+&&rrrx = ppx; $\C{// starting from \&rrrx, \&\&rrrx is an rvalue with type int **\&\& (rrx)}$
+&&&rrrx = pppx; $\C{// starting from \&\&rrrx, \&\&\&rrrx is an rvalue with type int ***\& (rrrx)}\CRT$
 \end{cfa}
 
@@ -1981,5 +2035,5 @@
 \begin{cfa}
 int x;
-x + 1; §\C[2.0in]{// lvalue variable (int) converts to rvalue for expression}§
+x + 1; $\C[2.0in]{// lvalue variable (int) converts to rvalue for expression}$
 \end{cfa}
 An rvalue has no type qualifiers (©cv©), so the lvalue qualifiers are dropped.
@@ -1991,5 +2045,5 @@
 \begin{cfa}
 int x, &r = x, f( int p );
-x = ®r® + f( ®r® ); §\C{// lvalue reference converts to rvalue}§
+x = @r@ + f( @r@ ); $\C{// lvalue reference converts to rvalue}$
 \end{cfa}
 An rvalue has no type qualifiers (©cv©), so the reference qualifiers are dropped.
@@ -1998,6 +2052,6 @@
 lvalue to reference conversion: \lstinline[deletekeywords=lvalue]@lvalue-type cv1 T@ converts to ©cv2 T &©, which allows implicitly converting variables to references.
 \begin{cfa}
-int x, &r = ®x®, f( int & p ); §\C{// lvalue variable (int) convert to reference (int \&)}§
-f( ®x® ); §\C{// lvalue variable (int) convert to reference (int \&)}§
+int x, &r = @x@, f( int & p ); $\C{// lvalue variable (int) convert to reference (int \&)}$
+f( @x@ ); $\C{// lvalue variable (int) convert to reference (int \&)}$
 \end{cfa}
 Conversion can restrict a type, where ©cv1© $\le$ ©cv2©, \eg passing an ©int© to a ©const volatile int &©, which has low cost.
@@ -2009,6 +2063,6 @@
 \begin{cfa}
 int x, & f( int & p );
-f( ®x + 3® );	§\C[1.5in]{// rvalue parameter (int) implicitly converts to lvalue temporary reference (int \&)}§
-®&f®(...) = &x;	§\C{// rvalue result (int \&) implicitly converts to lvalue temporary reference (int \&)}\CRT§
+f( @x + 3@ );	$\C[1.5in]{// rvalue parameter (int) implicitly converts to lvalue temporary reference (int \&)}$
+@&f@(...) = &x;	$\C{// rvalue result (int \&) implicitly converts to lvalue temporary reference (int \&)}\CRT$
 \end{cfa}
 In both case, modifications to the temporary are inaccessible (\Index{warning}).
@@ -2182,6 +2236,6 @@
 The point of the new syntax is to allow returning multiple values from a routine~\cite{Galletly96,CLU}, \eg:
 \begin{cfa}
-®[ int o1, int o2, char o3 ]® f( int i1, char i2, char i3 ) {
-	§\emph{routine body}§
+@[ int o1, int o2, char o3 ]@ f( int i1, char i2, char i3 ) {
+	$\emph{routine body}$
 }
 \end{cfa}
@@ -2194,11 +2248,11 @@
 Declaration qualifiers can only appear at the start of a routine definition, \eg:
 \begin{cfa}
-®extern® [ int x ] g( int y ) {§\,§}
+@extern@ [ int x ] g( int y ) {$\,$}
 \end{cfa}
 Lastly, if there are no output parameters or input parameters, the brackets and/or parentheses must still be specified;
 in both cases the type is assumed to be void as opposed to old style C defaults of int return type and unknown parameter types, respectively, as in:
 \begin{cfa}
-[§\,§] g(); §\C{// no input or output parameters}§
-[ void ] g( void ); §\C{// no input or output parameters}§
+[$\,$] g(); $\C{// no input or output parameters}$
+[ void ] g( void ); $\C{// no input or output parameters}$
 \end{cfa}
 
@@ -2218,5 +2272,5 @@
 \begin{cfa}
 typedef int foo;
-int f( int (* foo) ); §\C{// foo is redefined as a parameter name}§
+int f( int (* foo) ); $\C{// foo is redefined as a parameter name}$
 \end{cfa}
 The string ``©int (* foo)©'' declares a C-style named-parameter of type pointer to an integer (the parenthesis are superfluous), while the same string declares a \CFA style unnamed parameter of type routine returning integer with unnamed parameter of type pointer to foo.
@@ -2226,12 +2280,12 @@
 C-style declarations can be used to declare parameters for \CFA style routine definitions, \eg:
 \begin{cfa}
-[ int ] f( * int, int * ); §\C{// returns an integer, accepts 2 pointers to integers}§
-[ * int, int * ] f( int ); §\C{// returns 2 pointers to integers, accepts an integer}§
+[ int ] f( * int, int * ); $\C{// returns an integer, accepts 2 pointers to integers}$
+[ * int, int * ] f( int ); $\C{// returns 2 pointers to integers, accepts an integer}$
 \end{cfa}
 The reason for allowing both declaration styles in the new context is for backwards compatibility with existing preprocessor macros that generate C-style declaration-syntax, as in:
 \begin{cfa}
 #define ptoa( n, d ) int (*n)[ d ]
-int f( ptoa( p, 5 ) ) ... §\C{// expands to int f( int (*p)[ 5 ] )}§
-[ int ] f( ptoa( p, 5 ) ) ... §\C{// expands to [ int ] f( int (*p)[ 5 ] )}§
+int f( ptoa( p, 5 ) ) ... $\C{// expands to int f( int (*p)[ 5 ] )}$
+[ int ] f( ptoa( p, 5 ) ) ... $\C{// expands to [ int ] f( int (*p)[ 5 ] )}$
 \end{cfa}
 Again, programmers are highly encouraged to use one declaration form or the other, rather than mixing the forms.
@@ -2252,8 +2306,8 @@
 \begin{minipage}{\linewidth}
 \begin{cfa}
-®[ int x, int y ]® f() {
+@[ int x, int y ]@ f() {
 	int z;
 	... x = 0; ... y = z; ...
-	®return;® §\C{// implicitly return x, y}§
+	@return;@ $\C{// implicitly return x, y}$
 }
 \end{cfa}
@@ -2265,5 +2319,5 @@
 [ int x, int y ] f() {
 	...
-} §\C{// implicitly return x, y}§
+} $\C{// implicitly return x, y}$
 \end{cfa}
 In this case, the current values of ©x© and ©y© are returned to the calling routine just as if a ©return© had been encountered.
@@ -2274,9 +2328,9 @@
 [ int x, int y ] f( int, x, int y ) {
 	...
-} §\C{// implicitly return x, y}§
+} $\C{// implicitly return x, y}$
 \end{cfa}
 This notation allows the compiler to eliminate temporary variables in nested routine calls.
 \begin{cfa}
-[ int x, int y ] f( int, x, int y ); §\C{// prototype declaration}§
+[ int x, int y ] f( int, x, int y ); $\C{// prototype declaration}$
 int a, b;
 [a, b] = f( f( f( a, b ) ) );
@@ -2292,14 +2346,14 @@
 as well, parameter names are optional, \eg:
 \begin{cfa}
-[ int x ] f (); §\C{// returning int with no parameters}§
-[ * int ] g (int y); §\C{// returning pointer to int with int parameter}§
-[ ] h ( int, char ); §\C{// returning no result with int and char parameters}§
-[ * int, int ] j ( int ); §\C{// returning pointer to int and int, with int parameter}§
+[ int x ] f (); $\C{// returning int with no parameters}$
+[ * int ] g (int y); $\C{// returning pointer to int with int parameter}$
+[ ] h ( int, char ); $\C{// returning no result with int and char parameters}$
+[ * int, int ] j ( int ); $\C{// returning pointer to int and int, with int parameter}$
 \end{cfa}
 This syntax allows a prototype declaration to be created by cutting and pasting source text from the routine definition header (or vice versa).
-Like C, it is possible to declare multiple routine-prototypes in a single declaration, where the return type is distributed across \emph{all} routine names in the declaration list (see~\VRef{s:AlternativeDeclarations}), \eg:
+Like C, it is possible to declare multiple routine-prototypes in a single declaration, where the return type is distributed across \emph{all} routine names in the declaration list \see{\VRef{s:AlternativeDeclarations}}, \eg:
 \begin{cfa}
 C :		const double bar1(), bar2( int ), bar3( double );
-§\CFA§:	[const double] foo(), foo( int ), foo( double ) { return 3.0; }
+$\CFA$:	[const double] foo(), foo( int ), foo( double ) { return 3.0; }
 \end{cfa}
 \CFA allows the last routine in the list to define its body.
@@ -2316,13 +2370,13 @@
 The syntax for pointers to \CFA routines specifies the pointer name on the right, \eg:
 \begin{cfa}
-* [ int x ] () fp; §\C{// pointer to routine returning int with no parameters}§
-* [ * int ] (int y) gp; §\C{// pointer to routine returning pointer to int with int parameter}§
-* [ ] (int,char) hp; §\C{// pointer to routine returning no result with int and char parameters}§
-* [ * int,int ] ( int ) jp; §\C{// pointer to routine returning pointer to int and int, with int parameter}§
+* [ int x ] () fp; $\C[2.25in]{// pointer to routine returning int with no parameters}$
+* [ * int ] (int y) gp; $\C{// pointer to routine returning pointer to int with int parameter}$
+* [ ] (int,char) hp; $\C{// pointer to routine returning no result with int and char parameters}$
+* [ * int,int ] ( int ) jp; $\C{// pointer to routine returning pointer to int and int, with int parameter}\CRT$
 \end{cfa}
 While parameter names are optional, \emph{a routine name cannot be specified};
 for example, the following is incorrect:
 \begin{cfa}
-* [ int x ] f () fp; §\C{// routine name "f" is not allowed}§
+* [ int x ] f () fp; $\C{// routine name "f" is not allowed}$
 \end{cfa}
 
@@ -2347,5 +2401,5 @@
 whereas a named (keyword) call may be:
 \begin{cfa}
-p( z : 3, x : 4, y : 7 );  §\C{// rewrite $\Rightarrow$ p( 4, 7, 3 )}§
+p( z : 3, x : 4, y : 7 );  $\C{// rewrite \(\Rightarrow\) p( 4, 7, 3 )}$
 \end{cfa}
 Here the order of the arguments is unimportant, and the names of the parameters are used to associate argument values with the corresponding parameters.
@@ -2364,9 +2418,9 @@
 For example, the following routine prototypes and definition are all valid.
 \begin{cfa}
-void p( int, int, int ); §\C{// equivalent prototypes}§
+void p( int, int, int ); $\C{// equivalent prototypes}$
 void p( int x, int y, int z );
 void p( int y, int x, int z );
 void p( int z, int y, int x );
-void p( int q, int r, int s ) {} §\C{// match with this definition}§
+void p( int q, int r, int s ) {} $\C{// match with this definition}$
 \end{cfa}
 Forcing matching parameter names in routine prototypes with corresponding routine definitions is possible, but goes against a strong tradition in C programming.
@@ -2380,11 +2434,11 @@
 int f( int x, double y );
 
-f( j : 3, i : 4 ); §\C{// 1st f}§
-f( x : 7, y : 8.1 ); §\C{// 2nd f}§
-f( 4, 5 );  §\C{// ambiguous call}§
+f( j : 3, i : 4 ); $\C{// 1st f}$
+f( x : 7, y : 8.1 ); $\C{// 2nd f}$
+f( 4, 5 );  $\C{// ambiguous call}$
 \end{cfa}
 However, named arguments compound routine resolution in conjunction with conversions:
 \begin{cfa}
-f( i : 3, 5.7 ); §\C{// ambiguous call ?}§
+f( i : 3, 5.7 ); $\C{// ambiguous call ?}$
 \end{cfa}
 Depending on the cost associated with named arguments, this call could be resolvable or ambiguous.
@@ -2400,16 +2454,16 @@
 the allowable positional calls are:
 \begin{cfa}
-p(); §\C{// rewrite $\Rightarrow$ p( 1, 2, 3 )}§
-p( 4 ); §\C{// rewrite $\Rightarrow$ p( 4, 2, 3 )}§
-p( 4, 4 ); §\C{// rewrite $\Rightarrow$ p( 4, 4, 3 )}§
-p( 4, 4, 4 ); §\C{// rewrite $\Rightarrow$ p( 4, 4, 4 )}§
+p(); $\C{// rewrite \(\Rightarrow\) p( 1, 2, 3 )}$
+p( 4 ); $\C{// rewrite \(\Rightarrow\) p( 4, 2, 3 )}$
+p( 4, 4 ); $\C{// rewrite \(\Rightarrow\) p( 4, 4, 3 )}$
+p( 4, 4, 4 ); $\C{// rewrite \(\Rightarrow\) p( 4, 4, 4 )}$
 // empty arguments
-p(  , 4, 4 ); §\C{// rewrite $\Rightarrow$ p( 1, 4, 4 )}§
-p( 4,  , 4 ); §\C{// rewrite $\Rightarrow$ p( 4, 2, 4 )}§
-p( 4, 4,   ); §\C{// rewrite $\Rightarrow$ p( 4, 4, 3 )}§
-p( 4,  ,   ); §\C{// rewrite $\Rightarrow$ p( 4, 2, 3 )}§
-p(  , 4,   ); §\C{// rewrite $\Rightarrow$ p( 1, 4, 3 )}§
-p(  ,  , 4 ); §\C{// rewrite $\Rightarrow$ p( 1, 2, 4 )}§
-p(  ,  ,   ); §\C{// rewrite $\Rightarrow$ p( 1, 2, 3 )}§
+p(  , 4, 4 ); $\C{// rewrite \(\Rightarrow\) p( 1, 4, 4 )}$
+p( 4,  , 4 ); $\C{// rewrite \(\Rightarrow\) p( 4, 2, 4 )}$
+p( 4, 4,   ); $\C{// rewrite \(\Rightarrow\) p( 4, 4, 3 )}$
+p( 4,  ,   ); $\C{// rewrite \(\Rightarrow\) p( 4, 2, 3 )}$
+p(  , 4,   ); $\C{// rewrite \(\Rightarrow\) p( 1, 4, 3 )}$
+p(  ,  , 4 ); $\C{// rewrite \(\Rightarrow\) p( 1, 2, 4 )}$
+p(  ,  ,   ); $\C{// rewrite \(\Rightarrow\) p( 1, 2, 3 )}$
 \end{cfa}
 Here the missing arguments are inserted from the default values in the parameter list.
@@ -2435,7 +2489,7 @@
 Default values may only appear in a prototype versus definition context:
 \begin{cfa}
-void p( int x, int y = 2, int z = 3 ); §\C{// prototype: allowed}§
-void p( int, int = 2, int = 3 ); §\C{// prototype: allowed}§
-void p( int x, int y = 2, int z = 3 ) {} §\C{// definition: not allowed}§
+void p( int x, int y = 2, int z = 3 ); $\C{// prototype: allowed}$
+void p( int, int = 2, int = 3 ); $\C{// prototype: allowed}$
+void p( int x, int y = 2, int z = 3 ) {} $\C{// definition: not allowed}$
 \end{cfa}
 The reason for this restriction is to allow separate compilation.
@@ -2452,6 +2506,6 @@
 \begin{cfa}
 p( int x, int y, int z, ... );
-p( 1, 4, 5, 6, z : 3, y : 2 ); §\C{// assume p( /* positional */, ... , /* named */ );}§
-p( 1, z : 3, y : 2, 4, 5, 6 ); §\C{// assume p( /* positional */, /* named */, ... );}§
+p( 1, 4, 5, 6, z : 3, y : 2 ); $\C{// assume p( /* positional */, ... , /* named */ );}$
+p( 1, z : 3, y : 2, 4, 5, 6 ); $\C{// assume p( /* positional */, /* named */, ... );}$
 \end{cfa}
 In the first call, it is necessary for the programmer to conceptually rewrite the call, changing named arguments into positional, before knowing where the ellipse arguments begin.
@@ -2462,6 +2516,6 @@
 \begin{cfa}
 void p( int x, int y = 2, int z = 3... );
-p( 1, 4, 5, 6, z : 3 ); §\C{// assume p( /* positional */, ... , /* named */ );}§
-p( 1, z : 3, 4, 5, 6 ); §\C{// assume p( /* positional */, /* named */, ... );}§
+p( 1, 4, 5, 6, z : 3 ); $\C{// assume p( /* positional */, ... , /* named */ );}$
+p( 1, z : 3, 4, 5, 6 ); $\C{// assume p( /* positional */, /* named */, ... );}$
 \end{cfa}
 The first call is an error because arguments 4 and 5 are actually positional not ellipse arguments;
@@ -2469,7 +2523,7 @@
 In the second call, the default value for y is implicitly inserted after argument 1 and the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
 For these reasons, \CFA requires named arguments before ellipse arguments.
-Finally, while ellipse arguments are needed for a small set of existing C routines, like printf, the extended \CFA type system largely eliminates the need for ellipse arguments (see Section 24), making much of this discussion moot.
-
-Default arguments and overloading (see Section 24) are complementary.
+Finally, while ellipse arguments are needed for a small set of existing C routines, like ©printf©, the extended \CFA type system largely eliminates the need for ellipse arguments \see{\VRef{s:Overloading}}, making much of this discussion moot.
+
+Default arguments and overloading \see{\VRef{s:Overloading}} are complementary.
 While in theory default arguments can be simulated with overloading, as in:
 \begin{cquote}
@@ -2493,5 +2547,5 @@
 Furthermore, overloading cannot handle accessing default arguments in the middle of a positional list, via a missing argument, such as:
 \begin{cfa}
-p( 1, /* default */, 5 ); §\C{// rewrite $\Rightarrow$ p( 1, 2, 5 )}§
+p( 1, /* default */, 5 ); $\C{// rewrite \(\Rightarrow\) p( 1, 2, 5 )}$
 \end{cfa}
 
@@ -2506,10 +2560,10 @@
 \begin{cfa}
 struct {
-	int f1; §\C{// named field}§
-	int f2 : 4; §\C{// named field with bit field size}§
-	int : 3; §\C{// unnamed field for basic type with bit field size}§
-	int ; §\C{// disallowed, unnamed field}§
-	int *; §\C{// disallowed, unnamed field}§
-	int (*)( int ); §\C{// disallowed, unnamed field}§
+	int f1; $\C{// named field}$
+	int f2 : 4; $\C{// named field with bit field size}$
+	int : 3; $\C{// unnamed field for basic type with bit field size}$
+	int ; $\C{// disallowed, unnamed field}$
+	int *; $\C{// disallowed, unnamed field}$
+	int (*)( int ); $\C{// disallowed, unnamed field}$
 };
 \end{cfa}
@@ -2519,5 +2573,5 @@
 \begin{cfa}
 struct {
-	int , , ; §\C{// 3 unnamed fields}§
+	int , , ; $\C{// 3 unnamed fields}$
 }
 \end{cfa}
@@ -2531,5 +2585,5 @@
 \subsection{Type Nesting}
 
-\CFA allows \Index{type nesting}, and type qualification of the nested types (see \VRef[Figure]{f:TypeNestingQualification}), where as C hoists\index{type hoisting} (refactors) nested types into the enclosing scope and has no type qualification.
+\CFA allows \Index{type nesting}, and type qualification of the nested types \see{\VRef[Figure]{f:TypeNestingQualification}}, whereas C hoists\index{type hoisting} (refactors) nested types into the enclosing scope and has no type qualification.
 \begin{figure}
 \centering
@@ -2587,8 +2641,8 @@
 
 int fred() {
-	s.t.c = ®S.®R;	// type qualification
-	struct ®S.®T t = { ®S.®R, 1, 2 };
-	enum ®S.®C c;
-	union ®S.T.®U u;
+	s.t.c = @S.@R;	// type qualification
+	struct @S.@T t = { @S.@R, 1, 2 };
+	enum @S.@C c;
+	union @S.T.@U u;
 }
 \end{cfa}
@@ -2613,9 +2667,9 @@
 const unsigned int size = 5;
 int ia[size];
-... §\C{// assign values to array ia}§
-qsort( ia, size ); §\C{// sort ascending order using builtin ?<?}§
+... $\C{// assign values to array ia}$
+qsort( ia, size ); $\C{// sort ascending order using builtin ?<?}$
 {
-	®int ?<?( int x, int y ) { return x > y; }® §\C{// nested routine}§
-	qsort( ia, size ); §\C{// sort descending order by local redefinition}§
+	@int ?<?( int x, int y ) { return x > y; }@ $\C{// nested routine}$
+	qsort( ia, size ); $\C{// sort descending order by local redefinition}$
 }
 \end{cfa}
@@ -2625,14 +2679,14 @@
 The following program in undefined in \CFA (and Indexc{gcc})
 \begin{cfa}
-[* [int]( int )] foo() { §\C{// int (* foo())( int )}§
-	int ®i® = 7;
+[* [int]( int )] foo() { $\C{// int (* foo())( int )}$
+	int @i@ = 7;
 	int bar( int p ) {
-		®i® += 1; §\C{// dependent on local variable}§
-		sout | ®i®;
+		@i@ += 1; $\C{// dependent on local variable}$
+		sout | @i@;
 	}
-	return bar; §\C{// undefined because of local dependence}§
+	return bar; $\C{// undefined because of local dependence}$
 }
 int main() {
-	* [int]( int ) fp = foo(); §\C{// int (* fp)( int )}§
+	* [int]( int ) fp = foo(); $\C{// int (* fp)( int )}$
 	sout | fp( 3 );
 }
@@ -2647,5 +2701,5 @@
 In C and \CFA, lists of elements appear in several contexts, such as the parameter list of a routine call.
 \begin{cfa}
-f( ®2, x, 3 + i® ); §\C{// element list}§
+f( @2, x, 3 + i@ ); $\C{// element list}$
 \end{cfa}
 A list of elements is called a \newterm{tuple}, and is different from a \Index{comma expression}.
@@ -2656,5 +2710,5 @@
 
 In C and most programming languages, functions return at most one value;
-however, many operations have multiple outcomes, some exceptional (see~\VRef{s:ExceptionHandling}).
+however, many operations have multiple outcomes, some exceptional \see{\VRef{s:ExceptionHandling}}.
 To emulate functions with multiple return values, \emph{\Index{aggregation}} and/or \emph{\Index{aliasing}} is used.
 
@@ -2662,8 +2716,8 @@
 For example, consider C's \Indexc{div} function, which returns the quotient and remainder for a division of an integer value.
 \begin{cfa}
-typedef struct { int quot, rem; } div_t;	§\C[7cm]{// from include stdlib.h}§
+typedef struct { int quot, rem; } div_t;	$\C[7cm]{// from include stdlib.h}$
 div_t div( int num, int den );
-div_t qr = div( 13, 5 ); §\C{// return quotient/remainder aggregate}§
-printf( "%d %d\n", qr.quot, qr.rem ); §\C{// print quotient/remainder}§
+div_t qr = div( 13, 5 ); $\C{// return quotient/remainder aggregate}$
+printf( "%d %d\n", qr.quot, qr.rem ); $\C{// print quotient/remainder}$
 \end{cfa}
 This approach requires a name for the return type and fields, where \Index{naming} is a common programming-language issue.
@@ -2675,7 +2729,7 @@
 For example, consider C's \Indexc{modf} function, which returns the integral and fractional part of a floating value.
 \begin{cfa}
-double modf( double x, double * i ); §\C{// from include math.h}§
-double intp, frac = modf( 13.5, &intp ); §\C{// return integral and fractional components}§
-printf( "%g %g\n", intp, frac ); §\C{// print integral/fractional components}§
+double modf( double x, double * i ); $\C{// from include math.h}$
+double intp, frac = modf( 13.5, &intp ); $\C{// return integral and fractional components}$
+printf( "%g %g\n", intp, frac ); $\C{// print integral/fractional components}$
 \end{cfa}
 This approach requires allocating storage for the return values, which complicates the call site with a sequence of variable declarations leading to the call.
@@ -2704,8 +2758,8 @@
 When a function call is passed as an argument to another call, the best match of actual arguments to formal parameters is evaluated given all possible expression interpretations in the current scope.
 \begin{cfa}
-void g( int, int ); §\C{// 1}§
-void g( double, double ); §\C{// 2}§
-g( div( 13, 5 ) ); §\C{// select 1}§
-g( modf( 13.5 ) ); §\C{// select 2}§
+void g( int, int ); $\C{// 1}$
+void g( double, double ); $\C{// 2}$
+g( div( 13, 5 ) ); $\C{// select 1}$
+g( modf( 13.5 ) ); $\C{// select 2}$
 \end{cfa}
 In this case, there are two overloaded ©g© routines.
@@ -2716,9 +2770,9 @@
 The previous examples can be rewritten passing the multiple returned-values directly to the ©printf© function call.
 \begin{cfa}
-[ int, int ] div( int x, int y ); §\C{// from include stdlib}§
-printf( "%d %d\n", div( 13, 5 ) ); §\C{// print quotient/remainder}§
-
-[ double, double ] modf( double x ); §\C{// from include math}§
-printf( "%g %g\n", modf( 13.5 ) ); §\C{// print integral/fractional components}§
+[ int, int ] div( int x, int y ); $\C{// from include stdlib}$
+printf( "%d %d\n", div( 13, 5 ) ); $\C{// print quotient/remainder}$
+
+[ double, double ] modf( double x ); $\C{// from include math}$
+printf( "%g %g\n", modf( 13.5 ) ); $\C{// print integral/fractional components}$
 \end{cfa}
 This approach provides the benefits of compile-time checking for appropriate return statements as in aggregation, but without the required verbosity of declaring a new named type.
@@ -2730,6 +2784,6 @@
 \begin{cfa}
 int quot, rem;
-[ quot, rem ] = div( 13, 5 ); §\C{// assign multiple variables}§
-printf( "%d %d\n", quot, rem ); §\C{// print quotient/remainder}\CRT§
+[ quot, rem ] = div( 13, 5 ); $\C{// assign multiple variables}$
+printf( "%d %d\n", quot, rem ); $\C{// print quotient/remainder}\CRT$
 \end{cfa}
 Here, the multiple return-values are matched in much the same way as passing multiple return-values to multiple parameters in a call.
@@ -2760,6 +2814,6 @@
 In \CFA, it is possible to overcome this restriction by declaring a \newterm{tuple variable}.
 \begin{cfa}
-[int, int] ®qr® = div( 13, 5 ); §\C{// initialize tuple variable}§
-printf( "%d %d\n", ®qr® ); §\C{// print quotient/remainder}§
+[int, int] @qr@ = div( 13, 5 ); $\C{// initialize tuple variable}$
+printf( "%d %d\n", @qr@ ); $\C{// print quotient/remainder}$
 \end{cfa}
 It is now possible to match the multiple return-values to a single variable, in much the same way as \Index{aggregation}.
@@ -2767,5 +2821,5 @@
 One way to access the individual components of a tuple variable is with assignment.
 \begin{cfa}
-[ quot, rem ] = qr; §\C{// assign multiple variables}§
+[ quot, rem ] = qr; $\C{// assign multiple variables}$
 \end{cfa}
 
@@ -2790,9 +2844,9 @@
 [int, double] * p;
 
-int y = x.0; §\C{// access int component of x}§
-y = f().1; §\C{// access int component of f}§
-p->0 = 5; §\C{// access int component of tuple pointed-to by p}§
-g( x.1, x.0 ); §\C{// rearrange x to pass to g}§
-double z = [ x, f() ].0.1; §\C{// access second component of first component of tuple expression}§
+int y = x.0; $\C{// access int component of x}$
+y = f().1; $\C{// access int component of f}$
+p->0 = 5; $\C{// access int component of tuple pointed-to by p}$
+g( x.1, x.0 ); $\C{// rearrange x to pass to g}$
+double z = [ x, f() ].0.1; $\C{// access second component of first component of tuple expression}$
 \end{cfa}
 Tuple-index expressions can occur on any tuple-typed expression, including tuple-returning functions, square-bracketed tuple expressions, and other tuple-index expressions, provided the retrieved component is also a tuple.
@@ -2801,4 +2855,5 @@
 
 \subsection{Flattening and Structuring}
+\label{s:FlatteningStructuring}
 
 As evident in previous examples, tuples in \CFA do not have a rigid structure.
@@ -2861,8 +2916,8 @@
 double y;
 [int, double] z;
-[y, x] = 3.14; §\C{// mass assignment}§
-[x, y] = z;							    §\C{// multiple assignment}§
-z = 10;							        §\C{// mass assignment}§
-z = [x, y]; §\C{// multiple assignment}§
+[y, x] = 3.14; $\C{// mass assignment}$
+[x, y] = z;							    $\C{// multiple assignment}$
+z = 10;							        $\C{// mass assignment}$
+z = [x, y]; $\C{// multiple assignment}$
 \end{cfa}
 Let $L_i$ for $i$ in $[0, n)$ represent each component of the flattened left side, $R_i$ represent each component of the flattened right side of a multiple assignment, and $R$ represent the right side of a mass assignment.
@@ -2872,5 +2927,5 @@
 \begin{cfa}
 [ int, int ] x, y, z;
-[ x, y ] = z;						   §\C{// multiple assignment, invalid 4 != 2}§
+[ x, y ] = z;						   $\C{// multiple assignment, invalid 4 != 2}$
 \end{cfa}
 Multiple assignment assigns $R_i$ to $L_i$ for each $i$.
@@ -2908,5 +2963,5 @@
 	double c, d;
 	[ void ] f( [ int, int ] );
-	f( [ c, a ] = [ b, d ] = 1.5 ); §\C{// assignments in parameter list}§
+	f( [ c, a ] = [ b, d ] = 1.5 ); $\C{// assignments in parameter list}$
 \end{cfa}
 The tuple expression begins with a mass assignment of ©1.5© into ©[b, d]©, which assigns ©1.5© into ©b©, which is truncated to ©1©, and ©1.5© into ©d©, producing the tuple ©[1, 1.5]© as a result.
@@ -2921,12 +2976,12 @@
 \begin{cfa}
 struct S;
-void ?{}(S *); §\C{// (1)}§
-void ?{}(S *, int); §\C{// (2)}§
-void ?{}(S * double); §\C{// (3)}§
-void ?{}(S *, S); §\C{// (4)}§
-
-[S, S] x = [3, 6.28]; §\C{// uses (2), (3), specialized constructors}§
-[S, S] y; §\C{// uses (1), (1), default constructor}§
-[S, S] z = x.0; §\C{// uses (4), (4), copy constructor}§
+void ?{}(S *); $\C{// (1)}$
+void ?{}(S *, int); $\C{// (2)}$
+void ?{}(S *, double); $\C{// (3)}$
+void ?{}(S *, S); $\C{// (4)}$
+
+[S, S] x = [3, 6.28]; $\C{// uses (2), (3), specialized constructors}$
+[S, S] y; $\C{// uses (1), (1), default constructor}$
+[S, S] z = x.0; $\C{// uses (4), (4), copy constructor}$
 \end{cfa}
 In this example, ©x© is initialized by the multiple constructor calls ©?{}(&x.0, 3)© and ©?{}(&x.1, 6.28)©, while ©y© is initialized by two default constructor calls ©?{}(&y.0)© and ©?{}(&y.1)©.
@@ -2969,6 +3024,6 @@
 A member-access tuple may be used anywhere a tuple can be used, \eg:
 \begin{cfa}
-s.[ y, z, x ] = [ 3, 3.2, 'x' ]; §\C{// equivalent to s.x = 'x', s.y = 3, s.z = 3.2}§
-f( s.[ y, z ] ); §\C{// equivalent to f( s.y, s.z )}§
+s.[ y, z, x ] = [ 3, 3.2, 'x' ]; $\C{// equivalent to s.x = 'x', s.y = 3, s.z = 3.2}$
+f( s.[ y, z ] ); $\C{// equivalent to f( s.y, s.z )}$
 \end{cfa}
 Note, the fields appearing in a record-field tuple may be specified in any order;
@@ -2980,6 +3035,6 @@
 void f( double, long );
 
-f( x.[ 0, 3 ] ); §\C{// f( x.0, x.3 )}§
-x.[ 0, 1 ] = x.[ 1, 0 ]; §\C{// [ x.0, x.1 ] = [ x.1, x.0 ]}§
+f( x.[ 0, 3 ] ); $\C{// f( x.0, x.3 )}$
+x.[ 0, 1 ] = x.[ 1, 0 ]; $\C{// [ x.0, x.1 ] = [ x.1, x.0 ]}$
 [ long, int, long ] y = x.[ 2, 0, 2 ];
 \end{cfa}
@@ -2998,5 +3053,5 @@
 \begin{cfa}
 [ int, float, double ] f();
-[ double, float ] x = f().[ 2, 1 ]; §\C{// f() called once}§
+[ double, float ] x = f().[ 2, 1 ]; $\C{// f() called once}$
 \end{cfa}
 
@@ -3011,9 +3066,9 @@
 That is, a cast can be used to select the type of an expression when it is ambiguous, as in the call to an overloaded function.
 \begin{cfa}
-int f(); §\C{// (1)}§
-double f(); §\C{// (2)}§
-
-f(); §\C{// ambiguous - (1),(2) both equally viable}§
-(int)f(); §\C{// choose (2)}§
+int f(); $\C{// (1)}$
+double f(); $\C{// (2)}$
+
+f(); $\C{// ambiguous - (1),(2) both equally viable}$
+(int)f(); $\C{// choose (1)}$
 \end{cfa}
 Since casting is a fundamental operation in \CFA, casts need to be given a meaningful interpretation in the context of tuples.
@@ -3023,9 +3078,9 @@
 void g();
 
-(void)f(); §\C{// valid, ignore results}§
-(int)g(); §\C{// invalid, void cannot be converted to int}§
+(void)f(); $\C{// valid, ignore results}$
+(int)g(); $\C{// invalid, void cannot be converted to int}$
 
 struct A { int x; };
-(struct A)f(); §\C{// invalid, int cannot be converted to A}§
+(struct A)f(); $\C{// invalid, int cannot be converted to A}$
 \end{cfa}
 In C, line 4 is a valid cast, which calls ©f© and discards its result.
@@ -3043,9 +3098,9 @@
 	[int, [int, int], int] g();
 
-	([int, double])f(); §\C{// (1) valid}§
-	([int, int, int])g(); §\C{// (2) valid}§
-	([void, [int, int]])g(); §\C{// (3) valid}§
-	([int, int, int, int])g(); §\C{// (4) invalid}§
-	([int, [int, int, int]])g(); §\C{// (5) invalid}§
+	([int, double])f(); $\C{// (1) valid}$
+	([int, int, int])g(); $\C{// (2) valid}$
+	([void, [int, int]])g(); $\C{// (3) valid}$
+	([int, int, int, int])g(); $\C{// (4) invalid}$
+	([int, [int, int, int]])g(); $\C{// (5) invalid}$
 \end{cfa}
 
@@ -3107,8 +3162,8 @@
 void f([int, int], int, int);
 
-f([0, 0], 0, 0); §\C{// no cost}§
-f(0, 0, 0, 0); §\C{// cost for structuring}§
-f([0, 0,], [0, 0]); §\C{// cost for flattening}§
-f([0, 0, 0], 0); §\C{// cost for flattening and structuring}§
+f([0, 0], 0, 0); $\C{// no cost}$
+f(0, 0, 0, 0); $\C{// cost for structuring}$
+f([0, 0,], [0, 0]); $\C{// cost for flattening}$
+f([0, 0, 0], 0); $\C{// cost for flattening and structuring}$
 \end{cfa}
 
@@ -3146,5 +3201,5 @@
 The general syntax of a lexical list is:
 \begin{cfa}
-[ §\emph{exprlist}§ ]
+[ $\emph{exprlist}$ ]
 \end{cfa}
 where ©$\emph{exprlist}$© is a list of one or more expressions separated by commas.
@@ -3158,5 +3213,5 @@
 Tuples are permitted to contain sub-tuples (\ie nesting), such as ©[ [ 14, 21 ], 9 ]©, which is a 2-element tuple whose first element is itself a tuple.
 Note, a tuple is not a record (structure);
-a record denotes a single value with substructure, whereas a tuple is multiple values with no substructure (see flattening coercion in Section 12.1).
+a record denotes a single value with substructure, whereas a tuple is multiple values with no substructure \see{flattening coercion in \VRef{s:FlatteningStructuring}}.
 In essence, tuples are largely a compile time phenomenon, having little or no runtime presence.
 
@@ -3166,5 +3221,5 @@
 The general syntax of a tuple type is:
 \begin{cfa}
-[ §\emph{typelist}§ ]
+[ $\emph{typelist}$ ]
 \end{cfa}
 where ©$\emph{typelist}$© is a list of one or more legal \CFA or C type specifications separated by commas, which may include other tuple type specifications.
@@ -3173,5 +3228,5 @@
 [ unsigned int, char ]
 [ double, double, double ]
-[ * int, int * ] §\C{// mix of CFA and ANSI}§
+[ * int, int * ] $\C{// mix of CFA and ANSI}$
 [ * [ 5 ] int, * * char, * [ [ int, int ] ] (int, int) ]
 \end{cfa}
@@ -3180,6 +3235,6 @@
 Examples of declarations using tuple types are:
 \begin{cfa}
-[ int, int ] x; §\C{// 2 element tuple, each element of type int}§
-* [ char, char ] y; §\C{// pointer to a 2 element tuple}§
+[ int, int ] x; $\C{// 2 element tuple, each element of type int}$
+* [ char, char ] y; $\C{// pointer to a 2 element tuple}$
 [ [ int, int ] ] z ([ int, int ]);
 \end{cfa}
@@ -3198,6 +3253,6 @@
 [ int, int ] w1;
 [ int, int, int ] w2;
-[ void ] f (int, int, int); §\C{// three input parameters of type int}§
-[ void ] g ([ int, int, int ]); §\C{3 element tuple as input}§
+[ void ] f (int, int, int); $\C{// three input parameters of type int}$
+[ void ] g ([ int, int, int ]); $\C{// 3 element tuple as input}$
 f( [ 1, 2, 3 ] );
 f( w1, 3 );
@@ -3279,5 +3334,5 @@
 [ int, int, int, int ] w = [ 1, 2, 3, 4 ];
 int x = 5;
-[ x, w ] = [ w, x ]; §\C{// all four tuple coercions}§
+[ x, w ] = [ w, x ]; $\C{// all four tuple coercions}$
 \end{cfa}
 Starting on the right-hand tuple in the last assignment statement, w is opened, producing a tuple of four values;
@@ -3285,5 +3340,5 @@
 This tuple is then flattened, yielding ©[ 1, 2, 3, 4, 5 ]©, which is structured into ©[ 1, [ 2, 3, 4, 5 ] ]© to match the tuple type of the left-hand side.
 The tuple ©[ 2, 3, 4, 5 ]© is then closed to create a tuple value.
-Finally, ©x© is assigned ©1© and ©w© is assigned the tuple value using multiple assignment (see Section 14).
+Finally, ©x© is assigned ©1© and ©w© is assigned the tuple value using \Index{multiple assignment} \see{\VRef{s:TupleAssignment}}.
 \begin{rationale}
 A possible additional language extension is to use the structuring coercion for tuples to initialize a complex record with a tuple.
@@ -3296,5 +3351,5 @@
 Mass assignment has the following form:
 \begin{cfa}
-[ §\emph{lvalue}§, ... , §\emph{lvalue}§ ] = §\emph{expr}§;
+[ $\emph{lvalue}$, ... , $\emph{lvalue}$ ] = $\emph{expr}$;
 \end{cfa}
 \index{lvalue}
@@ -3336,5 +3391,5 @@
 Multiple assignment has the following form:
 \begin{cfa}
-[ §\emph{lvalue}§, ... , §\emph{lvalue}§ ] = [ §\emph{expr}§, ... , §\emph{expr}§ ];
+[ $\emph{lvalue}$, ... , $\emph{lvalue}$ ] = [ $\emph{expr}$, ... , $\emph{expr}$ ];
 \end{cfa}
 \index{lvalue}
@@ -3367,6 +3422,6 @@
 both these examples produce indeterminate results:
 \begin{cfa}
-f( x++, x++ ); §\C{// C routine call with side effects in arguments}§
-[ v1, v2 ] = [ x++, x++ ]; §\C{// side effects in righthand side of multiple assignment}§
+f( x++, x++ ); $\C{// C routine call with side effects in arguments}$
+[ v1, v2 ] = [ x++, x++ ]; $\C{// side effects in right-hand side of multiple assignment}$
 \end{cfa}
 
@@ -3377,5 +3432,5 @@
 Cascade assignment has the following form:
 \begin{cfa}
-§\emph{tuple}§ = §\emph{tuple}§ = ... = §\emph{tuple}§;
+$\emph{tuple}$ = $\emph{tuple}$ = ... = $\emph{tuple}$;
 \end{cfa}
 and it has the same parallel semantics as for mass and multiple assignment.
@@ -3424,10 +3479,10 @@
 \begin{cfa}
 int x = 1, y = 2, z = 3;
-sout | x ®|® y ®|® z;
+sout | x @|@ y @|@ z;
 \end{cfa}
 &
 \begin{cfa}
 
-cout << x ®<< " "® << y ®<< " "® << z << endl;
+cout << x @<< " "@ << y @<< " "@ << z << endl;
 \end{cfa}
 &
@@ -3438,13 +3493,13 @@
 \\
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-1® ®2® ®3
+1@ @2@ @3
 \end{cfa}
 &
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-1® ®2® ®3
+1@ @2@ @3
 \end{cfa}
 &
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-1® ®2® ®3
+1@ @2@ @3
 \end{cfa}
 \end{tabular}
@@ -3454,8 +3509,8 @@
 \begin{cfa}
 [int, [ int, int ] ] t1 = [ 1, [ 2, 3 ] ], t2 = [ 4, [ 5, 6 ] ];
-sout | t1 | t2; §\C{// print tuples}§
+sout | t1 | t2; $\C{// print tuples}$
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt]
-1®, ®2®, ®3 4®, ®5®, ®6
+1@, @2@, @3 4@, @5@, @6
 \end{cfa}
 Finally, \CFA uses the logical-or operator for I/O as it is the lowest-priority \emph{overloadable} operator, other than assignment.
@@ -3466,5 +3521,5 @@
 &
 \begin{cfa}
-sout | x * 3 | y + 1 | z << 2 | x == y | ®(®x | y®)® | ®(®x || y®)® | ®(®x > z ? 1 : 2®)®;
+sout | x * 3 | y + 1 | z << 2 | x == y | @(@x | y@)@ | @(@x || y@)@ | @(@x > z ? 1 : 2@)@;
 \end{cfa}
 \\
@@ -3472,5 +3527,5 @@
 &
 \begin{cfa}
-cout << x * 3 << y + 1 << ®(®z << 2®)® << ®(®x == y®)® << ®(®x | y®)® << ®(®x || y®)® << ®(®x > z ? 1 : 2®)® << endl;
+cout << x * 3 << y + 1 << @(@z << 2@)@ << @(@x == y@)@ << @(@x | y@)@ << @(@x || y@)@ << @(@x > z ? 1 : 2@)@ << endl;
 \end{cfa}
 \\
@@ -3507,5 +3562,5 @@
 \\
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®1® ®2.5® ®A®
+@1@ @2.5@ @A@
 
 
@@ -3513,5 +3568,5 @@
 &
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®1® ®2.5® ®A®
+@1@ @2.5@ @A@
 
 
@@ -3519,7 +3574,7 @@
 &
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®1®
-®2.5®
-®A®
+@1@
+@2.5@
+@A@
 \end{cfa}
 \end{tabular}
@@ -3557,43 +3612,42 @@
 
 \item
-{\lstset{language=CFA,deletedelim=**[is][]{¢}{¢}}
-A separator does not appear before a C string starting with the (extended) \Index*{ASCII}\index{ASCII!extended} characters: \lstinline[basicstyle=\tt]@,.;!?)]}%¢»@, where \lstinline[basicstyle=\tt]@»@ is a closing citation mark.
-\begin{cfa}[belowskip=0pt]
+A separator does not appear before a C string starting with the (extended) \Index*{ASCII}\index{ASCII!extended} characters: \LstStringStyle{,.;!?)]\}\%\textcent\guillemotright}, where \LstStringStyle{\guillemotright} is a closing citation mark.
+\begin{cfa}
 sout | 1 | ", x" | 2 | ". x" | 3 | "; x" | 4 | "! x" | 5 | "? x" | 6 | "% x"
-		| 7 | "¢ x" | 8 | "» x" | 9 | ") x" | 10 | "] x" | 11 | "} x";
-\end{cfa}
-\begin{cfa}[basicstyle=\tt,showspaces=true,aboveskip=0pt,belowskip=0pt]
-1®,® x 2®.® x 3®;® x 4®!® x 5®?® x 6®%® x 7§\color{red}\textcent§ x 8®»® x 9®)® x 10®]® x 11®}® x
-\end{cfa}}%
-
-\item
-A separator does not appear after a C string ending with the (extended) \Index*{ASCII}\index{ASCII!extended} characters: \lstinline[mathescape=off,basicstyle=\tt]@([{=$£¥¡¿«@, where \lstinline[basicstyle=\tt]@¡¿@ are inverted opening exclamation and question marks, and \lstinline[basicstyle=\tt]@«@ is an opening citation mark.
+	   | 7 | "$\LstStringStyle{\textcent}$ x" | 8 | "$\LstStringStyle{\guillemotright}$ x" | 9 | ") x" | 10 | "] x" | 11 | "} x";
+\end{cfa}
+\begin{cfa}[showspaces=true]
+1@,@ x 2@.@ x 3@;@ x 4@!@ x 5@?@ x 6@%@ x 7$\R{\LstStringStyle{\textcent}}$ x 8$\R{\LstStringStyle{\guillemotright}}$ x 9@)@ x 10@]@ x 11@}@ x
+\end{cfa}
+
+\item
+A separator does not appear after a C string ending with the (extended) \Index*{ASCII}\index{ASCII!extended} characters: \LstStringStyle{([\{=\$\textsterling\textyen\textexclamdown\textquestiondown\guillemotleft}, where \LstStringStyle{\textexclamdown\textquestiondown} are inverted opening exclamation and question marks, and \LstStringStyle{\guillemotleft} is an opening citation mark.
 %$
-\begin{cfa}[mathescape=off]
-sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x =" | 4 | "x $" | 5 | "x £" | 6 | "x ¥"
-		| 7 | "x ¡" | 8 | "x ¿" | 9 | "x «" | 10;
+\begin{cfa}
+sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x =" | 4 | "x $" | 5 | "x $\LstStringStyle{\textsterling}$" | 6 | "x $\LstStringStyle{\textyen}$"
+	   | 7 | "x $\LstStringStyle{\textexclamdown}$" | 8 | "x $\LstStringStyle{\textquestiondown}$" | 9 | "x $\LstStringStyle{\guillemotleft}$" | 10;
 \end{cfa}
 %$
-\begin{cfa}[mathescape=off,basicstyle=\tt,showspaces=true,aboveskip=0pt,belowskip=0pt]
-x ®(®1 x ®[®2 x ®{®3 x ®=®4 x ®$®5 x ®£®6 x ®¥®7 x ®¡®8 x ®¿®9 x ®«®10
+\begin{cfa}[showspaces=true]
+x @(@1 x @[@2 x @{@3 x @=@4 x $\LstStringStyle{\textdollar}$5 x $\R{\LstStringStyle{\textsterling}}$6 x $\R{\LstStringStyle{\textyen}}$7 x $\R{\LstStringStyle{\textexclamdown}}$8 x $\R{\LstStringStyle{\textquestiondown}}$9 x $\R{\LstStringStyle{\guillemotleft}}$10
 \end{cfa}
 %$
 
 \item
-A seperator does not appear before/after a C string starting/ending with the \Index*{ASCII} quote or whitespace characters: \lstinline[basicstyle=\tt,showspaces=true]@`'": \t\v\f\r\n@
-\begin{cfa}[belowskip=0pt]
+A separator does not appear before/after a C string starting/ending with the \Index*{ASCII} quote or whitespace characters: \lstinline[basicstyle=\tt,showspaces=true]{`'": \t\v\f\r\n}
+\begin{cfa}
 sout | "x`" | 1 | "`x'" | 2 | "'x\"" | 3 | "\"x:" | 4 | ":x " | 5 | " x\t" | 6 | "\tx";
 \end{cfa}
-\begin{cfa}[basicstyle=\tt,showspaces=true,showtabs=true,aboveskip=0pt,belowskip=0pt]
-x®`®1®`®x§\color{red}\texttt{'}§2§\color{red}\texttt{'}§x§\color{red}\texttt{"}§3§\color{red}\texttt{"}§x®:®4®:®x® ®5® ®x®	®6®	®x
+\begin{cfa}[showspaces=true,showtabs=true]
+x@`@1@`@x$\R{\texttt{'}}$2$\R{\texttt{'}}$x$\R{\texttt{"}}$3$\R{\texttt{"}}$x@:@4@:@x@ @5@ @x@	@6@	@x
 \end{cfa}
 
 \item
 If a space is desired before or after one of the special string start/end characters, simply insert a space.
-\begin{cfa}[belowskip=0pt]
-sout | "x (§\color{red}\texttt{\textvisiblespace}§" | 1 | "§\color{red}\texttt{\textvisiblespace}§) x" | 2 | "§\color{red}\texttt{\textvisiblespace}§, x" | 3 | "§\color{red}\texttt{\textvisiblespace}§:x:§\color{red}\texttt{\textvisiblespace}§" | 4;
-\end{cfa}
-\begin{cfa}[basicstyle=\tt,showspaces=true,showtabs=true,aboveskip=0pt,belowskip=0pt]
-x (® ®1® ®) x 2® ®, x 3® ®:x:® ®4
+\begin{cfa}
+sout | "x ($\R{\texttt{\textvisiblespace}}$" | 1 | "$\R{\texttt{\textvisiblespace}}$) x" | 2 | "$\R{\texttt{\textvisiblespace}}$, x" | 3 | "$\R{\texttt{\textvisiblespace}}$:x:$\R{\texttt{\textvisiblespace}}$" | 4;
+\end{cfa}
+\begin{cfa}[showspaces=true,showtabs=true]
+x (@ @1@ @) x 2@ @, x 3@ @:x:@ @4
 \end{cfa}
 \end{enumerate}
@@ -3608,36 +3662,36 @@
 \Indexc{sepSet}\index{manipulator!sepSet@©sepSet©} and \Indexc{sep}\index{manipulator!sep@©sep©}/\Indexc{sepGet}\index{manipulator!sepGet@©sepGet©} set and get the separator string.
 The separator string can be at most 16 characters including the ©'\0'© string terminator (15 printable characters).
-\begin{cfa}[mathescape=off,belowskip=0pt]
-sepSet( sout, ", $" ); §\C{// set separator from " " to ", \$"}§
-sout | 1 | 2 | 3 | " \"" | ®sep® | "\"";
+\begin{cfa}[escapechar=off,belowskip=0pt]
+sepSet( sout, ", $" ); $\C{// set separator from " " to ", \$"}$
+sout | 1 | 2 | 3 | " \"" | @sep@ | "\"";
 \end{cfa}
 %$
 \begin{cfa}[mathescape=off,showspaces=true,aboveskip=0pt]
-1®, $®2®, $®3 ®", $"®
+1@, $@2@, $@3 @", $"@
 \end{cfa}
 %$
 \begin{cfa}[belowskip=0pt]
-sepSet( sout, " " ); §\C{// reset separator to " "}§
-sout | 1 | 2 | 3 | " \"" | ®sepGet( sout )® | "\"";
+sepSet( sout, " " ); $\C{// reset separator to " "}$
+sout | 1 | 2 | 3 | " \"" | @sepGet( sout )@ | "\"";
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt]
-1® ®2® ®3 ®" "®
+1@ @2@ @3 @" "@
 \end{cfa}
 ©sepGet© can be used to store a separator and then restore it:
 \begin{cfa}[belowskip=0pt]
-char store[®sepSize®]; §\C{// sepSize is the maximum separator size}§
-strcpy( store, sepGet( sout ) ); §\C{// copy current separator}§
-sepSet( sout, "_" ); §\C{// change separator to underscore}§
+char store[@sepSize@]; $\C{// sepSize is the maximum separator size}$
+strcpy( store, sepGet( sout ) ); $\C{// copy current separator}$
+sepSet( sout, "_" ); $\C{// change separator to underscore}$
 sout | 1 | 2 | 3;
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-1®_®2®_®3
+1@_@2@_@3
 \end{cfa}
 \begin{cfa}[belowskip=0pt]
-sepSet( sout, store ); §\C{// change separator back to original}§
+sepSet( sout, store ); $\C{// change separator back to original}$
 sout | 1 | 2 | 3;
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt]
-1® ®2® ®3
+1@ @2@ @3
 \end{cfa}
 
@@ -3646,16 +3700,16 @@
 The tuple separator-string can be at most 16 characters including the ©'\0'© string terminator (15 printable characters).
 \begin{cfa}[belowskip=0pt]
-sepSetTuple( sout, " " ); §\C{// set tuple separator from ", " to " "}§
-sout | t1 | t2 | " \"" | ®sepTuple® | "\"";
+sepSetTuple( sout, " " ); $\C{// set tuple separator from ", " to " "}$
+sout | t1 | t2 | " \"" | @sepTuple@ | "\"";
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt]
-1 2 3 4 5 6 ®" "®
+1 2 3 4 5 6 @" "@
 \end{cfa}
 \begin{cfa}[belowskip=0pt]
-sepSetTuple( sout, ", " ); §\C{// reset tuple separator to ", "}§
-sout | t1 | t2 | " \"" | ®sepGetTuple( sout )® | "\"";
+sepSetTuple( sout, ", " ); $\C{// reset tuple separator to ", "}$
+sout | t1 | t2 | " \"" | @sepGetTuple( sout )@ | "\"";
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt]
-1, 2, 3 4, 5, 6 ®", "®
+1, 2, 3 4, 5, 6 @", "@
 \end{cfa}
 As for ©sepGet©, ©sepGetTuple© can be use to store a tuple separator and then restore it.
@@ -3664,5 +3718,5 @@
 \Indexc{sepDisable}\index{manipulator!sepDisable@©sepDisable©} and \Indexc{sepEnable}\index{manipulator!sepEnable@©sepEnable©} toggle printing the separator.
 \begin{cfa}[belowskip=0pt]
-sout | sepDisable | 1 | 2 | 3; §\C{// turn off implicit separator}§
+sout | sepDisable | 1 | 2 | 3; $\C{// turn off implicit separator}$
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
@@ -3670,5 +3724,5 @@
 \end{cfa}
 \begin{cfa}[belowskip=0pt]
-sout | sepEnable | 1 | 2 | 3; §\C{// turn on implicit separator}§
+sout | sepEnable | 1 | 2 | 3; $\C{// turn on implicit separator}$
 \end{cfa}
 \begin{cfa}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
@@ -3679,5 +3733,5 @@
 \Indexc{sepOn}\index{manipulator!sepOn@©sepOn©} and \Indexc{sepOff}\index{manipulator!sepOff@©sepOff©} toggle printing the separator with respect to the next printed item, and then return to the global seperator setting.
 \begin{cfa}[belowskip=0pt]
-sout | 1 | sepOff | 2 | 3; §\C{// turn off implicit separator for the next item}§
+sout | 1 | sepOff | 2 | 3; $\C{// turn off implicit separator for the next item}$
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
@@ -3685,5 +3739,5 @@
 \end{cfa}
 \begin{cfa}[belowskip=0pt]
-sout | sepDisable | 1 | sepOn | 2 | 3; §\C{// turn on implicit separator for the next item}§
+sout | sepDisable | 1 | sepOn | 2 | 3; $\C{// turn on implicit separator for the next item}$
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
@@ -3692,5 +3746,5 @@
 The tuple separator also responses to being turned on and off.
 \begin{cfa}[belowskip=0pt]
-sout | t1 | sepOff | t2; §\C{// turn off implicit separator for the next item}§
+sout | t1 | sepOff | t2; $\C{// turn off implicit separator for the next item}$
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
@@ -3700,5 +3754,5 @@
 use ©sep© to accomplish this functionality.
 \begin{cfa}[belowskip=0pt]
-sout | sepOn | 1 | 2 | 3 | sepOn; §\C{// sepOn does nothing at start/end of line}§
+sout | sepOn | 1 | 2 | 3 | sepOn; $\C{// sepOn does nothing at start/end of line}$
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
@@ -3706,8 +3760,8 @@
 \end{cfa}
 \begin{cfa}[belowskip=0pt]
-sout | sep | 1 | 2 | 3 | sep ; §\C{// use sep to print separator at start/end of line}§
+sout | sep | 1 | 2 | 3 | sep ; $\C{// use sep to print separator at start/end of line}$
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-® ®1 2 3® ®
+@ @1 2 3@ @
 \end{cfa}
 \end{enumerate}
@@ -3721,5 +3775,5 @@
 \begin{enumerate}[parsep=0pt]
 \item
-\Indexc{nl}\index{manipulator!nl@©nl©} scans characters until the next newline character, i.e., ignore the remaining characters in the line.
+\Indexc{nl}\index{manipulator!nl@©nl©} scans characters until the next newline character, \ie ignore the remaining characters in the line.
 \item
 \Indexc{nlOn}\index{manipulator!nlOn@©nlOn©} reads the newline character, when reading single characters.
@@ -3729,6 +3783,6 @@
 For example, in:
 \begin{cfa}
-sin | i | ®nl® | j;
-1 ®2®
+sin | i | @nl@ | j;
+1 @2@
 3
 \end{cfa}
@@ -3740,9 +3794,9 @@
 \Indexc{nl}\index{manipulator!nl@©nl©} inserts a newline.
 \begin{cfa}
-sout | nl; §\C{// only print newline}§
-sout | 2; §\C{// implicit newline}§
-sout | 3 | nl | 4 | nl; §\C{// terminating nl merged with implicit newline}§
-sout | 5 | nl | nl; §\C{// again terminating nl merged with implicit newline}§
-sout | 6; §\C{// implicit newline}§
+sout | nl; $\C{// only print newline}$
+sout | 2; $\C{// implicit newline}$
+sout | 3 | nl | 4 | nl; $\C{// terminating nl merged with implicit newline}$
+sout | 5 | nl | nl; $\C{// again terminating nl merged with implicit newline}$
+sout | 6; $\C{// implicit newline}$
 
 2
@@ -3771,5 +3825,5 @@
 0b0 0b11011 0b11011 0b11011 0b11011
 sout | bin( -27HH ) | bin( -27H ) | bin( -27 ) | bin( -27L );
-0b11100101 0b1111111111100101 0b11111111111111111111111111100101 0b®(58 1s)®100101
+0b11100101 0b1111111111100101 0b11111111111111111111111111100101 0b@(58 1s)@100101
 \end{cfa}
 
@@ -3810,5 +3864,5 @@
 \begin{cfa}[belowskip=0pt]
 sout | upcase( bin( 27 ) ) | upcase( hex( 27 ) ) | upcase( 27.5e-10 ) | upcase( hex( 27.5 ) );
-0®B®11011 0®X®1®B® 2.75®E®-09 0®X®1.®B®8®P®+4
+0@B@11011 0@X@1@B@ 2.75@E@-09 0@X@1.@B@8@P@+4
 \end{cfa}
 
@@ -3826,5 +3880,5 @@
 \begin{cfa}[belowskip=0pt]
 sout | 0. | nodp( 0. ) | 27.0 | nodp( 27.0 ) | nodp( 27.5 );
-0.0 ®0® 27.0 ®27® 27.5
+0.0 @0@ 27.0 @27@ 27.5
 \end{cfa}
 
@@ -3833,5 +3887,5 @@
 \begin{cfa}[belowskip=0pt]
 sout | sign( 27 ) | sign( -27 ) | sign( 27. ) | sign( -27. ) | sign( 27.5 ) | sign( -27.5 );
-®+®27 -27 ®+®27.0 -27.0 ®+®27.5 -27.5
+@+@27 -27 @+@27.0 -27.0 @+@27.5 -27.5
 \end{cfa}
 
@@ -3846,7 +3900,7 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®  ®34 ® ®34 34
-®  ®4.000000 ® ®4.000000 4.000000
-®  ®ab ® ®ab ab
+@  @34 @ @34 34
+@  @4.000000 @ @4.000000 4.000000
+@  @ab @ @ab ab
 \end{cfa}
 If the value is larger, it is printed without truncation, ignoring the ©minimum©.
@@ -3857,7 +3911,7 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-3456®7® 345®67® 34®567®
-3456®.® 345®6.® 34®56.®
-abcd®e® abc®de® ab®cde®
+3456@7@ 345@67@ 34@567@
+3456@.@ 345@6.@ 34@56.@
+abcd@e@ abc@de@ ab@cde@
 \end{cfa}
 
@@ -3868,5 +3922,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
- ®0®34     ®00®34 ®00000000®34
+ @0@34     @00@34 @00000000@34
 \end{cfa}
 If the value is larger, it is printed without truncation, ignoring the ©precision©.
@@ -3883,5 +3937,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®    ® ®00000000®34
+@    @ @00000000@34
 \end{cfa}
 For floating-point types, ©precision© is the minimum number of digits after the decimal point.
@@ -3890,7 +3944,7 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-27.®500®     27.®5®      28. 27.®50000000®
-\end{cfa}
-For the C-string type, ©precision© is the maximum number of printed characters, so the string is truncared if it exceeds the maximum.
+27.@500@     27.@5@      28. 27.@50000000@
+\end{cfa}
+For the C-string type, ©precision© is the maximum number of printed characters, so the string is truncated if it exceeds the maximum.
 \begin{cfa}[belowskip=0pt]
 sout | wd( 6,8, "abcd" ) | wd( 6,8, "abcdefghijk" ) | wd( 6,3, "abcd" );
@@ -3908,5 +3962,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-234.567 234.5®7®  234.®6®    23®5®
+234.567 234.5@7@  234.@6@    23@5@
 \end{cfa}
 If a value's magnitude is greater than ©significant©, the value is printed in scientific notation with the specified number of significant digits.
@@ -3915,5 +3969,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-234567. 2.3457®e+05® 2.346®e+05® 2.35®e+05®
+234567. 2.3457@e+05@ 2.346@e+05@ 2.35@e+05@
 \end{cfa}
 If ©significant© is greater than ©minimum©, it defines the number of printed characters.
@@ -3931,5 +3985,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-27®  ® 27.000000  27.500000  027  27.500®    ®
+27@  @ 27.000000  27.500000  027  27.500@    @
 \end{cfa}
 
@@ -3938,5 +3992,5 @@
 \begin{cfa}[belowskip=0pt]
 sout | pad0( wd( 4, 27 ) ) | pad0( wd( 4,3, 27 ) ) | pad0( wd( 8,3, 27.5 ) );
-®00®27  ®0®27 ®00®27.500
+@00@27  @0@27 @00@27.500
 \end{cfa}
 \end{enumerate}
@@ -4034,7 +4088,7 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®abc   ®
-®abc  ®
-®xx®
+@abc   @
+@abc  @
+@xx@
 \end{cfa}
 
@@ -4047,5 +4101,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®abcd1233.456E+2®
+@abcd1233.456E+2@
 \end{cfa}
 Note, input ©wdi© cannot be overloaded with output ©wd© because both have the same parameters but return different types.
@@ -4060,5 +4114,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®  -75.35e-4® 25
+@  -75.35e-4@ 25
 \end{cfa}
 
@@ -4072,5 +4126,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®bca®xyz
+@bca@xyz
 \end{cfa}
 
@@ -4084,5 +4138,5 @@
 \end{cfa}
 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
-®xyz®bca
+@xyz@bca
 \end{cfa}
 \end{enumerate}
@@ -4101,5 +4155,5 @@
 
 A type definition is different from a typedef in C because a typedef just creates an alias for a type,  while Do.s type definition creates a distinct type.
-This means that users can define distinct function overloads for the new type (see Overloading for more information).
+This means that users can define distinct function overloads for the new type \see{\VRef{s:Overloading} for more information}.
 For example:
 
@@ -4207,5 +4261,5 @@
 \CFA supports C initialization of structures, but it also adds constructors for more advanced initialization.
 Additionally, \CFA adds destructors that are called when a variable is deallocated (variable goes out of scope or object is deleted).
-These functions take a reference to the structure as a parameter (see References for more information).
+These functions take a reference to the structure as a parameter \see{\VRef{s:PointerReference} for more information}.
 
 \begin{figure}
@@ -4258,4 +4312,5 @@
 
 \section{Overloading}
+\label{s:Overloading}
 
 Overloading refers to the capability of a programmer to define and use multiple objects in a program with the same name.
@@ -4468,9 +4523,9 @@
 For example, given
 \begin{cfa}
-auto j = ®...®
+auto j = @...@
 \end{cfa}
 and the need to write a routine to compute using ©j©
 \begin{cfa}
-void rtn( ®...® parm );
+void rtn( @...@ parm );
 rtn( j );
 \end{cfa}
@@ -4713,5 +4768,5 @@
 
 coroutine Fibonacci {
-	int fn; §\C{// used for communication}§
+	int fn; $\C{// used for communication}$
 };
 void ?{}( Fibonacci * this ) {
@@ -4719,23 +4774,23 @@
 }
 void main( Fibonacci * this ) {
-	int fn1, fn2; §\C{// retained between resumes}§
-	this->fn = 0; §\C{// case 0}§
+	int fn1, fn2; $\C{// retained between resumes}$
+	this->fn = 0; $\C{// case 0}$
 	fn1 = this->fn;
-	suspend(); §\C{// return to last resume}§
-
-	this->fn = 1; §\C{// case 1}§
+	suspend(); $\C{// return to last resume}$
+
+	this->fn = 1; $\C{// case 1}$
 	fn2 = fn1;
 	fn1 = this->fn;
-	suspend(); §\C{// return to last resume}§
-
-	for ( ;; ) { §\C{// general case}§
+	suspend(); $\C{// return to last resume}$
+
+	for ( ;; ) { $\C{// general case}$
 		this->fn = fn1 + fn2;
 		fn2 = fn1;
 		fn1 = this->fn;
-		suspend(); §\C{// return to last resume}§
+		suspend(); $\C{// return to last resume}$
 	} // for
 }
 int next( Fibonacci * this ) {
-	resume( this ); §\C{// transfer to last suspend}§
+	resume( this ); $\C{// transfer to last suspend}$
 	return this->fn;
 }
@@ -4964,5 +5019,5 @@
 When building a \CFA module which needs to be callable from C code, users can use the tools to generate a header file suitable for including in these C files with all of the needed declarations.
 
-In order to interoperate with existing C code, \CFA files can still include header files, the contents of which will be enclosed in a C linkage section to indicate C calling conventions (see Interoperability for more information).
+In order to interoperate with existing C code, \CFA files can still include header files, the contents of which will be enclosed in a C linkage section to indicate C calling conventions \see{\VRef{s:Interoperability} for more information}.
 
 
@@ -6282,6 +6337,6 @@
 In \CFA, there are ambiguous cases with dereference and operator identifiers, \eg ©int *?*?()©, where the string ©*?*?© can be interpreted as:
 \begin{cfa}
-*?§\color{red}\textvisiblespace§*? §\C{// dereference operator, dereference operator}§
-*§\color{red}\textvisiblespace§?*? §\C{// dereference, multiplication operator}§
+*?$\R{\textvisiblespace}$*? $\C{// dereference operator, dereference operator}$
+*$\R{\textvisiblespace}$?*? $\C{// dereference, multiplication operator}$
 \end{cfa}
 By default, the first interpretation is selected, which does not yield a meaningful parse.
@@ -6292,6 +6347,6 @@
 The ambiguity occurs when the deference operator has no parameters:
 \begin{cfa}
-*?()§\color{red}\textvisiblespace...§ ;
-*?()§\color{red}\textvisiblespace...§(...) ;
+*?()$\R{\textvisiblespace...}$ ;
+*?()$\R{\textvisiblespace...}$(...) ;
 \end{cfa}
 requiring arbitrary whitespace look-ahead for the routine-call parameter-list to disambiguate.
@@ -6301,12 +6356,12 @@
 The remaining cases are with the increment/decrement operators and conditional expression, \eg:
 \begin{cfa}
-i++?§\color{red}\textvisiblespace...§(...);
-i?++§\color{red}\textvisiblespace...§(...);
+i++?$\R{\textvisiblespace...}$(...);
+i?++$\R{\textvisiblespace...}$(...);
 \end{cfa}
 requiring arbitrary whitespace look-ahead for the operator parameter-list, even though that interpretation is an incorrect expression (juxtaposed identifiers).
 Therefore, it is necessary to disambiguate these cases with a space:
 \begin{cfa}
-i++§\color{red}\textvisiblespace§? i : 0;
-i?§\color{red}\textvisiblespace§++i : 0;
+i++$\R{\textvisiblespace}$? i : 0;
+i?$\R{\textvisiblespace}$++i : 0;
 \end{cfa}
 
@@ -6321,9 +6376,9 @@
 \begin{description}
 \item[Change:] add new keywords \\
-New keywords are added to \CFA (see~\VRef{s:CFAKeywords}).
+New keywords are added to \CFA \see{\VRef{s:CFAKeywords}}.
 \item[Rationale:] keywords added to implement new semantics of \CFA.
 \item[Effect on original feature:] change to semantics of well-defined feature. \\
 Any \Celeven programs using these keywords as identifiers are invalid \CFA programs.
-\item[Difficulty of converting:] keyword clashes are accommodated by syntactic transformations using the \CFA backquote escape-mechanism (see~\VRef{s:BackquoteIdentifiers}).
+\item[Difficulty of converting:] keyword clashes are accommodated by syntactic transformations using the \CFA backquote escape-mechanism \see{\VRef{s:BackquoteIdentifiers}}.
 \item[How widely used:] clashes among new \CFA keywords and existing identifiers are rare.
 \end{description}
@@ -6335,13 +6390,13 @@
 \eg:
 \begin{cfa}
-x; §\C{// int x}§
-*y; §\C{// int *y}§
-f( p1, p2 ); §\C{// int f( int p1, int p2 );}§
-g( p1, p2 ) int p1, p2; §\C{// int g( int p1, int p2 );}§
+x; $\C{// int x}$
+*y; $\C{// int *y}$
+f( p1, p2 ); $\C{// int f( int p1, int p2 );}$
+g( p1, p2 ) int p1, p2; $\C{// int g( int p1, int p2 );}$
 \end{cfa}
 \CFA continues to support K\&R routine definitions:
 \begin{cfa}
-f( a, b, c ) §\C{// default int return}§
-	int a, b; char c §\C{// K\&R parameter declarations}§
+f( a, b, c ) $\C{// default int return}$
+	int a, b; char c $\C{// K\&R parameter declarations}$
 {
 	...
@@ -6362,5 +6417,5 @@
 int rtn( int i );
 int rtn( char c );
-rtn( 'x' ); §\C{// programmer expects 2nd rtn to be called}§
+rtn( 'x' ); $\C{// programmer expects 2nd rtn to be called}$
 \end{cfa}
 \item[Rationale:] it is more intuitive for the call to ©rtn© to match the second version of definition of ©rtn© rather than the first.
@@ -6384,6 +6439,6 @@
 \item[Change:] make string literals ©const©:
 \begin{cfa}
-char * p = "abc"; §\C{// valid in C, deprecated in \CFA}§
-char * q = expr ? "abc" : "de"; §\C{// valid in C, invalid in \CFA}§
+char * p = "abc"; $\C{// valid in C, deprecated in \CFA}$
+char * q = expr ? "abc" : "de"; $\C{// valid in C, invalid in \CFA}$
 \end{cfa}
 The type of a string literal is changed from ©[] char© to ©const [] char©.
@@ -6392,5 +6447,5 @@
 \begin{cfa}
 char * p = "abc";
-p[0] = 'w'; §\C{// segment fault or change constant literal}§
+p[0] = 'w'; $\C{// segment fault or change constant literal}$
 \end{cfa}
 The same problem occurs when passing a string literal to a routine that changes its argument.
@@ -6404,7 +6459,7 @@
 \item[Change:] remove \newterm{tentative definitions}, which only occurs at file scope:
 \begin{cfa}
-int i; §\C{// forward definition}§
-int *j = ®&i®; §\C{// forward reference, valid in C, invalid in \CFA}§
-int i = 0; §\C{// definition}§
+int i; $\C{// forward definition}$
+int *j = @&i@; $\C{// forward reference, valid in C, invalid in \CFA}$
+int i = 0; $\C{// definition}$
 \end{cfa}
 is valid in C, and invalid in \CFA because duplicate overloaded object definitions at the same scope level are disallowed.
@@ -6412,7 +6467,7 @@
 \begin{cfa}
 struct X { int i; struct X *next; };
-static struct X a; §\C{// forward definition}§
-static struct X b = { 0, ®&a® };§\C{// forward reference, valid in C, invalid in \CFA}§
-static struct X a = { 1, &b }; §\C{// definition}§
+static struct X a; $\C{// forward definition}$
+static struct X b = { 0, @&a@ };$\C{// forward reference, valid in C, invalid in \CFA}$
+static struct X a = { 1, &b }; $\C{// definition}$
 \end{cfa}
 \item[Rationale:] avoids having different initialization rules for builtin types and user-defined types.
@@ -6426,17 +6481,17 @@
 \item[Change:] have ©struct© introduce a scope for nested types:
 \begin{cfa}
-enum ®Colour® { R, G, B, Y, C, M };
+enum @Colour@ { R, G, B, Y, C, M };
 struct Person {
-	enum ®Colour® { R, G, B };	§\C[7cm]{// nested type}§
-	struct Face { §\C{// nested type}§
-		®Colour® Eyes, Hair; §\C{// type defined outside (1 level)}§
+	enum @Colour@ { R, G, B };	$\C[7cm]{// nested type}$
+	struct Face { $\C{// nested type}$
+		@Colour@ Eyes, Hair; $\C{// type defined outside (1 level)}$
 	};
-	®.Colour® shirt; §\C{// type defined outside (top level)}§
-	®Colour® pants; §\C{// type defined same level}§
-	Face looks[10]; §\C{// type defined same level}§
+	@.Colour@ shirt; $\C{// type defined outside (top level)}$
+	@Colour@ pants; $\C{// type defined same level}$
+	Face looks[10]; $\C{// type defined same level}$
 };
-®Colour® c = R; §\C{// type/enum defined same level}§
-Person®.Colour® pc = Person®.®R;§\C{// type/enum defined inside}§
-Person®.®Face pretty; §\C{// type defined inside}\CRT§
+@Colour@ c = R; $\C{// type/enum defined same level}$
+Person@.Colour@ pc = Person@.@R;$\C{// type/enum defined inside}$
+Person@.@Face pretty; $\C{// type defined inside}\CRT$
 \end{cfa}
 In C, the name of the nested types belongs to the same scope as the name of the outermost enclosing structure, \ie the nested types are hoisted to the scope of the outer-most type, which is not useful and confusing.
@@ -6455,5 +6510,5 @@
 \item[Difficulty of converting:] Semantic transformation. To make the struct type name visible in the scope of the enclosing struct, the struct tag could be declared in the scope of the enclosing struct, before the enclosing struct is defined. Example:
 \begin{cfa}
-struct Y; §\C{// struct Y and struct X are at the same scope}§
+struct Y; $\C{// struct Y and struct X are at the same scope}$
 struct X {
 	struct Y { /* ... */ } y;
@@ -6470,11 +6525,11 @@
 \begin{cfa}
 void foo() {
-	int * b = malloc( sizeof(int) ); §\C{// implicitly convert void * to int *}§
-	char * c = b; §\C{// implicitly convert int * to void *, and then void * to char *}§
+	int * b = malloc( sizeof(int) ); $\C{// implicitly convert void * to int *}$
+	char * c = b; $\C{// implicitly convert int * to void *, and then void * to char *}$
 }
 \end{cfa}
 \item[Rationale:] increase type safety
 \item[Effect on original feature:] deletion of semantically well-defined feature.
-\item[Difficulty of converting:] requires adding a cast (see \VRef{s:StorageManagement} for better alternatives):
+\item[Difficulty of converting:] requires adding a cast \see{\VRef{s:StorageManagement} for better alternatives}:
 \begin{cfa}
 	int * b = (int *)malloc( sizeof(int) );
@@ -6586,5 +6641,5 @@
 \end{cquote}
 For the prescribed head-files, \CFA uses header interposition to wraps these includes in an ©extern "C"©;
-hence, names in these include files are not mangled\index{mangling!name} (see~\VRef{s:Interoperability}).
+hence, names in these include files are not mangled\index{mangling!name} \see{\VRef{s:Interoperability}}.
 All other C header files must be explicitly wrapped in ©extern "C"© to prevent name mangling.
 This approach is different from \Index*[C++]{\CC{}} where the name-mangling issue is handled internally in C header-files through checks for preprocessor variable ©__cplusplus©, which adds appropriate ©extern "C"© qualifiers.
@@ -6649,7 +6704,7 @@
 Type-safe allocation is provided for all C allocation routines and new \CFA allocation routines, \eg in
 \begin{cfa}
-int * ip = (int *)malloc( sizeof(int) );		§\C{// C}§
-int * ip = malloc();							§\C{// \CFA type-safe version of C malloc}§
-int * ip = alloc();								§\C{// \CFA type-safe uniform alloc}§
+int * ip = (int *)malloc( sizeof(int) );		$\C{// C}$
+int * ip = malloc();							$\C{// \CFA type-safe version of C malloc}$
+int * ip = alloc();								$\C{// \CFA type-safe uniform alloc}$
 \end{cfa}
 the latter two allocations determine the allocation size from the type of ©p© (©int©) and cast the pointer to the allocated storage to ©int *©.
@@ -6658,5 +6713,5 @@
 \begin{cfa}
 struct S { int i; } __attribute__(( aligned( 128 ) )); // cache-line alignment
-S * sp = malloc();								§\C{// honour type alignment}§
+S * sp = malloc();								$\C{// honour type alignment}$
 \end{cfa}
 the storage allocation is implicitly aligned to 128 rather than the default 16.
@@ -6673,16 +6728,16 @@
 \CFA memory management extends allocation to support constructors for initialization of allocated storage, \eg in
 \begin{cfa}
-struct S { int i; };							§\C{// cache-line aglinment}§
+struct S { int i; };							$\C{// cache-line alignment}$
 void ?{}( S & s, int i ) { s.i = i; }
 // assume ?|? operator for printing an S
 
-S & sp = *®new®( 3 );							§\C{// call constructor after allocation}§
+S & sp = *@new@( 3 );							$\C{// call constructor after allocation}$
 sout | sp.i;
-®delete®( &sp );
-
-S * spa = ®anew®( 10, 5 );						§\C{// allocate array and initialize each array element}§
+@delete@( &sp );
+
+S * spa = @anew@( 10, 5 );						$\C{// allocate array and initialize each array element}$
 for ( i; 10 ) sout | spa[i] | nonl;
 sout | nl;
-®adelete®( 10, spa );
+@adelete@( 10, spa );
 \end{cfa}
 Allocation routines ©new©/©anew© allocate a variable/array and initialize storage using the allocated type's constructor.
@@ -6693,15 +6748,15 @@
 extern "C" {
 	// C unsafe allocation
-	void * malloc( size_t size );§\indexc{malloc}§
-	void * calloc( size_t dim, size_t size );§\indexc{calloc}§
-	void * realloc( void * ptr, size_t size );§\indexc{realloc}§
-	void * memalign( size_t align, size_t size );§\indexc{memalign}§
-	void * aligned_alloc( size_t align, size_t size );§\indexc{aligned_alloc}§
-	int posix_memalign( void ** ptr, size_t align, size_t size );§\indexc{posix_memalign}§
-	void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );§\indexc{cmemalign}§ // CFA
+	void * malloc( size_t size );$\indexc{malloc}$
+	void * calloc( size_t dim, size_t size );$\indexc{calloc}$
+	void * realloc( void * ptr, size_t size );$\indexc{realloc}$
+	void * memalign( size_t align, size_t size );$\indexc{memalign}$
+	void * aligned_alloc( size_t align, size_t size );$\indexc{aligned_alloc}$
+	int posix_memalign( void ** ptr, size_t align, size_t size );$\indexc{posix_memalign}$
+	void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );$\indexc{cmemalign}$ // CFA
 
 	// C unsafe initialization/copy
-	void * memset( void * dest, int c, size_t size );§\indexc{memset}§
-	void * memcpy( void * dest, const void * src, size_t size );§\indexc{memcpy}§
+	void * memset( void * dest, int c, size_t size );$\indexc{memset}$
+	void * memcpy( void * dest, const void * src, size_t size );$\indexc{memcpy}$
 }
 
@@ -6709,5 +6764,5 @@
 
 forall( dtype T | sized(T) ) {
-	// §\CFA§ safe equivalents, i.e., implicit size specification
+	// $\CFA$ safe equivalents, i.e., implicit size specification
 	T * malloc( void );
 	T * calloc( size_t dim );
@@ -6718,46 +6773,46 @@
 	int posix_memalign( T ** ptr, size_t align );
 
-	// §\CFA§ safe general allocation, fill, resize, alignment, array
-	T * alloc( void );§\indexc{alloc}§					§\C[3.5in]{// variable, T size}§
-	T * alloc( size_t dim );							§\C{// array[dim], T size elements}§
-	T * alloc( T ptr[], size_t dim );					§\C{// realloc array[dim], T size elements}§
-
-	T * alloc_set( char fill );§\indexc{alloc_set}§		§\C{// variable, T size, fill bytes with value}§
-	T * alloc_set( T fill );							§\C{// variable, T size, fill with value}§
-	T * alloc_set( size_t dim, char fill );				§\C{// array[dim], T size elements, fill bytes with value}§
-	T * alloc_set( size_t dim, T fill );				§\C{// array[dim], T size elements, fill elements with value}§
-	T * alloc_set( size_t dim, const T fill[] );		§\C{// array[dim], T size elements, fill elements with array}§
-	T * alloc_set( T ptr[], size_t dim, char fill );	§\C{// realloc array[dim], T size elements, fill bytes with value}§
-
-	T * alloc_align( size_t align );					§\C{// aligned variable, T size}§
-	T * alloc_align( size_t align, size_t dim );		§\C{// aligned array[dim], T size elements}§
-	T * alloc_align( T ptr[], size_t align );			§\C{// realloc new aligned array}§
-	T * alloc_align( T ptr[], size_t align, size_t dim ); §\C{// realloc new aligned array[dim]}§
-
-	T * alloc_align_set( size_t align, char fill );		§\C{// aligned variable, T size, fill bytes with value}§
-	T * alloc_align_set( size_t align, T fill );		§\C{// aligned variable, T size, fill with value}§
-	T * alloc_align_set( size_t align, size_t dim, char fill ); §\C{// aligned array[dim], T size elements, fill bytes with value}§
-	T * alloc_align_set( size_t align, size_t dim, T fill ); §\C{// aligned array[dim], T size elements, fill elements with value}§
-	T * alloc_align_set( size_t align, size_t dim, const T fill[] ); §\C{// aligned array[dim], T size elements, fill elements with array}§
-	T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); §\C{// realloc new aligned array[dim], fill new bytes with value}§
-
-	// §\CFA§ safe initialization/copy, i.e., implicit size specification
-	T * memset( T * dest, char fill );§\indexc{memset}§
-	T * memcpy( T * dest, const T * src );§\indexc{memcpy}§
-
-	// §\CFA§ safe initialization/copy, i.e., implicit size specification, array types
+	// $\CFA$ safe general allocation, fill, resize, alignment, array
+	T * alloc( void );$\indexc{alloc}$					$\C[3.5in]{// variable, T size}$
+	T * alloc( size_t dim );							$\C{// array[dim], T size elements}$
+	T * alloc( T ptr[], size_t dim );					$\C{// realloc array[dim], T size elements}$
+
+	T * alloc_set( char fill );$\indexc{alloc_set}$		$\C{// variable, T size, fill bytes with value}$
+	T * alloc_set( T fill );							$\C{// variable, T size, fill with value}$
+	T * alloc_set( size_t dim, char fill );				$\C{// array[dim], T size elements, fill bytes with value}$
+	T * alloc_set( size_t dim, T fill );				$\C{// array[dim], T size elements, fill elements with value}$
+	T * alloc_set( size_t dim, const T fill[] );		$\C{// array[dim], T size elements, fill elements with array}$
+	T * alloc_set( T ptr[], size_t dim, char fill );	$\C{// realloc array[dim], T size elements, fill bytes with value}$
+
+	T * alloc_align( size_t align );					$\C{// aligned variable, T size}$
+	T * alloc_align( size_t align, size_t dim );		$\C{// aligned array[dim], T size elements}$
+	T * alloc_align( T ptr[], size_t align );			$\C{// realloc new aligned array}$
+	T * alloc_align( T ptr[], size_t align, size_t dim ); $\C{// realloc new aligned array[dim]}$
+
+	T * alloc_align_set( size_t align, char fill );		$\C{// aligned variable, T size, fill bytes with value}$
+	T * alloc_align_set( size_t align, T fill );		$\C{// aligned variable, T size, fill with value}$
+	T * alloc_align_set( size_t align, size_t dim, char fill ); $\C{// aligned array[dim], T size elements, fill bytes with value}$
+	T * alloc_align_set( size_t align, size_t dim, T fill ); $\C{// aligned array[dim], T size elements, fill elements with value}$
+	T * alloc_align_set( size_t align, size_t dim, const T fill[] ); $\C{// aligned array[dim], T size elements, fill elements with array}$
+	T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); $\C{// realloc new aligned array[dim], fill new bytes with value}$
+
+	// $\CFA$ safe initialization/copy, i.e., implicit size specification
+	T * memset( T * dest, char fill );$\indexc{memset}$
+	T * memcpy( T * dest, const T * src );$\indexc{memcpy}$
+
+	// $\CFA$ safe initialization/copy, i.e., implicit size specification, array types
 	T * amemset( T dest[], char fill, size_t dim );
 	T * amemcpy( T dest[], const T src[], size_t dim );
 }
 
-// §\CFA§ allocation/deallocation and constructor/destructor, non-array types
-forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * new( Params p );§\indexc{new}§
-forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void delete( T * ptr );§\indexc{delete}§
+// $\CFA$ allocation/deallocation and constructor/destructor, non-array types
+forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * new( Params p );$\indexc{new}$
+forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void delete( T * ptr );$\indexc{delete}$
 forall( dtype T, ttype Params | sized(T) | { void ^?{}( T & ); void delete( Params ); } )
   void delete( T * ptr, Params rest );
 
-// §\CFA§ allocation/deallocation and constructor/destructor, array types
-forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * anew( size_t dim, Params p );§\indexc{anew}§
-forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( size_t dim, T arr[] );§\indexc{adelete}§
+// $\CFA$ allocation/deallocation and constructor/destructor, array types
+forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * anew( size_t dim, Params p );$\indexc{anew}$
+forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( size_t dim, T arr[] );$\indexc{adelete}$
 forall( dtype T | sized(T) | { void ^?{}( T & ); }, ttype Params | { void adelete( Params ); } )
   void adelete( size_t dim, T arr[], Params rest );
@@ -6769,5 +6824,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-int ato( const char * ptr );§\indexc{ato}§
+int ato( const char * ptr );$\indexc{ato}$
 unsigned int ato( const char * ptr );
 long int ato( const char * ptr );
@@ -6801,19 +6856,19 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-forall( otype T | { int ?<?( T, T ); } ) §\C{// location}§
-T * bsearch( T key, const T * arr, size_t dim );§\indexc{bsearch}§
-
-forall( otype T | { int ?<?( T, T ); } ) §\C{// position}§
+forall( otype T | { int ?<?( T, T ); } ) $\C{// location}$
+T * bsearch( T key, const T * arr, size_t dim );$\indexc{bsearch}$
+
+forall( otype T | { int ?<?( T, T ); } ) $\C{// position}$
 unsigned int bsearch( T key, const T * arr, size_t dim );
 
 forall( otype T | { int ?<?( T, T ); } )
-void qsort( const T * arr, size_t dim );§\indexc{qsort}§
+void qsort( const T * arr, size_t dim );$\indexc{qsort}$
 
 forall( otype E | { int ?<?( E, E ); } ) {
-	E * bsearch( E key, const E * vals, size_t dim );§\indexc{bsearch}§ §\C{// location}§
-	size_t bsearch( E key, const E * vals, size_t dim );§\C{// position}§
-	E * bsearchl( E key, const E * vals, size_t dim );§\indexc{bsearchl}§
+	E * bsearch( E key, const E * vals, size_t dim );$\indexc{bsearch}$ $\C{// location}$
+	size_t bsearch( E key, const E * vals, size_t dim );$\C{// position}$
+	E * bsearchl( E key, const E * vals, size_t dim );$\indexc{bsearchl}$
 	size_t bsearchl( E key, const E * vals, size_t dim );
-	E * bsearchu( E key, const E * vals, size_t dim );§\indexc{bsearchu}§
+	E * bsearchu( E key, const E * vals, size_t dim );$\indexc{bsearchu}$
 	size_t bsearchu( E key, const E * vals, size_t dim );
 }
@@ -6829,5 +6884,5 @@
 
 forall( otype E | { int ?<?( E, E ); } ) {
-	void qsort( E * vals, size_t dim );§\indexc{qsort}§
+	void qsort( E * vals, size_t dim );$\indexc{qsort}$
 }
 \end{cfa}
@@ -6838,5 +6893,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-unsigned char abs( signed char );§\indexc{abs}§
+unsigned char abs( signed char );$\indexc{abs}$
 int abs( int );
 unsigned long int abs( long int );
@@ -6857,25 +6912,25 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-void srandom( unsigned int seed );§\indexc{srandom}§
-char random( void );§\indexc{random}§
-char random( char u ); §\C{// [0,u)}§
-char random( char l, char u ); §\C{// [l,u)}§
+void srandom( unsigned int seed );$\indexc{srandom}$
+char random( void );$\indexc{random}$
+char random( char u ); $\C{// [0,u)}$
+char random( char l, char u ); $\C{// [l,u)}$
 int random( void );
-int random( int u ); §\C{// [0,u)}§
-int random( int l, int u ); §\C{// [l,u)}§
+int random( int u ); $\C{// [0,u)}$
+int random( int l, int u ); $\C{// [l,u)}$
 unsigned int random( void );
-unsigned int random( unsigned int u ); §\C{// [0,u)}§
-unsigned int random( unsigned int l, unsigned int u ); §\C{// [l,u)}§
+unsigned int random( unsigned int u ); $\C{// [0,u)}$
+unsigned int random( unsigned int l, unsigned int u ); $\C{// [l,u)}$
 long int random( void );
-long int random( long int u ); §\C{// [0,u)}§
-long int random( long int l, long int u ); §\C{// [l,u)}§
+long int random( long int u ); $\C{// [0,u)}$
+long int random( long int l, long int u ); $\C{// [l,u)}$
 unsigned long int random( void );
-unsigned long int random( unsigned long int u ); §\C{// [0,u)}§
-unsigned long int random( unsigned long int l, unsigned long int u ); §\C{// [l,u)}§
-float random( void );						 §\C{// [0.0, 1.0)}§
-double random( void );						 §\C{// [0.0, 1.0)}§
-float _Complex random( void );				 §\C{// [0.0, 1.0)+[0.0, 1.0)i}§
-double _Complex random( void );				 §\C{// [0.0, 1.0)+[0.0, 1.0)i}§
-long double _Complex random( void );		 §\C{// [0.0, 1.0)+[0.0, 1.0)i}§
+unsigned long int random( unsigned long int u ); $\C{// [0,u)}$
+unsigned long int random( unsigned long int l, unsigned long int u ); $\C{// [l,u)}$
+float random( void );						 $\C{// [0.0, 1.0)}$
+double random( void );						 $\C{// [0.0, 1.0)}$
+float _Complex random( void );				 $\C{// [0.0, 1.0)+[0.0, 1.0)i}$
+double _Complex random( void );				 $\C{// [0.0, 1.0)+[0.0, 1.0)i}$
+long double _Complex random( void );		 $\C{// [0.0, 1.0)+[0.0, 1.0)i}$
 \end{cfa}
 
@@ -6885,8 +6940,8 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-forall( otype T | { int ?<?( T, T ); } ) T min( T t1, T t2 );§\indexc{min}§
-forall( otype T | { int ?>?( T, T ); } ) T max( T t1, T t2 );§\indexc{max}§
-forall( otype T | { T min( T, T ); T max( T, T ); } ) T clamp( T value, T min_val, T max_val );§\indexc{clamp}§
-forall( otype T ) void swap( T * t1, T * t2 );§\indexc{swap}§
+forall( otype T | { int ?<?( T, T ); } ) T min( T t1, T t2 );$\indexc{min}$
+forall( otype T | { int ?>?( T, T ); } ) T max( T t1, T t2 );$\indexc{max}$
+forall( otype T | { T min( T, T ); T max( T, T ); } ) T clamp( T value, T min_val, T max_val );$\indexc{clamp}$
+forall( otype T ) void swap( T * t1, T * t2 );$\indexc{swap}$
 \end{cfa}
 
@@ -6902,5 +6957,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float ?%?( float, float );§\indexc{fmod}§
+float ?%?( float, float );$\indexc{fmod}$
 float fmod( float, float );
 double ?%?( double, double );
@@ -6909,9 +6964,9 @@
 long double fmod( long double, long double );
 
-float remainder( float, float );§\indexc{remainder}§
+float remainder( float, float );$\indexc{remainder}$
 double remainder( double, double );
 long double remainder( long double, long double );
 
-float remquo( float, float, int * );§\indexc{remquo}§
+float remquo( float, float, int * );$\indexc{remquo}$
 double remquo( double, double, int * );
 long double remquo( long double, long double, int * );
@@ -6920,5 +6975,5 @@
 [ int, long double ] remquo( long double, long double );
 
-float div( float, float, int * );§\indexc{div}§ §\C{// alternative name for remquo}§
+float div( float, float, int * );$\indexc{div}$ $\C{// alternative name for remquo}$
 double div( double, double, int * );
 long double div( long double, long double, int * );
@@ -6927,13 +6982,13 @@
 [ int, long double ] div( long double, long double );
 
-float fma( float, float, float );§\indexc{fma}§
+float fma( float, float, float );$\indexc{fma}$
 double fma( double, double, double );
 long double fma( long double, long double, long double );
 
-float fdim( float, float );§\indexc{fdim}§
+float fdim( float, float );$\indexc{fdim}$
 double fdim( double, double );
 long double fdim( long double, long double );
 
-float nan( const char * );§\indexc{nan}§
+float nan( const char * );$\indexc{nan}$
 double nan( const char * );
 long double nan( const char * );
@@ -6945,5 +7000,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float exp( float );§\indexc{exp}§
+float exp( float );$\indexc{exp}$
 double exp( double );
 long double exp( long double );
@@ -6952,5 +7007,5 @@
 long double _Complex exp( long double _Complex );
 
-float exp2( float );§\indexc{exp2}§
+float exp2( float );$\indexc{exp2}$
 double exp2( double );
 long double exp2( long double );
@@ -6959,9 +7014,9 @@
 // long double _Complex exp2( long double _Complex );
 
-float expm1( float );§\indexc{expm1}§
+float expm1( float );$\indexc{expm1}$
 double expm1( double );
 long double expm1( long double );
 
-float pow( float, float );§\indexc{pow}§
+float pow( float, float );$\indexc{pow}$
 double pow( double, double );
 long double pow( long double, long double );
@@ -6976,5 +7031,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float log( float );§\indexc{log}§
+float log( float );$\indexc{log}$
 double log( double );
 long double log( long double );
@@ -6983,5 +7038,5 @@
 long double _Complex log( long double _Complex );
 
-float log2( float );§\indexc{log2}§
+float log2( float );$\indexc{log2}$
 double log2( double );
 long double log2( long double );
@@ -6990,5 +7045,5 @@
 // long double _Complex log2( long double _Complex );
 
-float log10( float );§\indexc{log10}§
+float log10( float );$\indexc{log10}$
 double log10( double );
 long double log10( long double );
@@ -6997,17 +7052,17 @@
 // long double _Complex log10( long double _Complex );
 
-float log1p( float );§\indexc{log1p}§
+float log1p( float );$\indexc{log1p}$
 double log1p( double );
 long double log1p( long double );
 
-int ilogb( float );§\indexc{ilogb}§
+int ilogb( float );$\indexc{ilogb}$
 int ilogb( double );
 int ilogb( long double );
 
-float logb( float );§\indexc{logb}§
+float logb( float );$\indexc{logb}$
 double logb( double );
 long double logb( long double );
 
-float sqrt( float );§\indexc{sqrt}§
+float sqrt( float );$\indexc{sqrt}$
 double sqrt( double );
 long double sqrt( long double );
@@ -7016,9 +7071,9 @@
 long double _Complex sqrt( long double _Complex );
 
-float cbrt( float );§\indexc{cbrt}§
+float cbrt( float );$\indexc{cbrt}$
 double cbrt( double );
 long double cbrt( long double );
 
-float hypot( float, float );§\indexc{hypot}§
+float hypot( float, float );$\indexc{hypot}$
 double hypot( double, double );
 long double hypot( long double, long double );
@@ -7030,5 +7085,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float sin( float );§\indexc{sin}§
+float sin( float );$\indexc{sin}$
 double sin( double );
 long double sin( long double );
@@ -7037,5 +7092,5 @@
 long double _Complex sin( long double _Complex );
 
-float cos( float );§\indexc{cos}§
+float cos( float );$\indexc{cos}$
 double cos( double );
 long double cos( long double );
@@ -7044,5 +7099,5 @@
 long double _Complex cos( long double _Complex );
 
-float tan( float );§\indexc{tan}§
+float tan( float );$\indexc{tan}$
 double tan( double );
 long double tan( long double );
@@ -7051,5 +7106,5 @@
 long double _Complex tan( long double _Complex );
 
-float asin( float );§\indexc{asin}§
+float asin( float );$\indexc{asin}$
 double asin( double );
 long double asin( long double );
@@ -7058,5 +7113,5 @@
 long double _Complex asin( long double _Complex );
 
-float acos( float );§\indexc{acos}§
+float acos( float );$\indexc{acos}$
 double acos( double );
 long double acos( long double );
@@ -7065,5 +7120,5 @@
 long double _Complex acos( long double _Complex );
 
-float atan( float );§\indexc{atan}§
+float atan( float );$\indexc{atan}$
 double atan( double );
 long double atan( long double );
@@ -7072,10 +7127,10 @@
 long double _Complex atan( long double _Complex );
 
-float atan2( float, float );§\indexc{atan2}§
+float atan2( float, float );$\indexc{atan2}$
 double atan2( double, double );
 long double atan2( long double, long double );
 
-float atan( float, float ); §\C{// alternative name for atan2}§
-double atan( double, double );§\indexc{atan}§
+float atan( float, float ); $\C{// alternative name for atan2}$
+double atan( double, double );$\indexc{atan}$
 long double atan( long double, long double );
 \end{cfa}
@@ -7086,5 +7141,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float sinh( float );§\indexc{sinh}§
+float sinh( float );$\indexc{sinh}$
 double sinh( double );
 long double sinh( long double );
@@ -7093,5 +7148,5 @@
 long double _Complex sinh( long double _Complex );
 
-float cosh( float );§\indexc{cosh}§
+float cosh( float );$\indexc{cosh}$
 double cosh( double );
 long double cosh( long double );
@@ -7100,5 +7155,5 @@
 long double _Complex cosh( long double _Complex );
 
-float tanh( float );§\indexc{tanh}§
+float tanh( float );$\indexc{tanh}$
 double tanh( double );
 long double tanh( long double );
@@ -7107,5 +7162,5 @@
 long double _Complex tanh( long double _Complex );
 
-float asinh( float );§\indexc{asinh}§
+float asinh( float );$\indexc{asinh}$
 double asinh( double );
 long double asinh( long double );
@@ -7114,5 +7169,5 @@
 long double _Complex asinh( long double _Complex );
 
-float acosh( float );§\indexc{acosh}§
+float acosh( float );$\indexc{acosh}$
 double acosh( double );
 long double acosh( long double );
@@ -7121,5 +7176,5 @@
 long double _Complex acosh( long double _Complex );
 
-float atanh( float );§\indexc{atanh}§
+float atanh( float );$\indexc{atanh}$
 double atanh( double );
 long double atanh( long double );
@@ -7134,5 +7189,5 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float erf( float );§\indexc{erf}§
+float erf( float );$\indexc{erf}$
 double erf( double );
 long double erf( long double );
@@ -7141,5 +7196,5 @@
 long double _Complex erf( long double _Complex );
 
-float erfc( float );§\indexc{erfc}§
+float erfc( float );$\indexc{erfc}$
 double erfc( double );
 long double erfc( long double );
@@ -7148,5 +7203,5 @@
 long double _Complex erfc( long double _Complex );
 
-float lgamma( float );§\indexc{lgamma}§
+float lgamma( float );$\indexc{lgamma}$
 double lgamma( double );
 long double lgamma( long double );
@@ -7155,5 +7210,5 @@
 long double lgamma( long double, int * );
 
-float tgamma( float );§\indexc{tgamma}§
+float tgamma( float );$\indexc{tgamma}$
 double tgamma( double );
 long double tgamma( long double );
@@ -7165,17 +7220,17 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float floor( float );§\indexc{floor}§
+float floor( float );$\indexc{floor}$
 double floor( double );
 long double floor( long double );
 
-float ceil( float );§\indexc{ceil}§
+float ceil( float );$\indexc{ceil}$
 double ceil( double );
 long double ceil( long double );
 
-float trunc( float );§\indexc{trunc}§
+float trunc( float );$\indexc{trunc}$
 double trunc( double );
 long double trunc( long double );
 
-float rint( float );§\indexc{rint}§
+float rint( float );$\indexc{rint}$
 long double rint( long double );
 long int rint( float );
@@ -7186,5 +7241,5 @@
 long long int rint( long double );
 
-long int lrint( float );§\indexc{lrint}§
+long int lrint( float );$\indexc{lrint}$
 long int lrint( double );
 long int lrint( long double );
@@ -7193,9 +7248,9 @@
 long long int llrint( long double );
 
-float nearbyint( float );§\indexc{nearbyint}§
+float nearbyint( float );$\indexc{nearbyint}$
 double nearbyint( double );
 long double nearbyint( long double );
 
-float round( float );§\indexc{round}§
+float round( float );$\indexc{round}$
 long double round( long double );
 long int round( float );
@@ -7206,5 +7261,5 @@
 long long int round( long double );
 
-long int lround( float );§\indexc{lround}§
+long int lround( float );$\indexc{lround}$
 long int lround( double );
 long int lround( long double );
@@ -7219,17 +7274,17 @@
 \leavevmode
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
-float copysign( float, float );§\indexc{copysign}§
+float copysign( float, float );$\indexc{copysign}$
 double copysign( double, double );
 long double copysign( long double, long double );
 
-float frexp( float, int * );§\indexc{frexp}§
+float frexp( float, int * );$\indexc{frexp}$
 double frexp( double, int * );
 long double frexp( long double, int * );
 
-float ldexp( float, int );§\indexc{ldexp}§
+float ldexp( float, int );$\indexc{ldexp}$
 double ldexp( double, int );
 long double ldexp( long double, int );
 
-[ float, float ] modf( float );§\indexc{modf}§
+[ float, float ] modf( float );$\indexc{modf}$
 float modf( float, float * );
 [ double, double ] modf( double );
@@ -7238,17 +7293,17 @@
 long double modf( long double, long double * );
 
-float nextafter( float, float );§\indexc{nextafter}§
+float nextafter( float, float );$\indexc{nextafter}$
 double nextafter( double, double );
 long double nextafter( long double, long double );
 
-float nexttoward( float, long double );§\indexc{nexttoward}§
+float nexttoward( float, long double );$\indexc{nexttoward}$
 double nexttoward( double, long double );
 long double nexttoward( long double, long double );
 
-float scalbn( float, int );§\indexc{scalbn}§
+float scalbn( float, int );$\indexc{scalbn}$
 double scalbn( double, int );
 long double scalbn( long double, int );
 
-float scalbln( float, long int );§\indexc{scalbln}§
+float scalbln( float, long int );$\indexc{scalbln}$
 double scalbln( double, long int );
 long double scalbln( long double, long int );
@@ -7267,5 +7322,5 @@
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
 struct Duration {
-	int64_t tv; §\C{// nanoseconds}§
+	int64_t tv; $\C{// nanoseconds}$
 };
 
@@ -7397,5 +7452,5 @@
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
 struct Time {
-	uint64_t tv; §\C{// nanoseconds since UNIX epoch}§
+	uint64_t tv; $\C{// nanoseconds since UNIX epoch}$
 };
 
@@ -7468,6 +7523,6 @@
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
 struct Clock {
-	Duration offset; §\C{// for virtual clock: contains offset from real-time}§
-	int clocktype; §\C{// implementation only -1 (virtual), CLOCK\_REALTIME}§
+	Duration offset; $\C{// for virtual clock: contains offset from real-time}$
+	int clocktype; $\C{// implementation only: -1 (virtual) or CLOCK\_REALTIME}$
 };
 
@@ -7477,9 +7532,9 @@
 void ?{}( Clock & clk, Duration adj );
 
-Duration getResNsec(); §\C{// with nanoseconds}§
-Duration getRes(); §\C{// without nanoseconds}§
-
-Time getTimeNsec(); §\C{// with nanoseconds}§
-Time getTime(); §\C{// without nanoseconds}§
+Duration getResNsec(); $\C{// with nanoseconds}$
+Duration getRes(); $\C{// without nanoseconds}$
+
+Time getTimeNsec(); $\C{// with nanoseconds}$
+Time getTime(); $\C{// without nanoseconds}$
 Time getTime( Clock & clk );
 Time ?()( Clock & clk );
@@ -7497,5 +7552,5 @@
 
 \begin{cfa}
-void ?{}( Int * this ); §\C{// constructor/destructor}§
+void ?{}( Int * this ); $\C{// constructor/destructor}$
 void ?{}( Int * this, Int init );
 void ?{}( Int * this, zero_t );
@@ -7506,5 +7561,5 @@
 void ^?{}( Int * this );
 
-Int ?=?( Int * lhs, Int rhs ); §\C{// assignment}§
+Int ?=?( Int * lhs, Int rhs ); $\C{// assignment}$
 Int ?=?( Int * lhs, long int rhs );
 Int ?=?( Int * lhs, unsigned long int rhs );
@@ -7523,5 +7578,5 @@
 unsigned long int narrow( Int val );
 
-int ?==?( Int oper1, Int oper2 ); §\C{// comparison}§
+int ?==?( Int oper1, Int oper2 ); $\C{// comparison}$
 int ?==?( Int oper1, long int oper2 );
 int ?==?( long int oper2, Int oper1 );
@@ -7559,5 +7614,5 @@
 int ?>=?( unsigned long int oper1, Int oper2 );
 
-Int +?( Int oper ); §\C{// arithmetic}§
+Int +?( Int oper ); $\C{// arithmetic}$
 Int -?( Int oper );
 Int ~?( Int oper );
@@ -7641,5 +7696,5 @@
 Int ?>>=?( Int * lhs, mp_bitcnt_t shift );
 
-Int abs( Int oper ); §\C{// number functions}§
+Int abs( Int oper ); $\C{// number functions}$
 Int fact( unsigned long int N );
 Int gcd( Int oper1, Int oper2 );
@@ -7653,5 +7708,5 @@
 Int sqrt( Int oper );
 
-forall( dtype istype | istream( istype ) ) istype * ?|?( istype * is, Int * mp );  §\C{// I/O}§
+forall( dtype istype | istream( istype ) ) istype * ?|?( istype * is, Int * mp );  $\C{// I/O}$
 forall( dtype ostype | ostream( ostype ) ) ostype * ?|?( ostype * os, Int mp );
 \end{cfa}
@@ -7664,5 +7719,5 @@
 \hline
 \begin{cfa}
-#include <gmp>§\indexc{gmp}§
+#include <gmp>$\indexc{gmp}$
 int main( void ) {
 	sout | "Factorial Numbers";
@@ -7678,13 +7733,13 @@
 &
 \begin{cfa}
-#include <gmp.h>§\indexc{gmp.h}§
+#include <gmp.h>$\indexc{gmp.h}$
 int main( void ) {
-	®gmp_printf®( "Factorial Numbers\n" );
-	®mpz_t® fact;
-	®mpz_init_set_ui®( fact, 1 );
-	®gmp_printf®( "%d %Zd\n", 0, fact );
+	@gmp_printf@( "Factorial Numbers\n" );
+	@mpz_t@ fact;
+	@mpz_init_set_ui@( fact, 1 );
+	@gmp_printf@( "%d %Zd\n", 0, fact );
 	for ( unsigned int i = 1; i <= 40; i += 1 ) {
-		®mpz_mul_ui®( fact, fact, i );
-		®gmp_printf®( "%d %Zd\n", i, fact );
+		@mpz_mul_ui@( fact, fact, i );
+		@gmp_printf@( "%d %Zd\n", i, fact );
 	}
 }
@@ -7751,9 +7806,9 @@
 \begin{cfa}[belowskip=0pt]
 // implementation
-struct Rational {§\indexc{Rational}§
-	long int numerator, denominator; §\C{// invariant: denominator > 0}§
+struct Rational {$\indexc{Rational}$
+	long int numerator, denominator; $\C{// invariant: denominator > 0}$
 }; // Rational
 
-Rational rational(); §\C{// constructors}§
+Rational rational(); $\C{// constructors}$
 Rational rational( long int n );
 Rational rational( long int n, long int d );
@@ -7761,10 +7816,10 @@
 void ?{}( Rational * r, one_t );
 
-long int numerator( Rational r ); §\C{// numerator/denominator getter/setter}§
+long int numerator( Rational r ); $\C{// numerator/denominator getter/setter}$
 long int numerator( Rational r, long int n );
 long int denominator( Rational r );
 long int denominator( Rational r, long int d );
 
-int ?==?( Rational l, Rational r ); §\C{// comparison}§
+int ?==?( Rational l, Rational r ); $\C{// comparison}$
 int ?!=?( Rational l, Rational r );
 int ?<?( Rational l, Rational r );
@@ -7773,5 +7828,5 @@
 int ?>=?( Rational l, Rational r );
 
-Rational -?( Rational r ); §\C{// arithmetic}§
+Rational -?( Rational r ); $\C{// arithmetic}$
 Rational ?+?( Rational l, Rational r );
 Rational ?-?( Rational l, Rational r );
@@ -7779,5 +7834,5 @@
 Rational ?/?( Rational l, Rational r );
 
-double widen( Rational r ); §\C{// conversion}§
+double widen( Rational r ); $\C{// conversion}$
 Rational narrow( double f, long int md );
 
Index: libcfa/src/memory.cfa
===================================================================
--- libcfa/src/memory.cfa	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ libcfa/src/memory.cfa	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jun  2 16:48:00 2020
 // Last Modified By : Andrew Beach
-// Last Modified On : Tue Jun  3 12:30:00 2020
-// Update Count     : 0
+// Last Modified On : Mon Feb  1 16:10:00 2021
+// Update Count     : 1
 //
 
@@ -56,9 +56,8 @@
 }
 
-forall(T & | sized(T) | { void ^?{}(T &); })
+forall(T & | sized(T))
 void ?{}(counter_ptr(T) & this, counter_ptr(T) that) {
 	// `that` is a copy but it should have neither a constructor
 	// nor destructor run on it so it shouldn't need adjustment.
-	internal_decrement(this);
 	internal_copy(this, that);
 }
@@ -66,5 +65,7 @@
 forall(T & | sized(T), Args... | { void ?{}(T&, Args); })
 void ?{}(counter_ptr(T) & this, Args args) {
-	this.data = (counter_data(T)*)new(args);
+	this.data = malloc();
+	this.data->counter = 1;
+	(this.data->object){args};
 }
 
@@ -126,5 +127,6 @@
 forall(T & | sized(T), Args... | { void ?{}(T &, Args); })
 void ?{}(unique_ptr(T) & this, Args args) {
-	this.data = (T *)new(args);
+	this.data = malloc();
+	(*this.data){args};
 }
 
Index: libcfa/src/memory.hfa
===================================================================
--- libcfa/src/memory.hfa	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ libcfa/src/memory.hfa	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jun  2 16:48:00 2020
 // Last Modified By : Andrew Beach
-// Last Modified On : Tue Jun  3 12:29:00 2020
-// Update Count     : 0
+// Last Modified On : Fri Jan 29 15:52:00 2021
+// Update Count     : 1
 //
 
@@ -17,73 +17,86 @@
 
 // Internal data object.
-forall(T & | sized(T)) {
-	struct counter_data {
-		unsigned int counter;
-		T object;
-	};
+forall(T & | sized(T))
+struct counter_data {
+	unsigned int counter;
+	T object;
+};
 
-	forall(Args... | { void ?{}(T &, Args); })
-	void ?{}(counter_data(T) & this, Args args);
+forall(T & | sized(T), Args... | { void ?{}(T &, Args); })
+void ?{}(counter_data(T) & this, Args args);
 
-	forall( | { void ^?{}(T &); })
-	void ^?{}(counter_data(T) & this);
-}
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ^?{}(counter_data(T) & this);
 
 // This is one of many pointers keeping this alive.
-forall(T & | sized(T)) {
-	struct counter_ptr {
-		counter_data(T) * data;
-	};
+forall(T & | sized(T))
+struct counter_ptr {
+	counter_data(T) * data;
+};
 
-	void ?{}(counter_ptr(T) & this);
-	void ?{}(counter_ptr(T) & this, zero_t);
-	forall( | { void ^?{}(T &); })
-	void ?{}(counter_ptr(T) & this, counter_ptr(T) that);
-	forall(Args... | { void ?{}(T&, Args); })
-	void ?{}(counter_ptr(T) & this, Args args);
+forall(T & | sized(T))
+void ?{}(counter_ptr(T) & this);
+forall(T & | sized(T))
+void ?{}(counter_ptr(T) & this, zero_t);
+forall(T & | sized(T))
+void ?{}(counter_ptr(T) & this, counter_ptr(T) that);
+forall(T & | sized(T), Args... | { void ?{}(T&, Args); })
+void ?{}(counter_ptr(T) & this, Args args);
 
-	forall( | { void ^?{}(T &); })
-	void ^?{}(counter_ptr(T) & this);
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ^?{}(counter_ptr(T) & this);
 
-	T & *?(counter_ptr(T) & this);
+forall(T & | sized(T))
+T & *?(counter_ptr(T) & this);
 
-	forall( | { void ^?{}(T &); })
-	void ?=?(counter_ptr(T) & this, counter_ptr(T) that);
-	forall( | { void ^?{}(T &); })
-	void ?=?(counter_ptr(T) & this, zero_t);
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ?=?(counter_ptr(T) & this, counter_ptr(T) that);
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ?=?(counter_ptr(T) & this, zero_t);
 
-	int ?==?(counter_ptr(T) const & this, counter_ptr(T) const & that);
-	int ?!=?(counter_ptr(T) const & this, counter_ptr(T) const & that);
-	int ?==?(counter_ptr(T) const & this, zero_t);
-	int ?!=?(counter_ptr(T) const & this, zero_t);
-}
+forall(T & | sized(T))
+int ?==?(counter_ptr(T) const & this, counter_ptr(T) const & that);
+forall(T & | sized(T))
+int ?!=?(counter_ptr(T) const & this, counter_ptr(T) const & that);
+forall(T & | sized(T))
+int ?==?(counter_ptr(T) const & this, zero_t);
+forall(T & | sized(T))
+int ?!=?(counter_ptr(T) const & this, zero_t);
 
 // This is the only pointer that keeps this alive.
-forall(T &) {
-	struct unique_ptr {
-		T * data;
-	};
+forall(T &)
+struct unique_ptr {
+	T * data;
+};
 
-	void ?{}(unique_ptr(T) & this);
-	void ?{}(unique_ptr(T) & this, zero_t);
-	void ?{}(unique_ptr(T) & this, unique_ptr(T) that) = void;
-	forall( | sized(T), Args... | { void ?{}(T &, Args); })
-	void ?{}(unique_ptr(T) & this, Args args);
+forall(T &)
+void ?{}(unique_ptr(T) & this);
+forall(T &)
+void ?{}(unique_ptr(T) & this, zero_t);
+forall(T &)
+void ?{}(unique_ptr(T) & this, unique_ptr(T) that) = void;
+forall(T & | sized(T), Args... | { void ?{}(T &, Args); })
+void ?{}(unique_ptr(T) & this, Args args);
 
-	forall( | { void ^?{}(T &); })
-	void ^?{}(unique_ptr(T) & this);
+forall(T & | { void ^?{}(T &); })
+void ^?{}(unique_ptr(T) & this);
 
-	T & *?(unique_ptr(T) & this);
+forall(T & )
+T & *?(unique_ptr(T) & this);
 
-	void ?=?(unique_ptr(T) & this, unique_ptr(T) that) = void;
-	forall( | { void ^?{}(T &); })
-	void ?=?(unique_ptr(T) & this, zero_t);
+forall(T &)
+void ?=?(unique_ptr(T) & this, unique_ptr(T) that) = void;
+forall(T & | { void ^?{}(T &); })
+void ?=?(unique_ptr(T) & this, zero_t);
 
-	forall( | { void ^?{}(T &); })
-	void move(unique_ptr(T) & this, unique_ptr(T) & that);
+forall(T & | { void ^?{}(T &); })
+void move(unique_ptr(T) & this, unique_ptr(T) & that);
 
-	int ?==?(unique_ptr(T) const & this, unique_ptr(T) const & that);
-	int ?!=?(unique_ptr(T) const & this, unique_ptr(T) const & that);
-	int ?==?(unique_ptr(T) const & this, zero_t);
-	int ?!=?(unique_ptr(T) const & this, zero_t);
-}
+forall(T &)
+int ?==?(unique_ptr(T) const & this, unique_ptr(T) const & that);
+forall(T &)
+int ?!=?(unique_ptr(T) const & this, unique_ptr(T) const & that);
+forall(T &)
+int ?==?(unique_ptr(T) const & this, zero_t);
+forall(T &)
+int ?!=?(unique_ptr(T) const & this, zero_t);
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ src/Parser/parser.yy	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jan 26 11:18:19 2021
-// Update Count     : 4674
+// Last Modified On : Wed Feb  3 18:30:12 2021
+// Update Count     : 4700
 //
 
@@ -41,5 +41,5 @@
 
 %{
-#define YYDEBUG_LEXER_TEXT (yylval)						// lexer loads this up each time
+#define YYDEBUG_LEXER_TEXT ( yylval )					// lexer loads this up each time; space before "(" keeps the macro object-like
 #define YYDEBUG 1										// get the pretty debugging code to compile
 #define YYERROR_VERBOSE									// more information in syntax errors
@@ -187,5 +187,5 @@
 	ConstantExpr * constant = dynamic_cast<ConstantExpr *>(type->expr.get());
 	if ( constant && (constant->get_constant()->get_value() == "0" || constant->get_constant()->get_value() == "1") ) {
-    	type = new ExpressionNode( new CastExpr( maybeMoveBuild<Expression>(type), new BasicType( Type::Qualifiers(), BasicType::SignedInt ) ) );
+		type = new ExpressionNode( new CastExpr( maybeMoveBuild<Expression>(type), new BasicType( Type::Qualifiers(), BasicType::SignedInt ) ) );
 	} // if
 	return new ForCtrl(
@@ -618,4 +618,7 @@
 postfix_expression:
 	primary_expression
+	| postfix_expression '[' assignment_expression ',' comma_expression ']'
+		// { $$ = new ExpressionNode( build_binary_val( OperKinds::Index, $1, new ExpressionNode( build_binary_val( OperKinds::Index, $3, $5 ) ) ) ); }
+		{ SemanticError( yylloc, "New array subscript is currently unimplemented." ); $$ = nullptr; }
 	| postfix_expression '[' assignment_expression ']'
 		// CFA, comma_expression disallowed in this context because it results in a common user error: subscripting a
@@ -1247,7 +1250,7 @@
 		{ $$ = new StatementNode( build_computedgoto( $3 ) ); }
 		// A semantic check is required to ensure fallthru appears only in the body of a choose statement.
-    | fall_through_name ';'								// CFA
+	| fall_through_name ';'								// CFA
 		{ $$ = new StatementNode( build_branch( BranchStmt::FallThrough ) ); }
-    | fall_through_name identifier_or_type_name ';'		// CFA
+	| fall_through_name identifier_or_type_name ';'		// CFA
 		{ $$ = new StatementNode( build_branch( $2, BranchStmt::FallThrough ) ); }
 	| fall_through_name DEFAULT ';'						// CFA
@@ -1739,9 +1742,5 @@
 
 enum_specifier_nobody:									// type specifier - {...}
-		// Preclude SUE declarations in restricted scopes:
-		//
-		//    int f( struct S { int i; } s1, Struct S s2 ) { struct S s3; ... }
-		//
-		// because it is impossible to call f due to name equivalence.
+		// Preclude SUE declarations in restricted scopes (see type_specifier_nobody)
 	basic_type_specifier
 	| sue_type_specifier_nobody
@@ -2461,7 +2460,7 @@
 		{
 			typedefTable.addToScope( *$2, TYPEDEFname, "9" );
-			if ( $1 == TypeDecl::Otype ) { SemanticError( yylloc, "otype keyword is deprecated" ); }
-			if ( $1 == TypeDecl::Dtype ) { SemanticError( yylloc, "dtype keyword is deprecated" ); }
-			if ( $1 == TypeDecl::Ttype ) { SemanticError( yylloc, "ttype keyword is deprecated" ); }
+			if ( $1 == TypeDecl::Otype ) { SemanticError( yylloc, "otype keyword is deprecated, use T " ); }
+			if ( $1 == TypeDecl::Dtype ) { SemanticError( yylloc, "dtype keyword is deprecated, use T &" ); }
+			if ( $1 == TypeDecl::Ttype ) { SemanticError( yylloc, "ttype keyword is deprecated, use T ..." ); }
 		}
 	  type_initializer_opt assertion_list_opt
@@ -3191,4 +3190,7 @@
 	| '[' ']' multi_array_dimension
 		{ $$ = DeclarationNode::newArray( 0, 0, false )->addArray( $3 ); }
+	| '[' push assignment_expression pop ',' comma_expression ']'
+		{ $$ = DeclarationNode::newArray( $3, 0, false )->addArray( DeclarationNode::newArray( $6, 0, false ) ); }
+		// { SemanticError( yylloc, "New array dimension is currently unimplemented." ); $$ = nullptr; }
 	| multi_array_dimension
 	;
Index: src/main.cc
===================================================================
--- src/main.cc	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ src/main.cc	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -9,7 +9,7 @@
 // Author           : Peter Buhr and Rob Schluntz
 // Created On       : Fri May 15 23:12:02 2015
-// Last Modified By : Andrew Beach
-// Last Modified On : Mon Dec  7 15:29:00 2020
-// Update Count     : 639
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Mon Feb  8 21:10:16 2021
+// Update Count     : 642
 //
 
@@ -492,7 +492,7 @@
 
 static const char * description[] = {
-	"diagnostic color: never, always, or auto.",		// -c
+	"diagnostic color: never, always, auto",			// -c
 	"wait for gdb to attach",							// -g
-	"print help message",								// -h
+	"print translator help message",					// -h
 	"generate libcfa.c",								// -l
 	"generate line marks",								// -L
@@ -500,5 +500,5 @@
 	"do not generate line marks",						// -N
 	"do not read prelude",								// -n
-	"generate prototypes for prelude functions",		// -p
+	"do not generate prelude prototypes => prelude not printed", // -p
 	"only print deterministic output",                  // -d
 	"Use the old-ast",									// -O
@@ -506,5 +506,5 @@
 	"print",											// -P
 	"<directory> prelude directory for debug/nodebug",	// no flag
-	"<option-list> enable profiling information:\n          counters,heap,time,all,none", // -S
+	"<option-list> enable profiling information: counters, heap, time, all, none", // -S
 	"building cfa standard lib",						// -t
 	"",													// -w
Index: tests/.expect/smart-pointers.txt
===================================================================
--- tests/.expect/smart-pointers.txt	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
+++ tests/.expect/smart-pointers.txt	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -0,0 +1,1 @@
+done
Index: tests/smart-pointers.cfa
===================================================================
--- tests/smart-pointers.cfa	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ tests/smart-pointers.cfa	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -2,5 +2,5 @@
 
 #include <memory.hfa>
-#include <stdlib.hfa>
+#include <assert.h>
 
 void counter_test(void) {
@@ -53,7 +53,21 @@
 }
 
+void declare_test(void) {	// declares one counter_ptr and one unique_ptr per element type (int, char, float, double)
+	counter_ptr(int) ptr_i0 = 3;	// each pointer is initialized directly from a plain value of its element type
+	counter_ptr(char) ptr_c0 = 'a';
+	counter_ptr(float) ptr_f0 = 3.5f;
+	counter_ptr(double) ptr_d0 = 3.5;
+
+	unique_ptr(int) ptr_i1 = 3;	// same four element types, using unique_ptr instead of counter_ptr
+	unique_ptr(char) ptr_c1 = 'a';
+	unique_ptr(float) ptr_f1 = 3.5f;
+	unique_ptr(double) ptr_d1 = 3.5;
+}	// NOTE(review): main() in this hunk does not call declare_test; presumably a compile-only check -- confirm
+
 int main(int argc, char * argv[]) {
 	counter_test();
 	unique_test();
 	pointer_equality();
+
+	printf("done\n");	// the single output line recorded in tests/.expect/smart-pointers.txt
 }
Index: tools/prettyprinter/Makefile.am
===================================================================
--- tools/prettyprinter/Makefile.am	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ tools/prettyprinter/Makefile.am	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -11,6 +11,6 @@
 ## Created On       : Wed Jun 28 12:07:10 2017
 ## Last Modified By : Peter A. Buhr
-## Last Modified On : Mon Apr 16 09:43:23 2018
-## Update Count     : 20
+## Last Modified On : Thu Jan 28 08:48:22 2021
+## Update Count     : 23
 ###############################################################################
 
@@ -20,5 +20,5 @@
 BUILT_SOURCES = parser.hh
 
-AM_YFLAGS = -d -t -v
+AM_YFLAGS = -d -t -v -Wno-yacc
 
 SRC = lex.ll \
@@ -34,3 +34,3 @@
 pretty_CXXFLAGS = -Wno-deprecated -Wall -DYY_NO_INPUT -O2 -g -std=c++14
 
-MAINTAINERCLEANFILES = parser.output
+MOSTLYCLEANFILES = parser.output
Index: tools/prettyprinter/ParserTypes.h
===================================================================
--- tools/prettyprinter/ParserTypes.h	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ tools/prettyprinter/ParserTypes.h	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -13,11 +13,11 @@
 // Created On       : Sun Dec 16 15:00:49 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jul 22 10:13:09 2017
-// Update Count     : 175
+// Last Modified On : Tue Jan 26 23:05:34 2021
+// Update Count     : 176
 // 
 
 #pragma once
 
-int yylex();
+extern "C" int yylex();
 
 #include <string>
Index: tools/prettyprinter/parser.yy
===================================================================
--- tools/prettyprinter/parser.yy	(revision da3963ae1399ac41655fc82eda47fad820dd3ccf)
+++ tools/prettyprinter/parser.yy	(revision 565acf59c5402ff35d213dc020ead86f8cb51d16)
@@ -10,6 +10,6 @@
 // Created On       : Sat Dec 15 13:44:21 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Apr 15 21:40:30 2018
-// Update Count     : 1052
+// Last Modified On : Tue Jan 26 22:50:03 2021
+// Update Count     : 1053
 // 
 
@@ -17,4 +17,5 @@
 #define YYDEBUG_LEXER_TEXT( yylval )					// lexer loads this up each time
 #define YYDEBUG 1										// get the pretty debugging code to compile
+#define YYERROR_VERBOSE									// more information in syntax errors
 
 #include <iostream>