Index: benchmark/io/http/http_ring.cpp
===================================================================
--- benchmark/io/http/http_ring.cpp	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ benchmark/io/http/http_ring.cpp	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -9,32 +9,10 @@
 #include <liburing.h>
 
-typedef enum {
-	EVENT_END,
-	EVENT_ACCEPT,
-	EVENT_REQUEST,
-	EVENT_ANSWER
-} event_t;
-
-struct __attribute__((aligned(128))) request_t {
-	event_t type;
-	int fd;
-	size_t length;
-	char * buff;
-	char data[0];
-
-	static struct request_t * create(event_t type, size_t extra) {
-		auto ret = (struct request_t *)malloc(sizeof(struct request_t) + extra);
-		ret->type = type;
-		ret->length = extra;
-		ret->buff = ret->data;
-		return ret;
-	}
-
-	static struct request_t * create(event_t type) {
-		return create(type, 0);
-	}
-};
-
+// #define NOBATCHING
+// #define USE_ASYNC
+
+// Options passed to each thread
 struct __attribute__((aligned(128))) options_t {
+	// Data passed to accept
 	struct {
 		int sockfd;
@@ -44,14 +22,62 @@
 	} acpt;
 
+	// Termination notification
 	int endfd;
+
+	// The ring to use for io
 	struct io_uring * ring;
-
+};
+
+//=========================================================
+// General statistics
+struct __attribute__((aligned(128))) stats_block_t {
 	struct {
-		size_t subs = 0;
-		size_t cnts = 0;
-	} result;
+		volatile size_t conns = 0;
+		volatile size_t reads = 0;
+		volatile size_t writes = 0;
+		volatile size_t full_writes = 0;
+	} completions;
+
+	struct {
+		volatile size_t conns = 0;
+		struct {
+			volatile size_t pipes = 0;
+			volatile size_t reset = 0;
+			volatile size_t other = 0;
+		} requests;
+
+		struct {
+			volatile size_t pipes = 0;
+			volatile size_t reset = 0;
+			volatile size_t other = 0;
+		} answers;
+	} errors;
+
+	struct {
+		volatile size_t current = 0;
+		volatile size_t max = 0;
+		volatile size_t used = 0;
+	} conns;
+
+	volatile size_t recycle_errors = 0;
 };
 
+// Each thread gets its own block of stats
+// and there is a global block for tallying at the end
+thread_local stats_block_t stats;
+stats_block_t global_stats;
+
+// Get an array of current connections
+// This is just for debugging, to make sure
+// no two state-machines get the same fd
+const size_t array_max = 25000;
+class connection * volatile conns[array_max] = { 0 };
+
+// Max fd we've seen, keep track so it's convenient to adjust the array size after
+volatile int max_fd = 0;
+
 //=========================================================
+// Some small wrappers for ring operations used outside the connection state machine
+// get sqe + error handling
 static struct io_uring_sqe * get_sqe(struct io_uring * ring) {
 	struct io_uring_sqe * sqe = io_uring_get_sqe(ring);
@@ -63,38 +89,15 @@
 }
 
-static void submit(struct io_uring * ) {
-	// io_uring_submit(ring);
-}
-
-//=========================================================
+// read of the event fd is not done by a connection
+// use nullptr as the user data
 static void ring_end(struct io_uring * ring, int fd, char * buffer, size_t len) {
 	struct io_uring_sqe * sqe = get_sqe(ring);
 	io_uring_prep_read(sqe, fd, buffer, len, 0);
-	io_uring_sqe_set_data(sqe, request_t::create(EVENT_END));
-	submit(ring);
+	io_uring_sqe_set_data(sqe, nullptr);
+	io_uring_submit(ring);
 }
 
-static void ring_accept(struct io_uring * ring, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) {
-	auto req = request_t::create(EVENT_ACCEPT);
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_accept(sqe, sockfd, addr, addrlen, flags);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-	// std::cout << "Submitted accept: " << req << std::endl;
-}
-
-static void ring_request(struct io_uring * ring, int fd) {
-	size_t size = 1024;
-	auto req = request_t::create(EVENT_REQUEST, size);
-	req->fd = fd;
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_read(sqe, fd, req->buff, size, 0);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-	// std::cout << "Submitted request: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
-}
-
 //=========================================================
+// All answers are fixed and determined by the return code
 enum HttpCode {
 	OK200 = 0,
@@ -108,273 +111,428 @@
 };
 
+// Fixed replies, indexed by return code
 const char * http_msgs[] = {
-	"HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: %zu \n\n%s",
-	"HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 405 Method Not Allowed\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 408 Request Timeout\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
-	"HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
+	"HTTP/1.1 200 OK\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 15\r\nConnection: keep-alive\r\n\r\nHello, World!\r\n",
+	"HTTP/1.1 400 Bad Request\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 404 Not Found\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 405 Method Not Allowed\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 408 Request Timeout\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 413 Payload Too Large\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
+	"HTTP/1.1 414 URI Too Long\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
 };
-
-static_assert( KNOWN_CODES == (sizeof(http_msgs ) / sizeof(http_msgs [0])));
-
-const int http_codes[] = {
-	200,
-	400,
-	404,
-	405,
-	408,
-	413,
-	414,
+static_assert( KNOWN_CODES == (sizeof(http_msgs) / sizeof(http_msgs[0])) );
+
+// Pre-compute the length of these replies
+const size_t http_lens[] = {
+	strlen(http_msgs[0]),
+	strlen(http_msgs[1]),
+	strlen(http_msgs[2]),
+	strlen(http_msgs[3]),
+	strlen(http_msgs[4]),
+	strlen(http_msgs[5]),
+	strlen(http_msgs[6]),
 };
-
-static_assert( KNOWN_CODES == (sizeof(http_codes) / sizeof(http_codes[0])));
-
-int code_val(HttpCode code) {
-	return http_codes[code];
-}
-
-static void ring_answer(struct io_uring * ring, int fd, HttpCode code) {
-	size_t size = 256;
-	auto req = request_t::create(EVENT_ANSWER, size);
-	req->fd = fd;
-
-	const char * fmt = http_msgs[code];
-	const char * date = "";
-	size = snprintf(req->buff, size, fmt, date, size);
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_write(sqe, fd, req->buff, size, 0);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-	// std::cout << "Submitted good answer: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
-}
-
-static void ring_answer(struct io_uring * ring, int fd, const std::string &) {
-	// size_t size = 256;
-	// auto req = request_t::create(EVENT_ANSWER, size);
-	// req->fd = fd;
-
-	// const char * fmt = http_msgs[OK200];
-	// const char * date = "";
-	// size_t len = snprintf(req->buffer, size, fmt, date, ans.size(), ans.c_str());
-	// req->length = len;
-
-	// struct io_uring_sqe * sqe = get_sqe(ring);
-	// io_uring_prep_write(sqe, fd, req->buffer, len, 0);
-	// io_uring_sqe_set_data(sqe, req);
-	// submit(ring);
-	// std::cout << "Submitted good answer: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
-
-
-	static const char* RESPONSE = "HTTP/1.1 200 OK\r\n" \
-						"Content-Length: 15\r\n" \
-						"Content-Type: text/html\r\n" \
-						"Connection: keep-alive\r\n" \
-						"Server: testserver\r\n" \
-						"\r\n" \
-						"Hello, World!\r\n";
-
-	static const size_t RLEN = strlen(RESPONSE);
-
-	size_t size = 256;
-	auto req = request_t::create(EVENT_ANSWER, size);
-	req->fd = fd;
-	req->buff = (char*)RESPONSE;
-	req->length = RLEN;
-
-	// const char * fmt = http_msgs[OK200];
-	// const char * date = "";
-	// size_t len = snprintf(req->buffer, size, fmt, date, ans.size(), ans.c_str());
-	// req->length = len;
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_write(sqe, fd, RESPONSE, RLEN, 0);
-	io_uring_sqe_set_data(sqe, req);
-	submit(ring);
-}
+static_assert( KNOWN_CODES == (sizeof(http_lens) / sizeof(http_lens[0])) );
 
 //=========================================================
-static void handle_new_conn(struct io_uring * ring, int fd) {
-	if( fd < 0 ) {
-		int err = -fd;
-		if( err == ECONNABORTED ) return;
-		std::cerr << "accept error: (" << errno << ") " << strerror(errno) << std::endl;
-		exit(EXIT_FAILURE);
-	}
-
-	ring_request(ring, fd);
-}
-
-static void handle_request(struct io_uring * ring, struct request_t * in, int res) {
-	if( res < 0 ) {
-		int err = -res;
-		switch(err) {
-			case EPIPE:
-			case ECONNRESET:
-				close(in->fd);
-				free(in);
+// Finite state machine responsible for handling each connection
+class __attribute__((aligned(128))) connection {
+private:
+	// The state of the machine
+	enum {
+		ACCEPTING,  // Accept sent waiting for connection
+		REQUESTING, // Waiting for new request
+		ANSWERING,  // Either request received submitting answer or short answer sent, need to submit rest
+	} state;
+
+	// The file descriptor of the connection
+	int fd;
+
+	// request data
+	static const size_t buffer_size = 1024;	// Size of the read buffer
+	const char * buffer;                      // Buffer into which requests are read
+
+	// send data
+	size_t to_send;		// Data left to send
+	const char * iterator;	// Pointer to rest of the message to send
+
+	// stats
+	// how many requests/answers were complete, that is, a valid cqe was obtained
+	struct {
+		size_t requests = 0;
+		size_t answers = 0;
+	} stats;
+
+private:
+	connection()
+		: state(ACCEPTING)
+		, fd(0)
+		, buffer( new char[buffer_size])
+		, iterator(nullptr)
+	{}
+
+	~connection() {
+		delete [] buffer;
+		if(fd != 0) ::stats.conns.current--; // only objects that accepted a connection (fd set) were counted
+	}
+
+	// Close the current connection
+	void close(int err) {
+		// std::cout << "(" << this->stats.requests << "," << this->stats.answers << ", e" << err << ") ";
+		if(fd >= 0 && (size_t)fd < array_max) conns[fd] = nullptr; // the debug array only has array_max slots
+
+		if(fd != 0) {
+			::close(fd);
+		}
+		delete this;
+	}
+
+	//--------------------------------------------------
+	// Wrappers for submit so we can tweak it more easily
+	static void submit(struct io_uring * ring, struct io_uring_sqe * sqe, connection * conn) {
+		(void)ring;
+		#ifdef USE_ASYNC
+			io_uring_sqe_set_flags(sqe, IOSQE_ASYNC);
+		#endif
+		io_uring_sqe_set_data(sqe, conn);
+		#ifdef NOBATCHING
+			io_uring_submit(ring);
+		#endif
+	}
+
+	void submit(struct io_uring * ring, struct io_uring_sqe * sqe) {
+		submit(ring, sqe, this);
+	}
+
+	//--------------------------------------------------
+	// get a new request from the client
+	void request(struct io_uring * ring) {
+		state = REQUESTING;
+		struct io_uring_sqe * sqe = get_sqe(ring);
+		io_uring_prep_recv(sqe, fd, (void*)buffer, buffer_size, 0);
+		submit(ring, sqe);
+	}
+
+	//--------------------------------------------------
+	// Send a new answer based on a return code
+	void answer(struct io_uring * ring, HttpCode code) {
+		iterator = http_msgs[code];
+		to_send  = http_lens[code];
+		if(to_send != 124) { // 124 is the length of the OK200 reply; any other reply is reported here
+			std::cerr << "Answer has weird size: " << to_send << " (" << (int)code << ")" << std::endl;
+		}
+		answer(ring);
+	}
+
+	// send a new answer to the client
+	// Reused for incomplete writes
+	void answer(struct io_uring * ring) {
+		state = ANSWERING;
+		struct io_uring_sqe * sqe = get_sqe(ring);
+		io_uring_prep_send(sqe, fd, iterator, to_send, 0);
+		submit(ring, sqe);
+	}
+
+	//--------------------------------------------------
+	// Handle a new connection, i.e., the result of getting a cqe while in the ACCEPTING state
+	void newconn(struct io_uring * ring, int ret) {
+		// Check errors
+		if( ret < 0 ) {
+			int err = -ret;
+			if( err == ECONNABORTED ) {
+				::stats.errors.conns++;
+				this->close(err);
 				return;
-			default:
-				std::cerr << "request error: (" << err << ") " << strerror(err) << std::endl;
-				exit(EXIT_FAILURE);
-		}
-	}
-
-	if(res == 0) {
-		close(in->fd);
-		free(in);
-		return;
-	}
-
-	const char * it = in->buff;
-	if( !strstr( it, "\r\n\r\n" ) ) {
-		std::cout << "Incomplete request" << std::endl;
-		close(in->fd);
-		free(in);
-		return;
-	}
-
-	it = in->buff;
-	const std::string reply = "Hello, World!\n";
-	int ret = memcmp(it, "GET ", 4);
-	if( ret != 0 ) {
-		ring_answer(ring, in->fd, E400);
-		goto NEXT;
-	}
-
-	it += 4;
-	ret = memcmp(it, "/plaintext", 10);
-	if( ret != 0 ) {
-		ring_answer(ring, in->fd, E404);
-		goto NEXT;
-	}
-
-	ring_answer(ring, in->fd, reply);
-
-	NEXT:
-		ring_request(ring, in->fd);
-		return;
-}
-
-static void handle_answer(struct io_uring * ring, struct request_t * in, int res) {
-	if( res < 0 ) {
-		int err = -res;
-		switch(err) {
-			case EPIPE:
-			case ECONNRESET:
-				close(in->fd);
-				free(in);
-				return;
-			default:
-				std::cerr << "answer error: (" << err << ") " << strerror(err) << std::endl;
-				exit(EXIT_FAILURE);
-		}
-	}
-
-	if( res >= in->length ) {
-		free(in);
-		return;
-	}
-
-	struct io_uring_sqe * sqe = get_sqe(ring);
-	io_uring_prep_write(sqe, in->fd, in->buff + res, in->length - res, 0);
-	io_uring_sqe_set_data(sqe, in);
-	submit(ring);
-	// std::cout << "Re-Submitted request: " << in << " (" << (void*)in->buffer << ")"<<std::endl;
-
-	ring_request(ring, in->fd);
-}
+			}
+			std::cerr << "accept error: (" << err << ") " << strerror(err) << std::endl;
+			exit(EXIT_FAILURE);
+		}
+
+		// Count the connections
+		::stats.completions.conns++;
+		::stats.conns.current++;
+		if(::stats.conns.current > ::stats.conns.max) {
+			::stats.conns.max = ::stats.conns.current;
+		}
+
+		// Read on the data
+		fd = ret;
+		request(ring);
+
+		// check the max fd so we know if we exceeded the array
+		for(;;) {
+			int expected = max_fd;
+			if(expected >= fd) break; // not a new maximum, carry on with the checks below
+			if( __atomic_compare_exchange_n(&max_fd, &expected, fd, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) break;
+		}
+
+		// check if we have enough space to fit inside the array
+		if(fd >= array_max) {
+			std::cerr << "accept error: fd " << fd << " is too high" << std::endl;
+			return;
+		}
+
+		// Put our connection into the global array
+		// No one else should be using it so if they are that's a bug
+		auto exist = __atomic_exchange_n( &conns[fd], this, __ATOMIC_SEQ_CST);
+		if( exist ) {
+			size_t first = __atomic_fetch_add(&global_stats.recycle_errors, 1, __ATOMIC_SEQ_CST);
+			if( first == 0 ) {
+				std::cerr << "First: accept has existing connection " << std::endl;
+			}
+		}
+	}
+
+	// Handle a new request, i.e., the result of getting a cqe while in the REQUESTING state
+	void newrequest(struct io_uring * ring, int res) {
+		// Check errors
+		if( res < 0 ) {
+			int err = -res;
+			switch(err) {
+				case EPIPE:
+					::stats.errors.requests.pipes++;
+					break;
+					// Don't fall through, to get better stats
+				case ECONNRESET:
+					::stats.errors.requests.reset++;
+					break;
+				default:
+					::stats.errors.requests.other++;
+					std::cerr << "request error: (" << err << ") " << strerror(err) << std::endl;
+					exit(EXIT_FAILURE);
+			}
+
+			// Connection failed, close it
+			this->close(err);
+			return;
+		}
+
+		// Update stats
+		::stats.completions.reads++;
+
+		// Is this an EOF
+		if(res == 0) {
+			// Yes, close the connection
+			this->close(0);
+			return;
+		}
+
+		// Find the end of the request header, looking only at the res bytes actually received
+		const char * it = buffer;
+		if( !memmem( it, res, "\r\n\r\n", 4 ) ) {
+			// This state machine doesn't support incomplete reads
+			// Print them to output so it's clear there is an issue
+			std::cout << "Incomplete request" << std::endl;
+			this->close(EBADR);
+			return;
+		}
+
+		// Find the method to use
+		it = buffer;
+		int ret = memcmp(it, "GET ", 4);
+		if( ret != 0 ) {
+			// We only support get, answer with an error
+			answer(ring, E400);
+			return;
+		}
+
+		// Find the target
+		it += 4;
+		ret = memcmp(it, "/plaintext", 10);
+		if( ret != 0 ) {
+			// We only support /plaintext, answer with an error
+			answer(ring, E404);
+			return;
+		}
+
+		// Correct request, answer with the payload
+		this->stats.requests++;
+		answer(ring, OK200);
+	}
+
+	// Handle a partial or full answer sent, i.e., the result of getting a cqe while in the ANSWERING state
+	void writedone(struct io_uring * ring, int res) {
+		// Check errors
+		if( res < 0 ) {
+			int err = -res;
+			switch(err) {
+				case EPIPE:
+					::stats.errors.answers.pipes++;
+					break;
+					// Don't fall through, to get better stats
+				case ECONNRESET:
+					::stats.errors.answers.reset++;
+					break;
+				default:
+					::stats.errors.answers.other++;
+					std::cerr << "answer error: (" << err << ") " << strerror(err) << std::endl;
+					exit(EXIT_FAILURE);
+			}
+
+			this->close(err);
+			return;
+		}
+
+		// Update stats
+		::stats.completions.writes++;
+		if(res == 124) ::stats.completions.full_writes++;
+
+		// Is this write completed
+		if( res == to_send ) {
+			// Yes, more stats
+			this->stats.answers++;
+			if(this->stats.answers == 1) ::stats.conns.used++;
+			// Then read a new request
+			request(ring);
+			return;
+		}
+
+		// Not a completed write, push the rest
+		to_send -= res;
+		iterator += res;
+		answer(ring);
+	}
+public:
+	// Submit a call to accept and create a new connection object
+	static void accept(struct io_uring * ring, const struct options_t & opt) {
+		struct io_uring_sqe * sqe = get_sqe(ring);
+		io_uring_prep_accept(sqe, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
+		submit(ring, sqe, new connection());
+		// std::cout << "Submitted accept: " << req << std::endl;
+	}
+
+	// Handle a new cqe
+	void handle(struct io_uring * ring, int res, const struct options_t & opt) {
+		switch(state) {
+		case ACCEPTING:
+			connection::accept(ring, opt);
+			newconn(ring, res);
+			break;
+		case REQUESTING:
+			newrequest(ring, res);
+			break;
+		case ANSWERING:
+			writedone(ring, res);
+			break;
+		}
+	}
+};
 
 //=========================================================
-extern "C" {
-extern int __io_uring_flush_sq(struct io_uring *ring);
-}
-
+// Main loop of the WebServer
+// Effectively uses one thread_local copy of everything per kernel thread
 void * proc_loop(void * arg) {
-	size_t count = 0;
+	// Get the thread local argument
 	struct options_t & opt = *(struct options_t *)arg;
-
 	struct io_uring * ring = opt.ring;
 
+	// Track the shutdown using a event_fd
 	char endfd_buf[8];
 	ring_end(ring, opt.endfd, endfd_buf, 8);
 
-	ring_accept(ring, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
-
-	bool done = false;
+	// Accept our first connection
+	// May not take effect until io_uring_submit_and_wait
+	connection::accept(ring, opt);
+
+	int reset = 1;       // Counter to print stats once in a while
+	bool done = false;   // Are we done
+	size_t sqes = 0;     // Number of sqes we submitted
+	size_t call = 0;     // Number of submits we made
 	while(!done) {
-    		struct io_uring_cqe *cqe;
-		int ret;
-		while(-EAGAIN == (ret = io_uring_wait_cqe_nr(ring, &cqe, 0))) {
-			ret = io_uring_submit_and_wait(ring, 1);
-			if (ret < 0) {
-				fprintf( stderr, "io_uring get error: (%d) %s\n", (int)-ret, strerror(-ret) );
-				exit(EXIT_FAILURE);
-			}
-			opt.result.subs += ret;
-			opt.result.cnts++;
-		}
-
-		if (ret < 0 && -EAGAIN != ret) {
-			fprintf( stderr, "io_uring peek error: (%d) %s\n", (int)-ret, strerror(-ret) );
+		// Submit all the answers we have and wait for responses
+		int ret = io_uring_submit_and_wait(ring, 1);
+
+		// check errors
+		if (ret < 0) {
+			fprintf( stderr, "io_uring S&W error: (%d) %s\n", (int)-ret, strerror(-ret) );
 			exit(EXIT_FAILURE);
 		}
 
-		auto req = (struct request_t *)cqe->user_data;
-		// std::cout << req << " completed with " << cqe->res << std::endl;
-
-		switch(req->type) {
-			case EVENT_END:
+		// Check how good we are at batching sqes
+		sqes += ret;
+		call++;
+
+		struct io_uring_cqe *cqe;
+		unsigned head;
+		unsigned count = 0;
+
+        	// go through all cqes
+        	io_uring_for_each_cqe(ring, head, cqe) {
+			if (0 == cqe->user_data) {
 				done = true;
 				break;
-			case EVENT_ACCEPT:
-				handle_new_conn(ring, cqe->res);
-				free(req);
-				ring_accept(ring, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
-				break;
-			case EVENT_REQUEST:
-				handle_request(ring, req, cqe->res);
-				break;
-			case EVENT_ANSWER:
-				handle_answer(ring, req, cqe->res);
-				break;
-		}
-
-		io_uring_cqe_seen(ring, cqe);
-	}
-
-	return (void*)count;
+			}
+
+			auto req = (class connection *)cqe->user_data;
+			req->handle( ring, cqe->res, opt );
+
+			// Every now and then, print some stats
+			reset--;
+			if(reset == 0) {
+				std::cout << "Submit average: " << sqes << "/" << call << "(" << (((double)sqes) / call) << ")" << std::endl;
+				// Reset to some random number of completions
+				// use the ring_fd in the number so the threads don't all print at once
+				reset = 100000 + (100000 * (ring->ring_fd % 5));
+			}
+
+			// Keep track of how many cqes we have seen
+			count++;
+		}
+
+		// Mark the cqes as seen
+		io_uring_cq_advance(ring, count);
+	}
+
+	// Tally all the thread local statistics
+	__atomic_fetch_add( &global_stats.completions.conns, ::stats.completions.conns, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.completions.reads, ::stats.completions.reads, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.completions.writes, ::stats.completions.writes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.completions.full_writes, ::stats.completions.full_writes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.conns, ::stats.errors.conns, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.requests.pipes, ::stats.errors.requests.pipes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.requests.reset, ::stats.errors.requests.reset, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.requests.other, ::stats.errors.requests.other, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.answers.pipes, ::stats.errors.answers.pipes, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.answers.reset, ::stats.errors.answers.reset, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.errors.answers.other, ::stats.errors.answers.other, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.conns.current, ::stats.conns.current, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.conns.max, ::stats.conns.max, __ATOMIC_SEQ_CST );
+	__atomic_fetch_add( &global_stats.conns.used, ::stats.conns.used, __ATOMIC_SEQ_CST );
+
+	return nullptr;
 }
 
 //=========================================================
-struct __attribute__((aligned(128))) aligned_ring {
-	struct io_uring storage;
-};
-
-#include <bit>
-
-#include <pthread.h>
+#include <bit> // for ispow2
+
 extern "C" {
-	#include <signal.h>
-	#include <sys/eventfd.h>
-	#include <sys/socket.h>
-	#include <netinet/in.h>
+	#include <pthread.h>      // for pthreads
+	#include <signal.h>       // for signal(SIGPIPE, SIG_IGN);
+	#include <sys/eventfd.h>  // use for termination
+	#include <sys/socket.h>   // for sockets in general
+	#include <netinet/in.h>   // for sockaddr_in, AF_INET
 }
 
 int main(int argc, char * argv[]) {
+	// Initialize the array of connection-fd associations
+	for(int i = 0; i < array_max; i++) {
+		conns[i] = nullptr;
+	}
+
+	// Make sure we ignore all sigpipes
 	signal(SIGPIPE, SIG_IGN);
 
-	unsigned nthreads = 1;
-	unsigned port = 8800;
-	unsigned entries = 256;
-	unsigned backlog = 10;
-	bool attach = false;
+	// Default command line arguments
+	unsigned nthreads = 1;      // number of kernel threads
+	unsigned port = 8800;       // which port to listen on
+	unsigned entries = 256;     // number of entries per ring/kernel thread
+	unsigned backlog = 262144;  // backlog argument to listen
+	bool attach = false;        // Whether or not to attach all the rings
+	bool sqpoll = false;        // Whether or not to use SQ Polling
 
 	//===================
-	// Arguments
+	// Arguments Parsing
 	int c;
-	while ((c = getopt (argc, argv, "t:p:e:b:a")) != -1) {
+	while ((c = getopt (argc, argv, "t:p:e:b:aS")) != -1) {
 		switch (c)
 		{
@@ -394,7 +552,10 @@
 			attach = true;
 			break;
+		case 'S':
+			sqpoll = true;
+			break;
 		case '?':
 		default:
-			std::cerr << "Usage: -t <threads> -p <port> -e <entries> -b <backlog> -a" << std::endl;
+			std::cerr << "Usage: -t <threads> -p <port> -e <entries> -b <backlog> -aS" << std::endl;
 			return EXIT_FAILURE;
 		}
@@ -416,4 +577,5 @@
 	//===================
 	// End FD
+	// Create a single event fd to notify the kernel threads when the server shuts down
 	int efd = eventfd(0, EFD_SEMAPHORE);
 	if (efd < 0) {
@@ -424,4 +586,5 @@
 	//===================
 	// Open Socket
+	// Listen on specified port
 	std::cout << getpid() << " : Listening on port " << port << std::endl;
 	int server_fd = socket(AF_INET, SOCK_STREAM, 0);
@@ -439,4 +602,6 @@
 	address.sin_port = htons( port );
 
+	// In case the port is already in use, don't just return an error
+	// Linux is very slow at reclaiming port so just retry regularly
 	int waited = 0;
 	while(true) {
@@ -444,14 +609,17 @@
 		if(ret < 0) {
 			if(errno == EADDRINUSE) {
+				// Port is in use, let's retry later
 				if(waited == 0) {
 					std::cerr << "Waiting for port" << std::endl;
 				} else {
+					// Keep the user informed: print how long we have been waiting
 					std::cerr << "\r" << waited;
 					std::cerr.flush();
 				}
 				waited ++;
-				usleep( 1000000 );
+				usleep( 1000000 ); // Wait and retry
 				continue;
 			}
+			// Some other error occurred, this is a real error
 			std::cerr << "bind error: (" << errno << ") " << strerror(errno) << std::endl;
 			exit(EXIT_FAILURE);
@@ -474,19 +642,39 @@
 	std::cout << std::endl;
 
+	// Create the desired number of kernel-threads and for each
+	// create a ring. Create the rings in the main so we can attach them
+	// Since the rings are all in a dense VLA, align them so we don't get false sharing
+	// it's unlikely but better safe than sorry
+	struct __attribute__((aligned(128))) aligned_ring {
+		struct io_uring storage;
+	};
 	aligned_ring thrd_rings[nthreads];
 	pthread_t    thrd_hdls[nthreads];
 	options_t    thrd_opts[nthreads];
+	bool no_drops  = true;
+	bool fast_poll = true;
+	bool nfix_sqpl = true;
 	for(unsigned i = 0; i < nthreads; i++) {
-		if(!attach || i == 0) {
-			io_uring_queue_init(entries, &thrd_rings[i].storage, 0);
-		}
-		else {
-			struct io_uring_params p;
-			memset(&p, 0, sizeof(p));
-			p.flags = IORING_SETUP_ATTACH_WQ;
+		struct io_uring_params p = { };
+
+		if(sqpoll) { // If sqpoll is on, add the flag
+			p.flags |= IORING_SETUP_SQPOLL;
+			p.sq_thread_idle = 100;
+		}
+
+		if (attach && i != 0) { // If attach is on, add the flag, except for the first ring
+			p.flags |= IORING_SETUP_ATTACH_WQ;
 			p.wq_fd = thrd_rings[0].storage.ring_fd;
-			io_uring_queue_init_params(entries, &thrd_rings[i].storage, &p);
-		}
-
+		}
+
+		// Create the ring; a negative return is -errno and means the ring cannot be used
+		if(int init_err = io_uring_queue_init_params(entries, &thrd_rings[i].storage, &p); init_err < 0) { std::cerr << "io_uring_queue_init_params error: (" << -init_err << ") " << strerror(-init_err) << std::endl; exit(EXIT_FAILURE); }
+
+		// Check if some of the note-worthy features are there
+		if(0 == (p.features & IORING_FEAT_NODROP         )) { no_drops  = false; }
+		if(0 == (p.features & IORING_FEAT_FAST_POLL      )) { fast_poll = false; }
+		if(0 == (p.features & IORING_FEAT_SQPOLL_NONFIXED)) { nfix_sqpl = false; }
+
+		// Write the socket options we want to the options we pass to the threads
 		thrd_opts[i].acpt.sockfd  = server_fd;
 		thrd_opts[i].acpt.addr    = (struct sockaddr *)&address;
@@ -502,4 +690,9 @@
 		}
 	}
+
+	// Tell the user if the features are present
+	if( no_drops ) std::cout << "No Drop Present" << std::endl;
+	if( fast_poll) std::cout << "Fast Poll Present" << std::endl;
+	if(!nfix_sqpl) std::cout << "Non-Fixed SQ Poll not Present" << std::endl;
 
 	//===================
@@ -510,4 +703,5 @@
 		int ret;
 		do {
+			// Wait for a Ctrl-D to close the server
 			ret = read(STDIN_FILENO, buffer, 128);
 			if(ret < 0) {
@@ -526,4 +720,5 @@
 
 	//===================
+	// Use eventfd_write to tell the threads we are closing
 	(std::cout << "Sending Shutdown to Threads... ").flush();
 	ret = eventfd_write(efd, nthreads);
@@ -535,7 +730,6 @@
 
 	//===================
+	// Join all the threads and close the rings
 	(std::cout << "Stopping Threads Done... ").flush();
-	size_t total = 0;
-	size_t count = 0;
 	for(unsigned i = 0; i < nthreads; i++) {
 		void * retval;
@@ -545,14 +739,11 @@
 			exit(EXIT_FAILURE);
 		}
-		// total += (size_t)retval;
-		total += thrd_opts[i].result.subs;
-		count += thrd_opts[i].result.cnts;
 
 		io_uring_queue_exit(thrd_opts[i].ring);
 	}
 	std::cout << "done" << std::endl;
-	std::cout << "Submit average: " << total << "/" << count << "(" << (((double)total) / count) << ")" << std::endl;
 
 	//===================
+	// Close the sockets
 	(std::cout << "Closing Socket... ").flush();
 	ret = shutdown( server_fd, SHUT_RD );
@@ -567,4 +758,16 @@
 		exit(EXIT_FAILURE);
 	}
-	std::cout << "done" << std::endl;
+	std::cout << "done" << std::endl << std::endl;
+
+	// Print stats and exit
+	std::cout << "Errors: " << global_stats.errors.conns << "c, (" << global_stats.errors.requests.pipes << "p, " << global_stats.errors.requests.reset << "r, " << global_stats.errors.requests.other << "o" << ")r, (" << global_stats.errors.answers.pipes << "p, " << global_stats.errors.answers.reset << "r, " << global_stats.errors.answers.other << "o" << ")a" << std::endl;
+	std::cout << "Completions: " << global_stats.completions.conns << "c, " << global_stats.completions.reads << "r, " << global_stats.completions.writes << "w" << std::endl;
+	std::cout << "Full Writes: " << global_stats.completions.full_writes << std::endl;
+	std::cout << "Max FD: " << max_fd << std::endl;
+	std::cout << "Successful connections: " << global_stats.conns.used << std::endl;
+	std::cout << "Max concurrent connections: " << global_stats.conns.max << std::endl;
+	std::cout << "Accepts on non-zeros: " << global_stats.recycle_errors << std::endl;
+	std::cout << "Leaked conn objects: " << global_stats.conns.current << std::endl;
 }
+
+// compile-command: "g++ http_ring.cpp -std=c++2a -pthread -luring -O3" //
Index: benchmark/io/http/parhttperf
===================================================================
--- benchmark/io/http/parhttperf	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
+++ benchmark/io/http/parhttperf	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+NTHREADS=$1
+shift
+echo "Running $NTHREADS"
+
+mkdir -p out
+rm -v out/*
+for ((i=0; i<$NTHREADS; i++))
+do
+	# echo "httperf --client $i/$NTHREADS $@ > out/result.$i.out"
+	httperf --client "$i/$NTHREADS" "$@" > "out/result.$i.out" &  # "$@" keeps each forwarded argument intact
+done
+
+wait
+echo "Experiment terminated"
+
+FILES=""
+for ((i=0; i<$NTHREADS; i++))
+do
+	FILES="$FILES out/result.$i.out"
+done
+./parse-httperf.py $FILES ||  echo $FILES
Index: benchmark/io/http/parse-httperf.py
===================================================================
--- benchmark/io/http/parse-httperf.py	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
+++ benchmark/io/http/parse-httperf.py	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -0,0 +1,258 @@
+#!/usr/bin/python3
+
+import argparse
+import decimal
+import math
+import re
+import sys
+
+import collections, functools, operator
+
+def parse(reg, lines):
+	m = [re.findall(reg,l) for l in lines]
+	return [*filter(None, m)][0][0]
+
+def wavg(vals, ws):
+	t = sum(ws)
+	if t == 0:
+		return 0.0
+	s = sum([vals[i] * ws[i] for i in range(len(vals))])
+	return s / t
+
+def hist(s):
+	s = s.split()
+	h = [int(v) for v in s]
+	return dict([(k, v) for (k,v) in enumerate(h) if v != 0])
+
+class Result:
+	def __init__(self):
+		self.total = {}
+		self.connect = {}
+		self.request = {}
+		self.reply = {}
+		self.misc = {}
+		self.errors = {}
+		self.session = {}
+
+	@staticmethod
+	def from_file(file):
+		r = Result()
+		lines  = [l for l in file]
+		print(lines[0].strip())
+		#------------------------------
+		# total
+		totals = parse(r'^Total: connections ([0-9]+) requests ([0-9]+) replies ([0-9]+) test-duration ([\.0-9]+) s', lines)
+		r.total = {
+			'connections': int(totals[0]),
+			'requests': int(totals[1]),
+			'replies': int(totals[2]),
+			'duration': float(totals[3])
+		}
+
+		#------------------------------
+		# connection
+		connection1 = parse(r'^Connection rate: ([\.0-9]+) conn/s \(([\.0-9]+) ms/conn, <=([0-9]+) concurrent connections\)', lines)
+		connection2 = parse(r'^Connection time \[ms\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) median ([\.0-9]+) stddev ([\.0-9]+)', lines)
+		connection3 = parse(r'^Connection time \[ms\]: connect ([\.0-9]+)', lines)
+		connection4 = parse(r'^Connection length \[replies/conn\]: ([\.0-9]+)', lines)
+		r.connect = {
+			'rate': { 'conn/s': float(connection1[0]), 'ms/conn': float(connection1[1]), '<=': int(connection1[2]) },
+			'time': { 'min': float(connection2[0]), 'avg': float(connection2[1]), 'max': float(connection2[2]), 'median': float(connection2[3]), 'stddev': float(connection2[4]) },
+			'connect': float(connection3),
+			'length': float(connection4)
+		}
+
+		#------------------------------
+		# request
+		request1 = parse(r'^Request rate: ([\.0-9]+) req/s \(([\.0-9]+) ms/req\)', lines)
+		request2 = parse(r'^Request size \[B\]: ([\.0-9]+)', lines)
+		r.request = {
+			'req/s': float(request1[0]),
+			'ms/req': float(request1[1]),
+			'size': float(request2)
+		}
+
+		#------------------------------
+		# reply
+		replies1 = parse(r'^Reply rate \[replies/s\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) stddev ([\.0-9]+)', lines)
+		replies2 = parse(r'^Reply time \[ms\]: response ([\.0-9]+) transfer ([\.0-9]+)', lines)
+		replies3 = parse(r'^Reply size \[B\]: header ([\.0-9]+) content ([\.0-9]+) footer ([\.0-9]+) \(total ([\.0-9]+)\)', lines)
+		replies4 = parse(r'^Reply status: 1xx=([0-9]+) 2xx=([0-9]+) 3xx=([0-9]+) 4xx=([0-9]+) 5xx=([0-9]+)', lines)
+		r.reply = {
+			'rate' : { 'min': float(replies1[0]), 'avg': float(replies1[1]), 'max': float(replies1[2]), 'stddev': float(replies1[3]) },
+			'time' : { 'response': float(replies2[0]), 'transfer': float(replies2[1]) },
+			'size' : { 'header': float(replies3[0]), 'content': float(replies3[1]), 'footer': float(replies3[2]), 'total': float(replies3[3]) },
+			'status' : { '1xx': int(replies4[0]), '2xx': int(replies4[1]), '3xx': int(replies4[2]), '4xx': int(replies4[3]), '5xx': int(replies4[4]) }
+		}
+
+		#------------------------------
+		# misc
+		misc1 = parse(r'^CPU time \[s\]: user ([\.0-9]+) system ([\.0-9]+) \(user ([\.0-9]+)% system ([\.0-9]+)% total ([\.0-9]+)%\)', lines)
+		misc2 = parse(r'^Net I/O: ([\.0-9]+) KB/s \(([\.0-9]+)\*10\^([0-9]+) bps\)', lines)
+		r.misc = {
+			'usr': float(misc1[0]),
+			'sys': float(misc1[1]),
+			'usr%': float(misc1[2]),
+			'sys%': float(misc1[3]),
+			'total%': float(misc1[4]),
+			'KB/S': float(misc2[0]),
+			'bps': float(misc2[1]) * math.pow(10, int(misc2[2]))
+		}
+
+		#------------------------------
+		# errors
+		errors1 = parse(r'^Errors: total ([0-9]+) client-timo ([0-9]+) socket-timo ([0-9]+) connrefused ([0-9]+) connreset ([0-9]+)', lines)
+		errors2 = parse(r'^Errors: fd-unavail ([0-9]+) addrunavail ([0-9]+) ftab-full ([0-9]+) other ([0-9]+)', lines)
+		r.errors = {
+			'total': int(errors1[0]),
+			'client-timout': int(errors1[1]),
+			'socket-timout': int(errors1[2]),
+			'connection-refused': int(errors1[3]),
+			'connection-reset': int(errors1[4]),
+			'fd-unavailable': int(errors2[0]),
+			'address-unavailable': int(errors2[1]),
+			'ftab-full': int(errors2[2]),
+			'other': int(errors2[3])
+		}
+
+		#------------------------------
+		# session
+		session1 = parse(r'^Session rate \[sess/s\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) stddev ([\.0-9]+) \(([0-9]+)/([0-9]+)\)', lines)
+		session2 = parse(r'^Session: avg ([\.0-9]+) connections/session', lines)
+		session3 = parse(r'^Session lifetime \[s\]: ([\.0-9]+)', lines)
+		session4 = parse(r'^Session failtime \[s\]: ([\.0-9]+)', lines)
+		session5 = parse(r'^Session length histogram: ([ 0-9]+)', lines)
+		r.session = {
+			'rate': { 'min': float(session1[0]), 'avg': float(session1[1]), 'max': float(session1[2]), 'stddev': float(session1[3]) },
+			'successes': int(session1[4]),
+			'totals': int(session1[5]),
+			'conns/ses': float(session2),
+			'lifetime': float(session3),
+			'failtime': float(session4),
+			'hist': hist(session5)
+		}
+
+		return r
+
+if __name__ == "__main__":
+	#------------------------------
+	# parse args
+	parser = argparse.ArgumentParser(description='Script aggregates httperf output')
+	parser.add_argument('files', metavar='files', type=argparse.FileType('r'), nargs='*', help='a list of files to aggregate')
+
+	try:
+		args =  parser.parse_args()
+	except:
+		print('ERROR: invalid arguments', file=sys.stderr)
+		parser.print_help(sys.stderr)
+		sys.exit(1)
+
+	if len(args.files) == 0:
+		print('No input files', file=sys.stderr)
+		parser.print_help(sys.stderr)
+		sys.exit(1)
+
+	#------------------------------
+	# Construct objects
+	results = [Result.from_file(f) for f in args.files]
+
+	#==================================================
+	# Print
+	#==================================================
+	totals = dict(functools.reduce(lambda a, b: a.update(b) or a, [r.total for r in results], collections.Counter()))  # update() keeps zero sums; Counter '+' would drop them
+	totals['duration-'] = min([r.total['duration'] for r in results])
+	totals['duration+'] = max([r.total['duration'] for r in results])
+	print("")
+	print("")
+	print("Total: connections {:,} requests {:,} replies {:,} test-duration {}-{} s".format(totals['connections'], totals['requests'], totals['replies'], totals['duration-'], totals['duration+']))
+	print("")
+
+	#==================================================
+	connections = {
+		'conn/s': sum([r.connect['rate']['conn/s'] for r in results]),
+		'<=': sum([r.connect['rate']['<='] for r in results]),
+		'min': min([r.connect['time']['min'] for r in results]),
+		'avg': wavg([r.connect['time']['avg'] for r in results], [r.total['connections'] for r in results]),
+		'max': max([r.connect['time']['max'] for r in results]),
+		'median': wavg([r.connect['time']['median'] for r in results], [r.total['connections'] for r in results]),
+		'stddev': wavg([r.connect['time']['stddev'] for r in results], [r.total['connections'] for r in results]),
+		'connect': wavg([r.connect['connect'] for r in results], [r.total['connections'] for r in results]),
+		'length': wavg([r.connect['length'] for r in results], [r.total['connections'] for r in results])
+	}
+	print("Connection rate: {:,.2f} conn/s ({:.2f} ms/conn, <={:,} concurrent connections)".format(connections['conn/s'], 1000.0 / connections['conn/s'], connections['<=']))
+	print("Connection time [ms]: min {:,.2f} avg {:,.2f} max {:,.2f} avg median {:,.2f} avg stddev {:,.2f}".format(connections['min'], connections['avg'], connections['max'], connections['median'], connections['stddev']))
+	print("Connection time [ms]: connect {:,.2f}".format(connections['connect']))
+	print("Connection length [replies/conn]: {:,.2f}".format(connections['length']))
+	print("")
+
+	#==================================================
+	requests = {
+		'req/s': sum([r.request['req/s'] for r in results]),
+		'size': wavg([r.request['size'] for r in results], [r.total['requests'] for r in results])
+	}
+	print("Request rate: {:,.2f} req/s ({:.2f} ms/req)".format(requests['req/s'], 1000.0 / requests['req/s']))
+	print("Request size [B]: {:,.2f}".format(requests['size']))
+	print("")
+
+	#==================================================
+	replies = {
+		'min': sum([r.reply['rate']['min'] for r in results]),
+		'avg': sum([r.reply['rate']['avg'] for r in results]),
+		'max': sum([r.reply['rate']['max'] for r in results]),
+		'std':  wavg([r.reply['rate']['stddev'] for r in results], [r.total['replies'] for r in results])
+	}
+	print("Reply rate [replies/s]: min {:,.2f} avg {:,.2f} max {:,.2f} avg stddev {:,.2f}".format(replies['min'], replies['avg'], replies['max'], replies['std']))
+	replies = {
+		'rs': wavg([r.reply['time']['response'] for r in results], [r.total['replies'] for r in results]),
+		'tr': wavg([r.reply['time']['transfer'] for r in results], [r.total['replies'] for r in results])
+	}
+	print("Reply time [ms]: response {:,.2f} transfer {:,.2f}".format(replies['rs'], replies['tr']))
+	replies = {
+		'hd': wavg([r.reply['size']['header' ] for r in results], [r.total['replies'] for r in results]),
+		'ct': wavg([r.reply['size']['content'] for r in results], [r.total['replies'] for r in results]),
+		'ft': wavg([r.reply['size']['footer' ] for r in results], [r.total['replies'] for r in results]),
+		'tt': wavg([r.reply['size']['total'  ] for r in results], [r.total['replies'] for r in results])
+	}
+	print("Reply size [B]: header {:,.2f} content {:,.2f} footer {:,.2f} (total {:,.2f})".format(replies['hd'], replies['ct'], replies['ft'], replies['tt']))
+	replies = {
+		'1xx': sum([r.reply['status']['1xx'] for r in results]),
+		'2xx': sum([r.reply['status']['2xx'] for r in results]),
+		'3xx': sum([r.reply['status']['3xx'] for r in results]),
+		'4xx': sum([r.reply['status']['4xx'] for r in results]),
+		'5xx': sum([r.reply['status']['5xx'] for r in results])
+	}
+	print("Reply status: 1xx={:,} 2xx={:,} 3xx={:,} 4xx={:,} 5xx={:,}".format(replies['1xx'], replies['2xx'], replies['3xx'], replies['4xx'], replies['5xx']))
+	print("")
+
+	#==================================================
+	misc = dict(functools.reduce(lambda a, b: a.update(b) or a, [r.misc for r in results], collections.Counter()))  # update() keeps zero sums; Counter '+' would drop them
+	print("CPU time [s]: user {:.2f} system {:.2f} (user {:.2f}% system {:.2f}% total {:.2f}%)".format(misc['usr'], misc['sys'], misc['usr%'], misc['sys%'], misc['total%']))
+	print("Net I/O: {:,.2f} KB/s ({} bps)".format(misc['KB/S'], decimal.Decimal(misc['bps']).normalize().to_eng_string()))
+	print("")
+
+	#==================================================
+	errors = dict(functools.reduce(lambda a, b: a.update(b) or a, [r.errors for r in results], collections.Counter()))
+	print("Errors: total {} client-timo {} socket-timo {} connrefused {} connreset {}".format(errors['total'], errors['client-timout'], errors['socket-timout'], errors['connection-refused'], errors['connection-reset']))
+	print("Errors: fd-unavail {} addrunavail {} ftab-full {} other {}".format(errors['fd-unavailable'], errors['address-unavailable'], errors['ftab-full'], errors['other']))
+	print("")
+
+	#==================================================
+	sessions = {
+		'min': sum([r.session['rate']['min'] for r in results]),
+		'avg': wavg([r.session['rate']['avg'] for r in results], [r.session['totals'] for r in results]),
+		'max': sum([r.session['rate']['max'] for r in results]),
+		'stddev':  wavg([r.session['rate']['stddev'] for r in results], [r.session['totals'] for r in results]),
+		'successes': sum([r.session['successes'] for r in results]),
+		'totals': sum([r.session['totals'] for r in results]),
+		'conns/ses': wavg([r.session['conns/ses'] for r in results], [r.session['totals'] for r in results]),
+		'lifetime': wavg([r.session['lifetime'] for r in results], [r.session['successes'] for r in results]),
+		'failtime': wavg([r.session['failtime'] for r in results], [r.session['totals'] - r.session['successes'] for r in results]),
+	}
+	print("Session rate [sess/s]: min {:.2f} avg {:.2f} max {:.2f} avg stddev {:.2f} ({:,}/{:,})".format(sessions['min'], sessions['avg'], sessions['max'], sessions['stddev'], sessions['successes'], sessions['totals']))
+	print("Session: avg {:.2f} connections/session".format(sessions['conns/ses']))
+	print("Session lifetime [s]: {:.2f}".format(sessions['lifetime']))
+	print("Session failtime [s]: {:.2f}".format(sessions['failtime']))
+
+	hist = dict(functools.reduce(operator.add, map(collections.Counter, [r.session['hist'] for r in results])))
+	hist = ["{}: {}".format(key, value) for key, value in sorted(hist.items(), key=lambda x: x[0])]
+	print("Session length histogram: [{}]".format(", ".join(hist)))
Index: doc/LaTeXmacros/common.tex
===================================================================
--- doc/LaTeXmacros/common.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/LaTeXmacros/common.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -11,6 +11,6 @@
 %% Created On       : Sat Apr  9 10:06:17 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Sat Jan 23 09:06:39 2021
-%% Update Count     : 491
+%% Last Modified On : Wed Feb  3 10:57:33 2021
+%% Update Count     : 508
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -32,23 +32,39 @@
 \setlist[enumerate]{listparindent=\parindent}% global
 \setlist[enumerate,2]{leftmargin=\parindent,labelsep=*,align=parleft,label=\alph*.}% local
-\setlist[description]{itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
+\setlist[description]{topsep=0.5ex,itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
 
 % Names used in the document.
 
 \usepackage{xspace}
-\newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}\xspace} % Cforall symbolic name
-\newcommand{\CFA}{\protect\CFAIcon}		% safe for section/caption
-\newcommand{\CFL}{\textrm{Cforall}\xspace} % Cforall symbolic name
-\newcommand{\Celeven}{\textrm{C11}\xspace} % C11 symbolic name
-\newcommand{\CC}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}\xspace} % C++ symbolic name
-\newcommand{\CCeleven}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}11\xspace} % C++11 symbolic name
-\newcommand{\CCfourteen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}14\xspace} % C++14 symbolic name
-\newcommand{\CCseventeen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}17\xspace} % C++17 symbolic name
-\newcommand{\CCtwenty}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}20\xspace} % C++20 symbolic name
+\newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}} % Cforall icon
+\newcommand{\CFA}{\protect\CFAIcon\xspace}			% CFA symbolic name
+\newcommand{\CFL}{\textrm{Cforall}\xspace}			% Cforall non-icon name
+\newcommand{\Celeven}{\textrm{C11}\xspace}			% C11 symbolic name
+\newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon
+\newcommand{\CC}{\protect\CCIcon\xspace}			% C++ symbolic name
+% numbers disallowed in latex variables names => use number names
+\newcommand{\CCeleven}{\protect\CCIcon{11}\xspace}	% C++11 symbolic name
+\newcommand{\CCfourteen}{\protect\CCIcon{14}\xspace} % C++14 symbolic name
+\newcommand{\CCseventeen}{\protect\CCIcon{17}\xspace} % C++17 symbolic name
+\newcommand{\CCtwenty}{\protect\CCIcon{20}\xspace}	% C++20 symbolic name
 \newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+% remove special-character warning in PDF side-bar names
 \makeatletter
+\@ifpackageloaded{hyperref}{
+  \pdfstringdefDisableCommands{
+  \def\CFA{\CFL}
+  \def\CC{C++\xspace}
+  \def\CCeleven{C++11\xspace}
+  \def\CCfourteen{C++14\xspace}
+  \def\CCseventeen{C++17\xspace}
+  \def\CCtwenty{C++20\xspace}
+  \def\Csharp{C\#\xspace}
+  \def\lstinline{\xspace} % must use {} as delimiters, e.g., \lstinline{...}
+  }
+}{} % empty false branch: nothing to do when hyperref is not loaded
+
 % parindent is relative, i.e., toggled on/off in environments like itemize, so store the value for
 % use rather than use \parident directly.
@@ -89,5 +105,5 @@
 \newcommand{\italic}[1]{\emph{\hyperpage{#1}}}
 \newcommand{\Definition}[1]{\textbf{\hyperpage{#1}}}
-\newcommand{\see}[1]{\emph{see}~#1}
+\newcommand{\see}[1]{(see #1)}
 
 % Define some commands that produce formatted index entries suitable for cross-references.
@@ -266,5 +282,5 @@
 showlines=true,							% show blank lines at end of code
 aboveskip=4pt,							% spacing above/below code block
-belowskip=-2pt,
+belowskip=0pt,
 numberstyle=\footnotesize\sf,			% numbering style
 % replace/adjust listing characters that look bad in sanserif
Index: doc/bibliography/pl.bib
===================================================================
--- doc/bibliography/pl.bib	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/bibliography/pl.bib	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -1797,13 +1797,14 @@
 }
 
-@article{Delisle19,
+@article{Delisle20,
     keywords	= {concurrency, Cforall},
     contributer	= {pabuhr@plg},
     author	= {Thierry Delisle and Peter A. Buhr},
     title	= {Advanced Control-flow and Concurrency in \textsf{C}$\mathbf{\forall}$},
-    year	= 2019,
+    year	= 2020,
     journal	= spe,
-    pages	= {1-33},
-    note	= {submitted},
+    pages	= {1-38},
+    note	= {\href{https://doi-org.proxy.lib.uwaterloo.ca/10.1002/spe.2925}{https://\-doi-org.proxy.lib.uwaterloo.ca/\-10.1002/\-spe.2925}},
+    note	= {},
 }
 
Index: doc/theses/andrew_beach_MMath/existing.tex
===================================================================
--- doc/theses/andrew_beach_MMath/existing.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/existing.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -1,3 +1,3 @@
-\chapter{\texorpdfstring{\CFA Existing Features}{Cforall Existing Features}}
+\chapter{\CFA Existing Features}
 
 \CFA (C-for-all)~\cite{Cforall} is an open-source project extending ISO C with
@@ -12,5 +12,5 @@
 obvious to the reader.
 
-\section{\texorpdfstring{Overloading and \lstinline|extern|}{Overloading and extern}}
+\section{Overloading and \lstinline{extern}}
 \CFA has extensive overloading, allowing multiple definitions of the same name
 to be defined.~\cite{Moss18}
@@ -42,5 +42,5 @@
 
 \section{Reference Type}
-\CFA adds a rebindable reference type to C, but more expressive than the \CC
+\CFA adds a rebindable reference type to C, but more expressive than the \Cpp
 reference.  Multi-level references are allowed and act like auto-dereferenced
 pointers using the ampersand (@&@) instead of the pointer asterisk (@*@). \CFA
@@ -59,7 +59,7 @@
 
 Both constructors and destructors are operators, which means they are just
-functions with special operator names rather than type names in \CC. The
+functions with special operator names rather than type names in \Cpp. The
 special operator names may be used to call the functions explicitly (not
-allowed in \CC for constructors).
+allowed in \Cpp for constructors).
 
 In general, operator names in \CFA are constructed by bracketing an operator
@@ -88,5 +88,5 @@
 matching overloaded destructor @void ^?{}(T &);@ is called.  Without explicit
 definition, \CFA creates a default and copy constructor, destructor and
-assignment (like \CC). It is possible to define constructors/destructors for
+assignment (like \Cpp). It is possible to define constructors/destructors for
 basic and existing types.
 
@@ -94,5 +94,5 @@
 \CFA uses parametric polymorphism to create functions and types that are
 defined over multiple types. \CFA polymorphic declarations serve the same role
-as \CC templates or Java generics. The ``parametric'' means the polymorphism is
+as \Cpp templates or Java generics. The ``parametric'' means the polymorphism is
 accomplished by passing argument operations to associate \emph{parameters} at
 the call site, and these parameters are used in the function to differentiate
@@ -134,5 +134,5 @@
 
 Note, a function named @do_once@ is not required in the scope of @do_twice@ to
-compile it, unlike \CC template expansion. Furthermore, call-site inferencing
+compile it, unlike \Cpp template expansion. Furthermore, call-site inferencing
 allows local replacement of the most specific parametric functions needs for a
 call.
@@ -178,5 +178,5 @@
 }
 \end{cfa}
-The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \CC
+The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \Cpp
 templates usage, a polymorphic-type usage must specify a type parameter.
 
Index: doc/theses/andrew_beach_MMath/features.tex
===================================================================
--- doc/theses/andrew_beach_MMath/features.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/features.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -5,10 +5,24 @@
 
 \section{Virtuals}
+Virtual types and casts are not part of the exception system nor are they
+required for an exception system. But an object-oriented style hierarchy is a
+great way of organizing exceptions so a minimal virtual system has been added
+to \CFA.
+
+The pattern of a simple hierarchy, borrowed from object-oriented
+programming, was chosen for several reasons.
+The first is that it allows new exceptions to be added in user code
+and in libraries independently of each other. Another is it allows for
+different levels of exception grouping (all exceptions, all IO exceptions or
+a particular IO exception). It also provides a simple way of passing
+data back and forth across the throw.
+
 Virtual types and casts are not required for a basic exception-system but are
 useful for advanced exception features. However, \CFA is not object-oriented so
-there is no obvious concept of virtuals.  Hence, to create advanced exception
-features for this work, I needed to designed and implemented a virtual-like
+there is no obvious concept of virtuals. Hence, to create advanced exception
+features for this work, I needed to design and implement a virtual-like
 system for \CFA.
 
+% NOTE: Maybe we should put less of the rationale here.
 Object-oriented languages often organized exceptions into a simple hierarchy,
 \eg Java.
@@ -30,5 +44,5 @@
 \end{center}
 The hierarchy provides the ability to handle an exception at different degrees
-of specificity (left to right).  Hence, it is possible to catch a more general
+of specificity (left to right). Hence, it is possible to catch a more general
 exception-type in higher-level code where the implementation details are
 unknown, which reduces tight coupling to the lower-level implementation.
@@ -61,12 +75,14 @@
 While much of the virtual infrastructure is created, it is currently only used
 internally for exception handling. The only user-level feature is the virtual
-cast, which is the same as the \CC \lstinline[language=C++]|dynamic_cast|.
+cast, which is the same as the \Cpp \lstinline[language=C++]|dynamic_cast|.
+\label{p:VirtualCast}
 \begin{cfa}
 (virtual TYPE)EXPRESSION
 \end{cfa}
-Note, the syntax and semantics matches a C-cast, rather than the unusual \CC
-syntax for special casts. Both the type of @EXPRESSION@ and @TYPE@ must be a
-pointer to a virtual type. The cast dynamically checks if the @EXPRESSION@ type
-is the same or a subtype of @TYPE@, and if true, returns a pointer to the
+Note, the syntax and semantics matches a C-cast, rather than the function-like
+\Cpp syntax for special casts. Both the type of @EXPRESSION@ and @TYPE@ must be
+a pointer to a virtual type.
+The cast dynamically checks if the @EXPRESSION@ type is the same or a subtype
+of @TYPE@, and if true, returns a pointer to the
 @EXPRESSION@ object, otherwise it returns @0p@ (null pointer).
 
@@ -77,36 +93,38 @@
 
 Exceptions are defined by the trait system; there are a series of traits, and
-if a type satisfies them, then it can be used as an exception.  The following
+if a type satisfies them, then it can be used as an exception. The following
 is the base trait all exceptions need to match.
 \begin{cfa}
 trait is_exception(exceptT &, virtualT &) {
-	virtualT const & @get_exception_vtable@(exceptT *);
+	virtualT const & get_exception_vtable(exceptT *);
 };
 \end{cfa}
-The function takes any pointer, including the null pointer, and returns a
-reference to the virtual-table object. Defining this function also establishes
-the virtual type and a virtual-table pair to the \CFA type-resolver and
-promises @exceptT@ is a virtual type and a child of the base exception-type.
-
-{\color{blue} PAB: I do not understand this paragraph.}
-One odd thing about @get_exception_vtable@ is that it should always be a
-constant function, returning the same value regardless of its argument.  A
-pointer or reference to the virtual table instance could be used instead,
-however using a function has some ease of implementation advantages and allows
-for easier disambiguation because the virtual type name (or the address of an
-instance that is in scope) can be used instead of the mangled virtual table
-name.  Also note the use of the word ``promise'' in the trait
-description. Currently, \CFA cannot check to see if either @exceptT@ or
-@virtualT@ match the layout requirements. This is considered part of
-@get_exception_vtable@'s correct implementation.
+The trait is defined over two types, the exception type and the virtual table
+type. This should be one-to-one, each exception type has only one virtual
+table type and vice versa. The only assertion in the trait is
+@get_exception_vtable@, which takes a pointer of the exception type and
+returns a reference to the virtual table type instance.
+
+The function @get_exception_vtable@ is actually a constant function.
+Regardless of the value passed in (including the null pointer), it should
+return a reference to the virtual table instance for that type.
+The reason it is a function instead of a constant is that it makes type
+annotations easier to write, as you can use the exception type instead of the
+virtual table type, which usually has a mangled name.
+% Also \CFA's trait system handles functions better than constants and doing
+% it this way
+
+% I did have a note about how it is the programmer's responsibility to make
+% sure the function is implemented correctly. But this is true of every
+% similar system I know of (except Agda's I guess) so I took it out.
 
 \section{Raise}
-\CFA provides two kinds of exception raise: termination (see
-\VRef{s:Termination}) and resumption (see \VRef{s:Resumption}), which are
+\CFA provides two kinds of exception raise: termination
+\see{\VRef{s:Termination}} and resumption \see{\VRef{s:Resumption}}, which are
 specified with the following traits.
 \begin{cfa}
 trait is_termination_exception(
 		exceptT &, virtualT & | is_exception(exceptT, virtualT)) {
-	void @defaultTerminationHandler@(exceptT &);
+	void defaultTerminationHandler(exceptT &);
 };
 \end{cfa}
@@ -118,5 +136,5 @@
 trait is_resumption_exception(
 		exceptT &, virtualT & | is_exception(exceptT, virtualT)) {
-	void @defaultResumptionHandler@(exceptT &);
+	void defaultResumptionHandler(exceptT &);
 };
 \end{cfa}
@@ -125,9 +143,18 @@
 
 Finally there are three convenience macros for referring to the these traits:
-@IS_EXCEPTION@, @IS_TERMINATION_EXCEPTION@ and @IS_RESUMPTION_EXCEPTION@.  Each
-takes the virtual type's name, and for polymorphic types only, the
-parenthesized list of polymorphic arguments. These macros do the name mangling
-to get the virtual-table name and provide the arguments to both sides
-{\color{blue}(PAB: What's a ``side''?)}
+@IS_EXCEPTION@, @IS_TERMINATION_EXCEPTION@ and @IS_RESUMPTION_EXCEPTION@.
+All three traits are hard to use while naming the virtual table as it has an
+internal mangled name. These macros take the exception name as their first
+argument and do the mangling. They all take a second argument for polymorphic
+types which is the parenthesized list of polymorphic arguments. These
+arguments are passed to both the exception type and the virtual table type as
+the arguments do have to match.
+
+For example consider a function that is polymorphic over types that have a
+defined arithmetic exception:
+\begin{cfa}
+forall(Num | IS_EXCEPTION(Arithmetic, (Num)))
+void some_math_function(Num & left, Num & right);
+\end{cfa}
 
 \subsection{Termination}
@@ -146,12 +173,13 @@
 throw EXPRESSION;
 \end{cfa}
-The expression must return a termination-exception reference, where the
-termination exception has a type with a @void defaultTerminationHandler(T &)@
-(default handler) defined. The handler is found at the call site using \CFA's
-trait system and passed into the exception system along with the exception
-itself.
-
-At runtime, a representation of the exception type and an instance of the
-exception type is copied into managed memory (heap) to ensure it remains in
+The expression must return a reference to a termination exception, where the
+termination exception is any type that satisfies @is_termination_exception@
+at the call site.
+Through \CFA's trait system the functions in the traits are passed into the
+throw code. A new @defaultTerminationHandler@ can be defined in any scope to
+change the throw's behavior (see below).
+
+At runtime, the exception returned by the expression
+is copied into managed memory (heap) to ensure it remains in
 scope during unwinding. It is the user's responsibility to ensure the original
 exception object at the throw is freed when it goes out of scope. Being
@@ -165,7 +193,7 @@
 try {
 	GUARDED_BLOCK
-} @catch (EXCEPTION_TYPE$\(_1\)$ * NAME)@ { // termination handler 1
+} catch (EXCEPTION_TYPE$\(_1\)$ * NAME$\(_1\)$) { // termination handler 1
 	HANDLER_BLOCK$\(_1\)$
-} @catch (EXCEPTION_TYPE$\(_2\)$ * NAME)@ { // termination handler 2
+} catch (EXCEPTION_TYPE$\(_2\)$ * NAME$\(_2\)$) { // termination handler 2
 	HANDLER_BLOCK$\(_2\)$
 }
@@ -178,16 +206,18 @@
 Exception matching checks the representation of the thrown exception-type is
 the same or a descendant type of the exception types in the handler clauses. If
-there is a match, a pointer to the exception object created at the throw is
-bound to @NAME@ and the statements in the associated @HANDLER_BLOCK@ are
-executed. If control reaches the end of the handler, the exception is freed,
-and control continues after the try statement.
+it is the same as or a descendant of @EXCEPTION_TYPE@$_i$ then @NAME@$_i$ is
+bound to a pointer to the exception and the statements in @HANDLER_BLOCK@$_i$
+are executed. If control reaches the end of the handler, the exception is
+freed and control continues after the try statement.
 
 The default handler visible at the throw statement is used if no matching
 termination handler is found after the entire stack is searched. At that point,
 the default handler is called with a reference to the exception object
-generated at the throw. If the default handler returns, the system default
-action is executed, which often terminates the program. This feature allows
+generated at the throw. If the default handler returns, control continues
+from after the throw statement. This feature allows
 each exception type to define its own action, such as printing an informative
 error message, when an exception is not handled in the program.
+However the default handler for all exception types triggers a cancellation
+using the exception.
 
 \subsection{Resumption}
@@ -196,6 +226,6 @@
 Resumption raise, called ``resume'', is as old as termination
 raise~\cite{Goodenough75} but is less popular. In many ways, resumption is
-simpler and easier to understand, as it is simply a dynamic call (as in
-Lisp). The semantics of resumption is: search the stack for a matching handler,
+simpler and easier to understand, as it is simply a dynamic call.
+The semantics of resumption is: search the stack for a matching handler,
 execute the handler, and continue execution after the resume. Notice, the stack
 cannot be unwound because execution returns to the raise point. Resumption is
@@ -209,12 +239,13 @@
 \end{cfa}
 The semantics of the @throwResume@ statement are like the @throw@, but the
-expression has a type with a @void defaultResumptionHandler(T &)@ (default
-handler) defined, where the handler is found at the call site by the type
-system.  At runtime, a representation of the exception type and an instance of
-the exception type is \emph{not} copied because the stack is maintained during
-the handler search.
+expression has to return a reference to a type that satisfies the trait
+@is_resumption_exception@. Like with termination the exception system can
+use these assertions while (throwing/raising/handling) the exception.
+
+At runtime, no copies are made. As the stack is not unwound the exception and
+any values on the stack will remain in scope while the resumption is handled.
 
 Then the exception system searches the stack starting from the resume and
-proceeding towards the base of the stack, from callee to caller. At each stack
+proceeding to the base of the stack, from callee to caller. At each stack
 frame, a check is made for resumption handlers defined by the @catchResume@
 clauses of a @try@ statement.
@@ -222,7 +253,7 @@
 try {
 	GUARDED_BLOCK
-} @catchResume (EXCEPTION_TYPE$\(_1\)$ * NAME)@ { // resumption handler 1
+} catchResume (EXCEPTION_TYPE$\(_1\)$ * NAME$\(_1\)$) {
 	HANDLER_BLOCK$\(_1\)$
-} @catchResume (EXCEPTION_TYPE$\(_2\)$ * NAME)@ { // resumption handler 2
+} catchResume (EXCEPTION_TYPE$\(_2\)$ * NAME$\(_2\)$) {
 	HANDLER_BLOCK$\(_2\)$
 }
@@ -253,8 +284,8 @@
 current point on the stack because new try statements may have been pushed by
 the handler or functions called from the handler. If there is no match back to
-the point of the current handler, the search skips the stack frames already
-searched by the first resume and continues after the try statement. The default
-handler always continues from default handler associated with the point where
-the exception is created.
+the point of the current handler, the search skips\label{p:searchskip} the
+stack frames already searched by the first resume and continues after
+the try statement. The default handler always continues from default
+handler associated with the point where the exception is created.
 
 % This might need a diagram. But it is an important part of the justification
@@ -275,7 +306,8 @@
 \end{verbatim}
 
-This resumption search-pattern reflect the one for termination, which matches
-with programmer expectations. However, it avoids the \emph{recursive
-resumption} problem. If parts of the stack are searched multiple times, loops
+This resumption search pattern reflects the one for termination, and so
+should come naturally to most programmers.
+However, it avoids the \emph{recursive resumption} problem.
+If parts of the stack are searched multiple times, loops
 can easily form resulting in infinite recursion.
 
@@ -283,20 +315,23 @@
 \begin{cfa}
 try {
-	throwResume$\(_1\)$ (E &){};
-} catch( E * ) {
-	throwResume;
-}
-\end{cfa}
-Based on termination semantics, programmer expectation is for the re-resume to
-continue searching the stack frames after the try statement. However, the
-current try statement is still on the stack below the handler issuing the
-reresume (see \VRef{s:Reraise}). Hence, the try statement catches the re-raise
-again and does another re-raise \emph{ad infinitum}, which is confusing and
-difficult to debug. The \CFA resumption search-pattern skips the try statement
-so the reresume search continues after the try, mathcing programmer
-expectation.
+	throwResume (E &){}; // first
+} catchResume(E *) {
+	throwResume (E &){}; // second
+}
+\end{cfa}
+If this handler is ever used it will be placed on top of the stack above the
+try statement. If the stack was not masked then the @throwResume@ in the
+handler would always be caught by the handler, leading to an infinite loop.
+Masking avoids this problem and other more complex versions of it involving
+multiple handlers and exception types.
+
+Other masking strategies could be used; such as masking the handlers that
+have caught an exception. This one was chosen because it creates a symmetry
+with termination (masked sections of the stack would be unwound with
+termination) and having only one pattern to learn is easier.
 
 \section{Conditional Catch}
-Both termination and resumption handler-clauses may perform conditional matching:
+Both termination and resumption handler clauses can be given an additional
+condition to further control which exceptions they handle:
 \begin{cfa}
 catch (EXCEPTION_TYPE * NAME ; @CONDITION@)
@@ -305,5 +340,5 @@
 exception matches, @CONDITION@ is executed. The condition expression may
 reference all names in scope at the beginning of the try block and @NAME@
-introduced in the handler clause.  If the condition is true, then the handler
+introduced in the handler clause. If the condition is true, then the handler
 matches. Otherwise, the exception search continues at the next appropriate kind
 of handler clause in the try block.
@@ -322,4 +357,7 @@
 
 \section{Reraise}
+\color{red}{From Andrew: I recommend we talk about why the language doesn't
+have rethrows/reraises instead.}
+
 \label{s:Reraise}
 Within the handler block or functions called from the handler block, it is
@@ -327,5 +365,7 @@
 @throwResume@, respective.
 \begin{cfa}
-catch( ... ) {
+try {
+	...
+} catch( ... ) {
 	... throw; // rethrow
 } catchResume( ... ) {
@@ -340,5 +380,4 @@
 handler is generated that does a program-level abort.
 
-
 \section{Finally Clauses}
 A @finally@ clause may be placed at the end of a @try@ statement.
@@ -346,18 +385,20 @@
 try {
 	GUARDED_BLOCK
-} ...	// any number or kind of handler clauses
-} finally {
+} ... // any number or kind of handler clauses
+... finally {
 	FINALLY_BLOCK
 }
 \end{cfa}
-The @FINALLY_BLOCK@ is executed when the try statement is unwound from the
-stack, \ie when the @GUARDED_BLOCK@ or any handler clause finishes. Hence, the
-finally block is always executed.
+The @FINALLY_BLOCK@ is executed when the try statement is removed from the
+stack, including when the @GUARDED_BLOCK@ or any handler clause finishes or
+during an unwind.
+The only time the block is not executed is if the program is exited before
+that happens.
 
 Execution of the finally block should always finish, meaning control runs off
 the end of the block. This requirement ensures always continues as if the
 finally clause is not present, \ie finally is for cleanup not changing control
-flow.  Because of this requirement, local control flow out of the finally block
-is forbidden.  The compiler precludes any @break@, @continue@, @fallthru@ or
+flow. Because of this requirement, local control flow out of the finally block
+is forbidden. The compiler precludes any @break@, @continue@, @fallthru@ or
 @return@ that causes control to leave the finally block. Other ways to leave
 the finally block, such as a long jump or termination are much harder to check,
@@ -369,6 +410,7 @@
 possible forwards the cancellation exception to a different stack.
 
+Cancellation is not an exception operation like termination or resumption.
 There is no special statement for starting a cancellation; instead the standard
-library function @cancel_stack@ is called passing an exception.  Unlike a
+library function @cancel_stack@ is called passing an exception. Unlike a
 raise, this exception is not used in matching only to pass information about
 the cause of the cancellation.
@@ -377,17 +419,18 @@
 \begin{description}
 \item[Main Stack:]
-
 The main stack is the one used by the program main at the start of execution,
-and is the only stack in a sequential program.  Hence, when cancellation is
-forwarded to the main stack, there is no other forwarding stack, so after the
-stack is unwound, there is a program-level abort.
+and is the only stack in a sequential program. Even in a concurrent program
+the main stack is only dependent on the environment that started the program.
+Hence, when the main stack is cancelled there is nowhere else in the program
+to notify. After the stack is unwound, there is a program-level abort.
 
 \item[Thread Stack:]
 A thread stack is created for a @thread@ object or object that satisfies the
-@is_thread@ trait.  A thread only has two points of communication that must
+@is_thread@ trait. A thread only has two points of communication that must
 happen: start and join. As the thread must be running to perform a
-cancellation, it must occur after start and before join, so join is a
-cancellation point.  After the stack is unwound, the thread halts and waits for
-another thread to join with it. The joining thread, checks for a cancellation,
+cancellation, it must occur after start and before join, so join is used
+for communication here.
+After the stack is unwound, the thread halts and waits for
+another thread to join with it. The joining thread checks for a cancellation,
 and if present, resumes exception @ThreadCancelled@.
 
@@ -397,18 +440,21 @@
 the exception is not caught. The implicit join does a program abort instead.
 
-This semantics is for safety. One difficult problem for any exception system is
-defining semantics when an exception is raised during an exception search:
-which exception has priority, the original or new exception? No matter which
-exception is selected, it is possible for the selected one to disrupt or
-destroy the context required for the other. {\color{blue} PAB: I do not
-understand the following sentences.} This loss of information can happen with
-join but as the thread destructor is always run when the stack is being unwound
-and one termination/cancellation is already active. Also since they are
-implicit they are easier to forget about.
+This semantics is for safety. If an unwind is triggered while another unwind
+is underway only one of them can proceed as they both want to ``consume'' the
+stack. Letting both try to proceed leads to very undefined behaviour.
+Both termination and cancellation involve unwinding and, since the default
+@defaultResumptionHandler@ performs a termination that could more easily
+happen in an implicit join inside a destructor. So there is an error message
+and an abort instead.
+
+The recommended way to avoid the abort is to handle the initial resumption
+from the implicit join. If required you may put an explicit join inside a
+finally clause to disable the check and use the local
+@defaultResumptionHandler@ instead.
 
 \item[Coroutine Stack:] A coroutine stack is created for a @coroutine@ object
-or object that satisfies the @is_coroutine@ trait.  A coroutine only knows of
-two other coroutines, its starter and its last resumer.  The last resumer has
-the tightest coupling to the coroutine it activated.  Hence, cancellation of
+or object that satisfies the @is_coroutine@ trait. A coroutine only knows of
+two other coroutines, its starter and its last resumer. The last resumer has
+the tightest coupling to the coroutine it activated. Hence, cancellation of
 the active coroutine is forwarded to the last resumer after the stack is
 unwound, as the last resumer has the most precise knowledge about the current
Index: doc/theses/andrew_beach_MMath/future.tex
===================================================================
--- doc/theses/andrew_beach_MMath/future.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/future.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -1,122 +1,133 @@
 \chapter{Future Work}
 
+\section{Language Improvements}
+\CFA is a developing programming language. As such, there are partially or
+unimplemented features of the language (including several broken components)
+that I had to work around while building an exception handling system largely in
+the \CFA language (some C components).  The following are a few of these
+issues, and once implemented/fixed, how this would affect the exception system.
+\begin{itemize}
+\item
+The implementation of termination is not portable because it includes
+hand-crafted assembly statements. These sections must be ported by hand to
+support more hardware architectures, such as the ARM processor.
+\item
+Due to a type-system problem, the catch clause cannot bind the exception to a
+reference instead of a pointer. Since \CFA has a very general reference
+capability, programmers will want to use it. Once fixed, this capability should
+result in little or no change in the exception system.
+\item
+Termination handlers cannot use local control-flow transfers, \eg by @break@,
+@return@, \etc. The reason is that current code generation hoists a handler
+into a nested function for convenience (versus assembly-code generation at the
+@try@ statement). Hence, when the handler runs, its code is not in the lexical
+scope of the @try@ statement, where the local control-flow transfers are
+meaningful.
+\item
+There is no detection of colliding unwinds. It is possible for clean-up code
+run during an unwind to trigger another unwind that escapes the clean-up code
+itself; such as a termination exception caught further down the stack or a
+cancellation. There do exist ways to handle this but currently they are not
+even detected and the first unwind will simply be forgotten, often leaving
+it in a bad state.
+\item
+Also the exception system did not have a lot of time to be tried and tested.
+So just letting people use the exception system more will reveal new
+quality of life upgrades that can be made with time.
+\end{itemize}
+
 \section{Complete Virtual System}
-The virtual system should be completed. It was never supposed to be part of
-this project and so minimal work was done on it. A draft of what the complete
-system might look like was created but it was never finalized or implemented.
-A future project in \CFA would be to complete that work and to update the
-parts of the exception system that use the current version.
+The virtual system should be completed. It was not supposed to be part of this
+project, but was thrust upon it to do exception inheritance; hence, only
+minimal work was done. A draft for a complete virtual system is available but
+it is not finalized.  A future \CFA project is to complete that work and then
+update the exception system that uses the current version.
 
-There are several improvements to the virtual system that would improve
-the exception traits. The biggest one is an assertion that checks that one
-virtual type is a child of another virtual type. This would capture many of
-the requirements much more precisely.
+There are several improvements to the virtual system that would improve the
+exception traits. The most important one is an assertion to check one virtual
+type is a child of another. This check precisely captures many of the
+correctness requirements.
 
 The full virtual system might also include other improvement like associated
-types. This is a proposed feature that would allow traits to refer to types
-not listed in their header. This would allow the exception traits to not
-refer to the virtual table type explicatly which would remove the need for
-the interface macros.
+types to allow traits to refer to types not listed in their header. This
+feature allows exception traits to not refer to the virtual-table type
+explicitly, removing the need for the current interface macros.
 
-\section{Additional Throws}
-Several other kinds of throws, beyond the termination throw (@throw@),
-the resumption throw (@throwResume@) and the re-throws, were considered.
-None were as useful as the core throws but they would likely be worth
-revising.
+\section{Additional Raises}
+Several other kinds of exception raises were considered beyond termination
+(@throw@), resumption (@throwResume@), and reraise.
 
-The first ones are throws for asynchronous exceptions, throwing exceptions
-from one stack to another. These act like signals allowing for communication
-between the stacks. This is usually used with resumption as it allows the
-target stack to continue execution normally after the exception has been
-handled.
+The first is a non-local/concurrent raise providing asynchronous exceptions,
+\ie raising an exception on another stack. This semantics acts like signals
+allowing for out-of-band communication among coroutines and threads. This kind
+of raise is often restricted to resumption to allow the target stack to
+continue execution normally after the exception has been handled. That is,
+allowing one coroutine/thread to unwind the stack of another via termination is
+bad software engineering.
 
-This would much more coordination between the concurrency system and the
-exception system to handle. Most of the interesting design decisions around
-applying asynchronous exceptions appear to be around masking (controlling
-which exceptions may be thrown at a stack). It would likely require more of
-the virtual system and would also effect how default handlers are set.
+Non-local/concurrent requires more coordination between the concurrency system
+and the exception system. Many of the interesting design decisions centre
+around masking (controlling which exceptions may be thrown at a stack). It
+would likely require more of the virtual system and would also affect how
+default handlers are set.
 
-The other throws were designed to mimic bidirectional algebraic effects.
-Algebraic effects are used in some functional languages and allow a function
+Other raises were considered to mimic bidirectional algebraic effects.
+Algebraic effects are used in some functional languages allowing one function
 to have another function on the stack resolve an effect (which is defined with
-a function-like interface).
-These can be mimiced with resumptions and the the new throws were designed
-to try and mimic bidirectional algebraic effects, where control can go back
-and forth between the function effect caller and handler while the effect
-is underway.
+a function-like interface).  This semantics can be mimicked with resumptions
+and new raises were discussed to mimic bidirectional algebraic-effects, where
+control can go back and forth between the function-effect caller and handler
+while the effect is underway.
 % resume-top & resume-reply
+These raises would be like the resumption raise except using different search
+patterns to find the handler.
 
-These throws would likely be just like the resumption throw except they would
-use different search patterns to find the handler to reply to.
+\section{Zero-Cost Try}
+\CFA does not have zero-cost try-statements because the compiler generates C
+code rather than assembler code \see{\VPageref{p:zero-cost}}. When the compiler
+does create its own assembly (or LLVM byte-code), then zero-cost try-statements
+are possible. The downside of zero-cost try-statements is the LSDA complexity,
+its size (program bloat), and the high cost of raising an exception.
 
-\section{Zero-Cost Exceptions}
-\CFA does not have zero-cost exceptions because it does not generate assembly
-but instead generates C code. See the implementation section. When the
-compiler does start to create its own assembly (or LLVM byte code) then
-zero-cost exceptions could be implemented.
+Alternatively, some research could be done into the simpler alternative method
+with a non-zero-cost try-statement but much lower cost exception raise. For
+example, programs are starting to use exceptions in the normal control path, so
+more exceptions are thrown. In these cases, the cost balance switches towards
+low-cost raise. Unfortunately, while exceptions remain exceptional, the
+libunwind model will probably remain the most effective option.
 
-Now in zero-cost exceptions the only part that is zero-cost are the try
-blocks. Some research could be done into the alternative methods for systems
-that expect a lot more exceptions to be thrown, allowing some overhead in
-entering and leaving try blocks to make throws faster. But while exceptions
-remain exceptional the libunwind model will probably remain the most effective
-option.
+Zero-cost resumption is still an open problem. First, because libunwind does
+not support a successful-exiting stack-search without doing an unwind.
+Workarounds are possible but awkward. Ideally an extension to libunwind could
+be made, but that would either require separate maintenance or gain enough
+support to have it folded into the standard.
 
-Zero-cost resumptions have more problems to solve. First because libunwind
-does not support a successful exiting stack search without doing an unwind.
-There are several ways to hack that functionality in. Ideally an extension to
-libunwind could be made, but that would either require seperate maintenance
-or gain enough support to have it folded into the standard.
-
-Also new techniques to skip previously searched parts of the stack will have
-to be developed. The recursive resume problem still remains and ideally the
-same pattern of ignoring sections of the stack.
+Also new techniques to skip previously searched parts of the stack need to be
+developed to handle the recursive resume problem and support advanced algebraic
+effects.
 
 \section{Signal Exceptions}
-Exception Handling: Issues and a Proposed Notation suggests there are three
-types of exceptions: escape, notify and signal.
-Escape exceptions are our termination exceptions, notify exceptions are
-resumption exceptions and that leaves signal exception unimplemented.
+Goodenough~\cite{Goodenough75} suggests three types of exceptions: escape,
+notify and signal.  Escape are termination exceptions, notify are resumption
+exceptions, leaving signal unimplemented.
 
-Signal exceptions allow either behaviour, that is after the exception is
-handled control can either return to the throw or from where the handler is
-defined.
+A signal exception allows either behaviour, \ie after an exception is handled,
+the handler has the option of returning to the raise or after the @try@
+statement. Currently, \CFA fixes the semantics of the handler return
+syntactically by the @catch@ or @catchResume@ clause.
 
-The design should be rexamined and be updated for \CFA. A very direct
-translation would perhaps have a new throw and catch pair and a statement
-(or statements) could be used to decide if the handler returns to the throw
-or continues where it is, but there are other options.
+Signal exception should be reexamined and possibly be supported in \CFA. A very
+direct translation is to have a new raise and catch pair, and a new statement
+(or statements) would indicate if the handler returns to the raise or continues
+where it is; but there may be other options.
 
-For instance resumption could be extended to cover this use by allowing
-local control flow out of it. This would require an unwind as part of the
-transition as there are stack frames that have to be removed.
-This would mean there is no notify like throw but because \CFA does not have
-exception signatures a termination can be thrown from any resumption handler
-already so there are already ways one could try to do this in existing \CFA.
+For instance, resumption could be extended to cover this use by allowing local
+control flow out of it. This approach would require an unwind as part of the
+transition as there are stack frames that have to be removed.  This approach
+means there is no notify raise, but because \CFA does not have exception
+signatures, a termination can be thrown from within any resumption handler so
+there is already a way to mimic this in existing \CFA.
 
 % Maybe talk about the escape; and escape CONTROL_STMT; statements or how
 % if we could choose if _Unwind_Resume proceeded to the clean-up stage this
 % would be much easier to implement.
-
-\section{Language Improvements}
-There is also a lot of work that are not follow ups to this work in terms of
-research, some have no interesting research to be done at all, but would
-improve \CFA as a programming language. The full list of these would
-naturally be quite extensive but here are a few examples that involve
-exceptions:
-
-\begin{itemize}
-\item The implementation of termination is not portable because it includes
-some assembly statements. These sections will have to be re-written to so
-\CFA has full support on more machines.
-\item Allowing exception handler to bind the exception to a reference instead
-of a pointer. This should actually result in no change in behaviour so there
-is no reason not to allow it. It is however a small improvement; giving a bit
-of flexibility to the user in what style they want to use.
-\item Enabling local control flow (by @break@, @return@ and
-similar statements) out of a termination handler. The current set-up makes
-this very difficult but the catch function that runs the handler after it has
-been matched could be inlined into the function's body, which would make this
-much easier. (To do the same for try blocks would probably wait for zero-cost
-exceptions, which would allow the try block to be inlined as well.)
-\end{itemize}
Index: doc/theses/andrew_beach_MMath/implement.tex
===================================================================
--- doc/theses/andrew_beach_MMath/implement.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/implement.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -2,99 +2,107 @@
 % Goes over how all the features are implemented.
 
+The implementation work for this thesis covers two components: the virtual
+system and exceptions. Each component is discussed in detail.
+
 \section{Virtual System}
+\label{s:VirtualSystem}
 % Virtual table rules. Virtual tables, the pointer to them and the cast.
-The \CFA virtual system only has one public facing feature: virtual casts.
-However there is a lot of structure to support that and provide some other
-features for the standard library.
-
-All of this is accessed through a field inserted at the beginning of every
-virtual type. Currently it is called @virtual_table@ but it is not
-ment to be accessed by the user. This field is a pointer to the type's
-virtual table instance. It is assigned once during the object's construction
-and left alone after that.
-
-\subsection{Virtual Table Construction}
-For each virtual type a virtual table is constructed. This is both a new type
-and an instance of that type. Other instances of the type could be created
-but the system doesn't use them. So this section will go over the creation of
-the type and the instance.
-
-Creating the single instance is actually very important. The address of the
-table acts as the unique identifier for the virtual type. Similarly the first
-field in every virtual table is the parent's id; a pointer to the parent
-virtual table instance.
-
-The remaining fields contain the type's virtual members. First come the ones
-present on the parent type, in the same order as they were the parent, and
-then any that this type introduces. The types of the ones inherited from the
-parent may have a slightly modified type, in that references to the
-dispatched type are replaced with the current virtual type. These are always
-taken by pointer or reference.
-
-The structure itself is created where the virtual type is created. The name
-of the type is created by mangling the name of the base type. The name of the
-instance is also generated by name mangling.
-
-The fields are initialized automatically.
+While the \CFA virtual system currently has only one public feature, virtual
+cast \see{\VPageref{p:VirtualCast}}, substantial structure is required to
+support it, and provide features for exception handling and the standard
+library.
+
+\subsection{Virtual Table}
+The virtual system is accessed through a private constant field inserted at the
+beginning of every virtual type, called the virtual-table pointer. This field
+points at a type's virtual table and is assigned during the object's
+construction.  The address of a virtual table acts as the unique identifier for
+the virtual type, and the first field of a virtual table is a pointer to the
+parent virtual-table or @0p@.  The remaining fields are duplicated from the
+parent tables in this type's inheritance chain, followed by any fields this type
+introduces. Parent fields are duplicated so they can be changed (\CC
+\lstinline[language=c++]|override|), so that references to the dispatched type
+are replaced with the current virtual type.
+\PAB{Can you create a simple diagram of the layout?}
+% These are always taken by pointer or reference.
+
+% For each virtual type, a virtual table is constructed. This is both a new type
+% and an instance of that type. Other instances of the type could be created
+% but the system doesn't use them. So this section will go over the creation of
+% the type and the instance.
+
+A virtual table is created when the virtual type is created. The name of the
+type is created by mangling the name of the base type. The name of the instance
+is also generated by name mangling.  The fields are initialized automatically.
 The parent field is initialized by getting the type of the parent field and
 using that to calculate the mangled name of the parent's virtual table type.
 There are two special fields that are included like normal fields but have
 special initialization rules: the @size@ field is the type's size and is
-initialized with a sizeof expression, the @align@ field is the type's
-alignment and uses an alignof expression. The remaining fields are resolved
-to a name matching the field's name and type using the normal visibility
-and overload resolution rules of the type system.
-
-These operations are split up into several groups depending on where they
-take place which can vary for monomorphic and polymorphic types. The first
-devision is between the declarations and the definitions. Declarations, such
-as a function signature or a structure's name, must always be visible but may
-be repeated so they go in headers. Definitions, such as function bodies and a
-structure's layout, don't have to be visible on use but must occur exactly
-once and go into source files.
-
+initialized with a @sizeof@ expression, the @align@ field is the type's
+alignment and uses an @alignof@ expression. The remaining fields are resolved
+to a name matching the field's name and type using the normal visibility and
+overload resolution rules of the type system.
+
+These operations are split up into several groups depending on where they take
+place, which varies for monomorphic and polymorphic types. The first division is
+between the declarations and the definitions. Declarations, such as a function
+signature or an aggregate's name, must always be visible but may be repeated in
+the form of forward declarations in headers. Definitions, such as function
+bodies and an aggregate's layout, can be separately compiled but must occur
+exactly once in a source file.
+
+\begin{sloppypar}
 The declarations include the virtual type definition and forward declarations
 of the virtual table instance, constructor, message function and
-@get_exception_vtable@. The definition includes the storage and
-initialization of the virtual table instance and the bodies of the three
-functions.
+@get_exception_vtable@. The definition includes the storage and initialization
+of the virtual table instance and the bodies of the three functions.
+\end{sloppypar}
 
 Monomorphic instances put all of these two groups in one place each.
-
-Polymorphic instances also split out the core declarations and definitions
-from the per-instance information. The virtual table type and most of the
-functions are polymorphic so they are all part of the core. The virtual table
-instance and the @get_exception_vtable@ function.
-
+Polymorphic instances also split out the core declarations and definitions from
+the per-instance information. The virtual table type and most of the functions
+are polymorphic so they are all part of the core. The virtual table instance
+and the @get_exception_vtable@ function are the per-instance information.
+
+\begin{sloppypar}
 Coroutines and threads need instances of @CoroutineCancelled@ and
-@ThreadCancelled@ respectively to use all of their functionality.
-When a new data type is declared with @coroutine@ or @thread@
-the forward declaration for the instance is created as well. The definition
-of the virtual table is created at the definition of the main function.
+@ThreadCancelled@ respectively to use all of their functionality.  When a new
+data type is declared with @coroutine@ or @thread@ the forward declaration for
+the instance is created as well. The definition of the virtual table is created
+at the definition of the main function.
+\end{sloppypar}
 
 \subsection{Virtual Cast}
-Virtual casts are implemented as a function call that does the check and a
-old C-style cast to do the type conversion. The C-cast is just to make sure
-the generated code is correct so the rest of the section is about that
-function.
-
-The function is @__cfa__virtual_cast@ and it is implemented in the
-standard library. It takes a pointer to the target type's virtual table and
-the object pointer being cast. The function is very simple, getting the
-object's virtual table pointer and then checking to see if it or any of
-its ancestors, by using the parent pointers, are the same as the target type
-virtual table pointer. It does this in a simple loop.
-
-For the generated code a forward decaration of the virtual works as follows.
-There is a forward declaration of @__cfa__virtual_cast@ in every cfa
-file so it can just be used. The object argument is the expression being cast
-so that is just placed in the argument list.
-
-To build the target type parameter the compiler will create a mapping from
-concrete type-name -- so for polymorphic types the parameters are filled in
--- to virtual table address. Every virtual table declaraction is added to the
-this table; repeats are ignored unless they have conflicting definitions.
-This does mean the declaractions have to be in scope, but they should usually
-be introduced as part of the type definition.
+Virtual casts are implemented as a function call that does the subtype check
+and a C coercion-cast to do the type conversion.
+% The C-cast is just to make sure the generated code is correct so the rest of
+% the section is about that function.
+The function is
+\begin{cfa}
+void * __cfa__virtual_cast(
+	struct __cfa__parent_vtable const * parent,
+	struct __cfa__parent_vtable const * const * child );
+\end{cfa}
+and it is implemented in the standard library. It takes a pointer to the target
+type's virtual table and the object pointer being cast. The function performs a
+linear search starting at the object's virtual-table and walking through the
+parent pointers, checking to see if it or any of its ancestors are the same as
+the target-type virtual-table pointer.
+
+For the generated code, a forward declaration of the virtual works as follows.
+There is a forward declaration of @__cfa__virtual_cast@ in every \CFA file so
+it can just be used. The object argument is the expression being cast so that
+is just placed in the argument list.
+
+To build the target type parameter, the compiler creates a mapping from
+concrete type-name -- so for polymorphic types the parameters are filled in --
+to virtual table address. Every virtual table declaration is added to this
+table; repeats are ignored unless they have conflicting definitions.  Note,
+these declarations do not have to be in scope, but they should usually be
+introduced as part of the type definition.
+
+\PAB{I do not understand all of \VRef{s:VirtualSystem}. I think you need to
+write more to make it clear.}
+
 
 \section{Exceptions}
@@ -106,60 +114,151 @@
 % resumption doesn't as well.
 
-Many modern languages work with an interal stack that function push and pop
-their local data to. Stack unwinding removes large sections of the stack,
-often across functions.
-
-At a very basic level this can be done with @setjmp@ \& @longjmp@
-which simply move the top of the stack, discarding everything on the stack
-above a certain point. However this ignores all the clean-up code that should
-be run when certain sections of the stack are removed (for \CFA these are from
-destructors and finally clauses) and also requires that the point to which the
-stack is being unwound is known ahead of time. libunwind is used to address
-both of these problems.
-
-Libunwind, provided in @unwind.h@ on most platorms, is a C library
-that provides \CPP style stack unwinding. Its operation is divided into two
-phases. The search phase -- phase 1 -- is used to scan the stack and decide
-where the unwinding will stop, this allows for a dynamic target. The clean-up
-phase -- phase 2 -- does the actual unwinding and also runs any clean-up code
-as it goes.
-
-To use the libunwind each function must have a personality function and an
-LSDA (Language Specific Data Area). Libunwind actually does very little, it
-simply moves down the stack from function to function. Most of the actions are
-implemented by the personality function which libunwind calls on every
-function. Since this is shared across many functions or even every function in
-a language it will need a bit more information. This is provided by the LSDA
-which has the unique information for each function.
-
-Theoretically the LSDA can contain anything but conventionally it is a table
-with entries reperenting areas of the function and what has to be done there
-during unwinding. These areas are described in terms of where the instruction
-pointer is. If the current value of the instruction pointer is between two
-values reperenting the beginning and end of a region then execution is
-currently being executed. These are used to mark out try blocks and the
-scopes of objects with destructors to run.
-
-GCC will generate an LSDA and attach its personality function with the
-@-fexceptions@ flag. However this only handles the cleanup attribute.
-This attribute is used on a variable and specifies a function that should be
-run when the variable goes out of scope. The function is passed a pointer to
-the object as well so it can be used to mimic destructors. It however cannot
-be used to mimic try statements.
-
-\subsection{Implementing Personality Functions}
-Personality functions have a complex interface specified by libunwind.
-This section will cover some of the important parts of that interface.
-
-\begin{lstlisting}
-typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
-    int version,
-    _Unwind_Action action,
-    _Unwind_Exception_Class exception_class,
-    _Unwind_Exception * exception,
-    struct _Unwind_Context * context);
+% Many modern languages work with an interal stack that function push and pop
+% their local data to. Stack unwinding removes large sections of the stack,
+% often across functions.
+
+Stack unwinding is the process of removing stack frames (activations) from the
+stack. On function entry and return, unwinding is handled directly by the code
+embedded in the function. Usually, the stack-frame size is known statically
+based on parameter and local variable declarations.  For dynamically-sized
+local variables, a runtime computation is necessary to know the frame
+size. Finally, a function's frame-size may change during execution as local
+variables (static or dynamic sized) go in and out of scope.
+Allocating/deallocating stack space is usually an $O(1)$ operation achieved by
+bumping the hardware stack-pointer up or down as needed.
+
+Unwinding across multiple stack frames is more complex because individual stack
+management code associated with each frame is bypassed. That is, the location
+of a function's frame-management code is largely unknown and dispersed
+throughout the function, hence the current frame size managed by that code is
+also unknown. Hence, code unwinding across frames does not have direct
+knowledge about what is on the stack, and hence, how much of the stack needs to
+be removed.
+
+% At a very basic level this can be done with @setjmp@ \& @longjmp@ which simply
+% move the top of the stack, discarding everything on the stack above a certain
+% point. However this ignores all the cleanup code that should be run when
+% certain sections of the stack are removed (for \CFA these are from destructors
+% and finally clauses) and also requires that the point to which the stack is
+% being unwound is known ahead of time. libunwind is used to address both of
+% these problems.
+
+The traditional unwinding mechanism for C is implemented by saving a snap-shot
+of a function's state with @setjmp@ and restoring that snap-shot with
+@longjmp@. This approach bypasses the need to know stack details by simply
+resetting to a snap-shot of an arbitrary but existing function frame on the
+stack. It is up to the programmer to ensure the snap-shot is valid when it is
+reset, making this unwinding approach fragile with potential errors that are
+difficult to debug because the stack becomes corrupted.
+
+However, many languages define cleanup actions that must be taken when objects
+are deallocated from the stack or blocks end, such as running a variable's
+destructor or a @try@ statement's @finally@ clause. Handling these mechanisms
+requires walking the stack and checking each stack frame for these potential
+actions.
+
+For exceptions, it must be possible to walk the stack frames in search of @try@
+statements to match and execute a handler. For termination exceptions, it must
+also be possible to unwind all stack frames from the throw to the matching
+catch, and each of these frames must be checked for cleanup actions. Stack
+walking is where most of the complexity and expense of exception handling
+appears.
+
+One of the most popular tools for stack management is libunwind, a low-level
+library that provides tools for stack walking, handler execution, and
+unwinding. What follows is an overview of all the relevant features of
+libunwind needed for this work, and how \CFA uses them to implement exception
+handling.
+
+\subsection{libunwind Usage}
+Libunwind, accessed through @unwind.h@ on most platforms, is a C library that
+provides \CC-style stack-unwinding. Its operation is divided into two phases:
+search and cleanup. The dynamic target search -- phase 1 -- is used to scan the
+stack and decide where unwinding should stop (but no unwinding occurs). The
+cleanup -- phase 2 -- does the unwinding and also runs any cleanup code.
+
+To use libunwind, each function must have a personality function and a Language
+Specific Data Area (LSDA).  The LSDA has the unique information for each
+function to tell the personality function where a function is executing, its
+current stack frame, and what handlers should be checked.  Theoretically, the
+LSDA can contain any information but conventionally it is a table with entries
+representing regions of the function and what has to be done there during
+unwinding. These regions are bracketed by the instruction pointer. If the
+instruction pointer is within a region's start/end, then execution is currently
+in that region. Regions are used to mark out the scopes of objects
+with destructors and try blocks.
+
+% Libunwind actually does very little, it simply moves down the stack from
+% function to function. Most of the actions are implemented by the personality
+% function which libunwind calls on every function. Since this is shared across
+% many functions or even every function in a language it will need a bit more
+% information.
+
+The GCC compilation flag @-fexceptions@ causes the generation of an LSDA and
+attaches its personality function. \PAB{to what is it attached?}  However, this
+flag only handles the cleanup attribute
+\begin{cfa}
+void clean_up( int * var ) { ... }
+int avar __attribute__(( cleanup(clean_up) ));
+\end{cfa}
+which is used on a variable and specifies a function, \eg @clean_up@, run when
+the variable goes out of scope. The function is passed a pointer to the object
+so it can be used to mimic destructors. However, this feature cannot be used to
+mimic @try@ statements.
+
+\subsection{Personality Functions}
+Personality functions have a complex interface specified by libunwind.  This
+section covers some of the important parts of the interface.
+
+A personality function performs four tasks, although not all have to be
+present.
+\begin{lstlisting}[language=C,{moredelim=**[is][\color{red}]{@}{@}}]
+typedef _Unwind_Reason_Code (*@_Unwind_Personality_Fn@) (
+	int version, _Unwind_Action @action@,
+	_Unwind_Exception_Class @exception_class@,
+	_Unwind_Exception * @exception@,
+	struct _Unwind_Context * @context@
+);
 \end{lstlisting}
-
-The return value, the reason code, is an enumeration of possible messages
+The @action@ argument is a bitmask of possible actions:
+\begin{enumerate}
+\item
+@_UA_SEARCH_PHASE@ specifies a search phase and tells the personality function
+to check for handlers.  If there is a handler in a stack frame, as defined by
+the language, the personality function returns @_URC_HANDLER_FOUND@; otherwise
+it returns @_URC_CONTINUE_UNWIND@.
+
+\item
+@_UA_CLEANUP_PHASE@ specifies a cleanup phase, where the entire frame is
+unwound and all cleanup code is run. The personality function does whatever
+cleanup the language defines (such as running destructors/finalizers) and then
+generally returns @_URC_CONTINUE_UNWIND@.
+
+\item
+\begin{sloppypar}
+@_UA_HANDLER_FRAME@ specifies a cleanup phase on a function frame that found a
+handler. The personality function must prepare to return to normal code
+execution and return @_URC_INSTALL_CONTEXT@.
+\end{sloppypar}
+
+\item
+@_UA_FORCE_UNWIND@ specifies a forced unwind call. Forced unwind only performs
+the cleanup phase and uses a different means to decide when to stop
+\see{\VRef{s:ForcedUnwind}}.
+\end{enumerate}
+
+The @exception_class@ argument is a copy of the
+\lstinline[language=C]|exception|'s @exception_class@ field.
+
+The \lstinline[language=C]|exception| argument is a pointer to the user
+provided storage object. It has two public fields, the exception class, which
+is actually just a number, identifying the exception handling mechanism that
+created it, and the cleanup function. The cleanup function is called if
+required by the exception.
+
+The @context@ argument is a pointer to an opaque type passed to helper
+functions called inside the personality function.
+
+The return value, @_Unwind_Reason_Code@, is an enumeration of possible messages
 that can be passed several places in libunwind. It includes a number of
 messages for special cases (some of which should never be used by the
@@ -167,138 +266,90 @@
 personality function should always return @_URC_CONTINUE_UNWIND@.
 
-The @version@ argument is the verson of the implementation that is
-calling the personality function. At this point it appears to always be 1 and
-it will likely stay that way until a new version of the API is updated.
-
-The @action@ argument is set of flags that tell the personality
-function when it is being called and what it must do on this invocation.
-The flags are as follows:
-\begin{itemize}
-\item@_UA_SEARCH_PHASE@: This flag is set whenever the personality
-function is called during the search phase. The personality function should
-decide if unwinding will stop in this function or not. If it does then the
-personality function should return @_URC_HANDLER_FOUND@.
-\item@_UA_CLEANUP_PHASE@: This flag is set whenever the personality
-function is called during the cleanup phase. If no other flags are set this
-means the entire frame will be unwound and all cleanup code should be run.
-\item@_UA_HANDLER_FRAME@: This flag is set during the cleanup phase
-on the function frame that found the handler. The personality function must
-prepare to return to normal code execution and return
-@_URC_INSTALL_CONTEXT@.
-\item@_UA_FORCE_UNWIND@: This flag is set if the personality function
-is called through a forced unwind call. Forced unwind only performs the
-cleanup phase and uses a different means to decide when to stop. See its
-section below.
-\end{itemize}
-
-The @exception_class@ argument is a copy of the @exception@'s
-@exception_class@ field.
-
-The @exception@ argument is a pointer to the user provided storage
-object. It has two public fields, the exception class which is actually just
-a number that identifies the exception handling mechanism that created it and
-the other is the clean-up function. The clean-up function is called if the
-exception needs to 
-
-The @context@ argument is a pointer to an opaque type. This is passed
-to the many helper functions that can be called inside the personality
-function.
-
 \subsection{Raise Exception}
-This could be considered the central function of libunwind. It preforms the
-two staged unwinding the library is built around and most of the rest of the
-interface of libunwind is here to support it. It's signature is as follows:
-
-\begin{lstlisting}
+Raising an exception is the central function of libunwind and it performs a
+two-staged unwinding.
+\begin{cfa}
 _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
+\end{cfa}
+First, the function begins the search phase, calling the personality function
+of the most recent stack frame. It continues to call personality functions
+traversing the stack from newest to oldest until a function finds a handler or
+the end of the stack is reached. In the latter case, raise exception returns
+@_URC_END_OF_STACK@.
+
+Second, when a handler is matched, raise exception continues onto the cleanup
+phase.
+Once again, it calls the personality functions of each stack frame from newest
+to oldest. This pass stops at the stack frame containing the matching handler.
+If that personality function has not installed a handler, it is an error.
+
+If an error is encountered, raise exception returns either
+@_URC_FATAL_PHASE1_ERROR@ or @_URC_FATAL_PHASE2_ERROR@ depending on when the
+error occurred.
+
+\subsection{Forced Unwind}
+\label{s:ForcedUnwind}
+Forced Unwind is the other central function in libunwind.
+\begin{cfa}
+_Unwind_Reason_Code _Unwind_ForcedUnwind( _Unwind_Exception *,
+	_Unwind_Stop_Fn, void *);
+\end{cfa}
+It also unwinds the stack but it does not use the search phase. Instead another
+function, the stop function, is used to decide when to stop.  The exception is the
+same as the one passed to raise exception. The extra arguments are the stop
+function and the stop parameter. The stop function has a similar interface to a
+personality function, except it is also passed the stop parameter.
+\begin{lstlisting}[language=C,{moredelim=**[is][\color{red}]{@}{@}}]
+typedef _Unwind_Reason_Code (*@_Unwind_Stop_Fn@)(
+	int version, _Unwind_Action @action@,
+	_Unwind_Exception_Class @exception_class@,
+	_Unwind_Exception * @exception@,
+	struct _Unwind_Context * @context@,
+	void * @stop_parameter@);
 \end{lstlisting}
 
-When called the function begins the search phase, calling the personality
-function of the most recent stack frame. It will continue to call personality
-functions traversing the stack new-to-old until a function finds a handler or
-the end of the stack is reached. In the latter case raise exception will
-return with @_URC_END_OF_STACK@.
-
-Once a handler has been found raise exception continues onto the the cleanup
-phase. Once again it will call the personality functins of each stack frame
-from newest to oldest. This pass will stop at the stack frame that found the
-handler last time, if that personality function does not install the handler
-it is an error.
-
-If an error is encountered raise exception will return either
-@_URC_FATAL_PHASE1_ERROR@ or @_URC_FATAL_PHASE2_ERROR@ depending
-on when the error occured.
-
-\subsection{Forced Unwind}
-This is the second big function in libunwind. It also unwinds a stack but it
-does not use the search phase. Instead another function, the stop function,
-is used to decide when to stop.
-
-\begin{lstlisting}
-_Unwind_Reason_Code _Unwind_ForcedUnwind(
-    _Unwind_Exception *, _Unwind_Stop_Fn, void *);
-\end{lstlisting}
-
-The exception is the same as the one passed to raise exception. The extra
-arguments are the stop function and the stop parameter. The stop function has
-a similar interface as a personality function, except it is also passed the
-stop parameter.
-
-\begin{lstlisting}
-typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(
-    int version,
-    _Unwind_Action action,
-    _Unwind_Exception_Class exception_class,
-    _Unwind_Exception * exception,
-    struct _Unwind_Context * context,
-    void * stop_parameter);
-\end{lstlisting}
-
 The stop function is called at every stack frame before the personality
-function is called and then once more once after all frames of the stack have
-been unwound.
-
-Each time it is called the stop function should return @_URC_NO_REASON@
-or transfer control directly to other code outside of libunwind. The
-framework does not provide any assistance here.
-
-Its arguments are the same as the paired personality function.
-The actions @_UA_CLEANUP_PHASE@ and @_UA_FORCE_UNWIND@ are always
-set when it is called. By the official standard that is all but both GCC and
-Clang add an extra action on the last call at the end of the stack:
-@_UA_END_OF_STACK@.
+function is called and then once more after all frames of the stack are
+unwound.
+
+Each time it is called, the stop function should return @_URC_NO_REASON@ or
+transfer control directly to other code outside of libunwind. The framework
+does not provide any assistance here.
+
+\begin{sloppypar}
+Its arguments are the same as the paired personality function.  The actions
+@_UA_CLEANUP_PHASE@ and @_UA_FORCE_UNWIND@ are always set when it is
+called. Beyond the libunwind standard, both GCC and Clang add an extra action
+on the last call at the end of the stack: @_UA_END_OF_STACK@.
+\end{sloppypar}
 
 \section{Exception Context}
 % Should I have another independent section?
 % There are only two things in it, top_resume and current_exception. How it is
-% stored changes depending on wheither or not the thread-library is linked.
-
-The exception context is a piece of global storage used to maintain data
-across different exception operations and to communicate between different
-components.
-
-Each stack has its own exception context. In a purely sequental program, using
-only core Cforall, there is only one stack and the context is global. However
-if the library @libcfathread@ is linked then there can be multiple
-stacks so they will each need their own.
-
-To handle this code always gets the exception context from the function
-@this_exception_context@. The main exception handling code is in
-@libcfa@ and that library also defines the function as a weak symbol
-so it acts as a default. Meanwhile in @libcfathread@ the function is
-defined as a strong symbol that replaces it when the libraries are linked
-together.
-
-The version of the function defined in @libcfa@ is very simple. It
-returns a pointer to a global static variable. With only one stack this
-global instance is associated with the only stack.
-
-The version of the function defined in @libcfathread@ has to handle
-more as there are multiple stacks. The exception context is included as
-part of the per-stack data stored as part of coroutines. In the cold data
-section, stored at the base of each stack, is the exception context for that
-stack. The @this_exception_context@ uses the concurrency library to get
-the current coroutine and through it the cold data section and the exception
-context.
+% stored changes depending on whether or not the thread-library is linked.
+
+The exception context is global storage used to maintain data across different
+exception operations and to communicate among different components.
+
+Each stack must have its own exception context. In a sequential \CFA program,
+there is only one stack with a single global exception-context. However, when
+the library @libcfathread@ is linked, there are multiple stacks where each
+needs its own exception context.
+
+General access to the exception context is provided by function
+@this_exception_context@. For sequential execution, this function is defined as
+a weak symbol in the \CFA system-library, @libcfa@. When a \CFA program is
+concurrent, it links with @libcfathread@, where this function is defined with a
+strong symbol replacing the sequential version.
+
+% The version of the function defined in @libcfa@ is very simple. It returns a
+% pointer to a global static variable. With only one stack this global instance
+% is associated with the only stack.
+
+For coroutines, @this_exception_context@ accesses the exception context stored
+at the base of the stack. For threads, @this_exception_context@ uses the
+concurrency library to access the current stack of the thread or coroutine
+being executed by the thread, and then accesses the exception context stored at
+the base of this stack.
 
 \section{Termination}
@@ -306,133 +357,132 @@
 % catches. Talk about GCC nested functions.
 
-Termination exceptions use libunwind quite heavily because it matches the
-intended use from \CPP exceptions very closely. The main complication is that
-since the \CFA compiler works by translating to C code it cannot generate the
-assembly to form the LSDA for try blocks or destructors.
+Termination exceptions use libunwind heavily because it matches the intended
+use from \CC exceptions closely. The main complication for \CFA is that the
+compiler generates C code, making it very difficult to generate the assembly to
+form the LSDA for try blocks or destructors.
 
 \subsection{Memory Management}
-The first step of termination is to copy the exception into memory managed by
-the exception system. Currently the system just uses malloc, without reserved
-memory or and ``small allocation" optimizations. The exception handling
-mechanism manages memory for the exception as well as memory for libunwind
-and the system's own per-exception storage.
-
-Exceptions are stored in variable sized block. The first component is a fixed
-sized data structure that contains the information for libunwind and the
-exception system. The second component is a blob of memory that is big enough
-to store the exception. Macros with pointer arthritic and type cast are
-used to move between the components or go from the embedded
+The first step of a termination raise is to copy the exception into memory
+managed by the exception system. Currently, the system uses @malloc@, rather
+than reserved memory or the stack top. The exception handling mechanism manages
+memory for the exception as well as memory for libunwind and the system's own
+per-exception storage.
+
+Exceptions are stored in variable-sized blocks. \PAB{Show a memory layout
+figure.} The first component is a fixed sized data structure that contains the
+information for libunwind and the exception system. The second component is an
+area of memory big enough to store the exception. Macros with pointer arithmetic
+and type casts are used to move between the components or go from the embedded
 @_Unwind_Exception@ to the entire node.
 
-All of these nodes are strung together in a linked list. One linked list per
-stack, with the head stored in the exception context. Within each linked list
-the most recently thrown exception is at the head and the older exceptions
-are further down the list. This list format allows exceptions to be thrown
-while a different exception is being handled. Only the exception at the head
-of the list is currently being handled, the other will wait for the
-exceptions before them to be removed.
-
-The virtual members in the exception's virtual table. The size of the
-exception, the copy function and the free function are all in the virtual
-table so they are decided per-exception type. The size and copy function are
-used right away when the exception is copied in to managed memory. After the
-exception is handled the free function is used to clean up the exception and
-then the entire node is passed to free.
-
-\subsection{Try Statements \& Catch Clauses}
-The try statements with termination handlers have a pretty complex conversion
-to compensate for the lack of assembly generation. Libunwind requires an LSDA
-(Language Specific Data Area) and personality function for a function to
-unwind across it. The LSDA in particular is hard to generate at the level of
-C which is what the \CFA compiler outputs so a work-around is used.
-
-This work around is a function called @__cfaehm_try_terminate@ in the
-standard library. The contents of a try block and the termination handlers
-are converted into functions. These are then passed to the try terminate
-function and it calls them. This puts the try statements in their own
-functions so that no function has to deal with both termination handlers and
-destructors.
-
-This function has some custom embedded assembly that defines its personality
-function and LSDA. This is hand coded in C which is why there is only one
-version of it, the compiler has no capability to generate it. The personality
-function is structured so that it may be expanded, but really it only handles
-this one function. Notably it does not handle any destructors so the function
-is constructed so that it does need to run it.
+All of these nodes are linked together in a list, one list per stack, with the
+list head stored in the exception context. Within each linked list, the most
+recently thrown exception is at the head followed by older thrown
+exceptions. This format allows exceptions to be thrown, while a different
+exception is being handled. The exception at the head of the list is currently
+being handled, while other exceptions wait for the exceptions before them to be
+removed.
+
+The virtual members in the exception's virtual table provide the size of the
+exception, the copy function, and the free function, so they are specific to an
+exception type. The size and copy function are used immediately to copy an
+exception into managed memory. After the exception is handled the free function
+is used to clean up the exception and then the entire node is passed to free.
+
+\subsection{Try Statements and Catch Clauses}
+The try statement with termination handlers is complex because it must
+compensate for the lack of assembly-code generated from \CFA. Libunwind
+requires an LSDA and personality function for control to unwind across a
+function. The LSDA in particular is hard to mimic in generated C code.
+
+The workaround is a function called @__cfaehm_try_terminate@ in the standard
+library. The contents of a try block and the termination handlers are converted
+into functions. These are then passed to the try terminate function and it
+calls them. This approach puts a try statement in its own functions so that no
+function has to deal with both termination handlers and destructors. \PAB{I do
+not understand the previous sentence.}
+
+This function has some custom embedded assembly that defines \emph{its}
+personality function and LSDA. The assembly is created with handcrafted C @asm@
+statements, which is why there is only one version of it. The personality
+function is structured so that it can be expanded, but currently it only
+handles this one function.  Notably, it does not handle any destructors so the
+function is constructed so that it does not need to run any. \PAB{I do not
+understand the previous sentence.}
 
 The three functions passed to try terminate are:
-\begin{itemize}
-\item The try function: This function is the try block, all the code inside
-the try block is placed inside the try function. It takes no parameters and
-has no return value. This function is called during regular execution to run
-the try block.
-\item The match function: This function decides if this try statement should
-handle any given termination exception. It takes a pointer to the exception
-and returns 0 if the exception is not handled here. Otherwise the return value
-is the id of the handler that should handle the exception. It is called
-during the search phase.
-It is constructed from the conditional part of each handler. It runs each
-check in turn, first checking to see if the object
-\item The catch function: This function handles the exception. It takes a
-pointer to the exception and the handler's id and returns nothing. It is
-called after the clean-up phase.
-It is constructed by stitching together the bodies of each handler
-\end{itemize}
-All three are created with GCC nested functions. GCC nested functions can be
-used to create closures, functions that can refer to the state of other
-functions on the stack. This allows the functions to refer to the main
-function and all the variables in scope.
-
-These nested functions and all other functions besides
-@__cfaehm_try_terminate@ in \CFA use the GCC personality function and
-the @-fexceptions@ flag to generate the LSDA. This allows destructors
-to be implemented with the cleanup attribute.
+\begin{description}
+\item[try function:] This function is the try block, all the code inside the
+try block is placed inside the try function. It takes no parameters and has no
+return value. This function is called during regular execution to run the try
+block.
+
+\item[match function:] This function is called during the search phase and
+decides if a catch clause matches the termination exception.  It is constructed
+from the conditional part of each handler and runs each check, top to bottom,
+in turn, first checking to see if the exception type matches and then if the
+condition is true. It takes a pointer to the exception and returns 0 if the
+exception is not handled here. Otherwise the return value is the id of the
+handler that matches the exception.
+
+\item[handler function:] This function handles the exception. It takes a
+pointer to the exception and the handler's id and returns nothing. It is called
+after the cleanup phase.  It is constructed by stitching together the bodies of
+each handler and dispatches to the selected handler.
+\end{description}
+All three functions are created with GCC nested functions. GCC nested functions
+can be used to create closures, functions that can refer to the state of other
+functions on the stack. This approach allows the functions to refer to all the
+variables in scope for the function containing the @try@ statement.  These
+nested functions and all other functions besides @__cfaehm_try_terminate@ in
+\CFA use the GCC personality function and the @-fexceptions@ flag to generate
+the LSDA. This allows destructors to be implemented with the cleanup attribute.
 
 \section{Resumption}
 % The stack-local data, the linked list of nodes.
 
-Resumption uses a list of nodes for its stack traversal. The head of the list
-is stored in the exception context. The nodes in the list just have a pointer
+Resumption is simple to implement because there is no stack unwinding. The
+resumption raise uses a list of nodes for its stack traversal. The head of the
+list is stored in the exception context. The nodes in the list have a pointer
 to the next node and a pointer to the handler function.
 
-The on a resumption throw the this list is traversed. At each node the
-handler function is called and is passed the exception by pointer. It returns
-true if the exception was handled and false otherwise.
-
-The handler function does both the matching and catching. It tries each
-the condition of @catchResume@ in order, top-to-bottom and until it
-finds a handler that matches. If no handler matches then the function returns
-false. Otherwise the matching handler is run, if it completes successfully
-the function returns true. Rethrows, through the @throwResume;@
-statement, cause the function to return true.
+A resumption raise traverses this list. At each node the handler function is
+called, passing the exception by pointer. It returns true if the exception is
+handled and false otherwise.
+
+The handler function does both the matching and handling. It computes the
+condition of each @catchResume@ in top-to-bottom order, until it finds a
+handler that matches. If no handler matches then the function returns
+false. Otherwise the matching handler is run; if it completes successfully, the
+function returns true. Reresume, through the @throwResume;@ statement, causes
+the function to return true.
 
 % Recursive Resumption Stuff:
-Blocking out part of the stack is accomplished by updating the front of the
-list as the search continues. Before the handler at a node is called the head
-of the list is updated to the next node of the current node. After the search
-is complete, successful or not, the head of the list is reset.
-
-This means the current handler and every handler that has already been
-checked are not on the list while a handler is run. If a resumption is thrown
-during the handling of another resumption the active handlers and all the
-other handler checked up to this point will not be checked again.
+Search skipping \see{\VPageref{p:searchskip}}, which ignores parts of the stack
+already examined, is accomplished by updating the front of the list as the
+search continues. Before the handler at a node is called the head of the list
+is updated to the next node of the current node. After the search is complete,
+successful or not, the head of the list is reset.
+
+This mechanism means the current handler and every handler that has already
+been checked are not on the list while a handler is run. If a resumption is
+thrown during the handling of another resumption the active handlers and all
+the other handlers checked up to this point are not checked again.
 
 This structure also supports new handler added while the resumption is being
 handled. These are added to the front of the list, pointing back along the
-stack -- the first one will point over all the checked handlers -- and the
-ordering is maintained.
-
-\subsection{Libunwind Compatibility}
-Resumption does not use libunwind for two simple reasons. The first is that
-it does not have to unwind anything so would never need to use the clean-up
-phase. Still the search phase could be used to make it free to enter or exit
-a try statement with resumption handlers in the same way termination handlers
-are for the same trade off in the cost of the throw. This is where the second
-reason comes in, there is no way to return from a search without installing
-a handler or raising an error.
-
-Although work arounds could be created none seemed to be worth it for the
-prototype. This implementation has no difference in behaviour and is much
-simpler.
+stack -- the first one points over all the checked handlers -- and the ordering
+is maintained.
+
+\label{p:zero-cost}
+Note, the resumption implementation has a cost for entering/exiting a @try@
+statement with @catchResume@ clauses, whereas a @try@ statement with @catch@
+clauses has zero-cost entry/exit. While resumption does not need the stack
+unwinding and cleanup provided by libunwind, it could use the search phase to
+provide zero-cost entry/exit using the LSDA. Unfortunately, there is no way
+to return from a libunwind search without installing a handler or raising an
+error.  Although workarounds might be possible, they are beyond the scope of
+this thesis. The current resumption implementation has simplicity in its
+favour.
 % Seriously, just compare the size of the two chapters and then consider
 % that unwind is required knowledge for that chapter.
@@ -440,13 +490,12 @@
 \section{Finally}
 % Uses destructors and GCC nested functions.
-Finally clauses are a simple decomposition to some of the existing features.
-The code in the block is placed into a GCC nested function with a unique name,
-no arguments or return values. This nested function is then set as the
-clean-up function of an empty object that is declared at the beginning of a
-block placed around the contexts of the try statement.
+A finally clause is placed into a GCC nested-function with a unique name, and no
+arguments or return values. This nested function is then set as the cleanup
+function of an empty object that is declared at the beginning of a block placed
+around the context of the associated @try@ statement.
 
 The rest is handled by GCC. The try block and all handlers are inside the
-block. When they are complete control exits the block and the empty object
-is cleaned up, which runs the function that contains the finally code.
+block. At completion, control exits the block and the empty object is cleaned
+up, which runs the function that contains the finally code.
 
 \section{Cancellation}
@@ -454,43 +503,44 @@
 
 Cancellation also uses libunwind to do its stack traversal and unwinding,
-however it uses a different primary function @_Unwind_ForcedUnwind@.
-Details of its interface can be found in the unwind section.
-
-The first step of cancellation is to find the stack was cancelled and which
-type of stack it is. Luckily the threads library stores the main thread
-pointer and the current thread pointer and every thread stores a pointer to
+however it uses a different primary function @_Unwind_ForcedUnwind@.  Details
+of its interface can be found in the \VRef{s:ForcedUnwind}.
+
+The first step of cancellation is to find the cancelled stack and its type:
+coroutine or thread. Fortunately, the thread library stores the main thread
+pointer and the current thread pointer, and every thread stores a pointer to
 its main coroutine and the coroutine it is currently executing.
 
-So if the the current thread's main and current coroutine do not match, it is
-a coroutine cancellation. Otherwise if the main and current thread do not
-match, it is a thread cancellation. Otherwise it is a main thread
-cancellation.
-
-However if the threading library is not linked then execution must be on the
-main stack as that is the only one that exists. So the entire check is skipped
-using the linker and weak symbols. Instead the main thread cancellation is
-unconditionally preformed.
-
-Regardless of how they are choosen afterwords the stop function and the stop
-parameter are passed to the forced unwind functon. The general pattern of all
-three stop functions is the same, they continue unwinding until the end of
-stack when they do there primary work.
-
-Main stack cancellation it is very simple. The ``transfer" is just an abort,
-the program stops executing.
-
-The coroutine cancellation stores the exception on the coroutine and then
-does a coroutine context switch. The rest is handled inside resume. Every time
-control returns from a resumed thread there is a check to see if it is
-cancelled. If it is the exception is retrieved and the CoroutineCancelled
-exception is constructed and loaded. It is then thrown as a regular exception
-with the default handler coming from the context of the resumption call.
-
-The thread cancellation stores the exception on the thread's main stack and
-then returns to the scheduler. The rest is handled by the joiner. The wait
-for the joined thread to finish works the same but after that it checks
-to see if there was a cancellation. If there was the exception is retrieved
-and the ThreadCancelled exception is constructed. The default handler is
-passed in as a function pointer. If it is null (as it is for the
-auto-generated joins on destructor call) it a default is used that simply
-calls abort; which gives the required handling on implicate join.
+The first check is if the current thread's main and current coroutine do not
+match, implying a coroutine cancellation; otherwise, if the main and current
+thread do not match, it is a thread cancellation. Otherwise it is a main thread
+cancellation. \PAB{Previous sentence does not make sense.}
+
+However, if the threading library is not linked, the sequential execution is on
+the main stack. Hence, the entire check is skipped because the weak-symbol
+function is loaded. Therefore, a main thread cancellation is unconditionally
+performed.
+
+Regardless of how the stack is chosen, the stop function and parameter are
+passed to the forced-unwind function. The general pattern of all three stop
+functions is the same: they continue unwinding until the end of stack when they
+do their primary work.
+
+For main stack cancellation, the transfer is just a program abort.
+
+For coroutine cancellation, the exception is stored on the coroutine's stack,
+and the coroutine context switches to its last resumer. The rest is handled on
+the backside of the resume, which checks if the resumed coroutine is
+cancelled. If cancelled, the exception is retrieved from the resumed coroutine,
+and a @CoroutineCancelled@ exception is constructed and loaded with the
+cancelled exception. It is then resumed as a regular exception with the default
+handler coming from the context of the resumption call.
+
+For thread cancellation, the exception is stored on the thread's main stack and
+then context switched to the scheduler. The rest is handled by the thread
+joiner. When the join is complete, the joiner checks if the joined thread is
+cancelled. If cancelled, the exception is retrieved from the joined thread, and
+a @ThreadCancelled@ exception is constructed and loaded with the cancelled
+exception. The default handler is passed in as a function pointer. If it is
+null (as it is for the auto-generated joins on destructor call), the default is
+used, which is a program abort.
+%; which gives the required handling on implicit join.
Index: doc/theses/andrew_beach_MMath/thesis-frontpgs.tex
===================================================================
--- doc/theses/andrew_beach_MMath/thesis-frontpgs.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/thesis-frontpgs.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -36,5 +36,5 @@
 
         A thesis \\
-        presented to the University of Waterloo \\ 
+        presented to the University of Waterloo \\
         in fulfillment of the \\
         thesis requirement for the degree of \\
@@ -64,5 +64,5 @@
 \cleardoublepage
 
- 
+
 %----------------------------------------------------------------------
 % EXAMINING COMMITTEE (Required for Ph.D. theses only)
@@ -71,15 +71,16 @@
 \begin{center}\textbf{Examining Committee Membership}\end{center}
   \noindent
-The following served on the Examining Committee for this thesis. The decision of the Examining Committee is by majority vote.
-  \bigskip
-  
-  \noindent
-\begin{tabbing}
-Internal-External Member: \=  \kill % using longest text to define tab length
-External Examiner: \>  Bruce Bruce \\ 
+The following served on the Examining Committee for this thesis. The decision
+of the Examining Committee is by majority vote.
+  \bigskip
+
+  \noindent
+\begin{tabbing}
+Internal-External Member: \=  \kill % using longest text to define tab length
+External Examiner: \>  Bruce Bruce \\
 \> Professor, Dept. of Philosophy of Zoology, University of Wallamaloo \\
-\end{tabbing} 
-  \bigskip
-  
+\end{tabbing}
+  \bigskip
+
   \noindent
 \begin{tabbing}
@@ -91,5 +92,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
   \begin{tabbing}
@@ -99,5 +100,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -107,5 +108,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -123,8 +124,10 @@
   % December 13th, 2006.  It is designed for an electronic thesis.
   \noindent
-I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
-
-  \bigskip
-  
+I hereby declare that I am the sole author of this thesis. This is a true copy
+of the thesis, including any required final revisions, as accepted by my
+examiners.
+
+  \bigskip
+
   \noindent
 I understand that my thesis may be made electronically available to the public.
Index: doc/theses/andrew_beach_MMath/thesis.tex
===================================================================
--- doc/theses/andrew_beach_MMath/thesis.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/thesis.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -45,5 +45,5 @@
 % FRONT MATERIAL
 %----------------------------------------------------------------------
-\input{thesis-frontpgs} 
+\input{thesis-frontpgs}
 
 %----------------------------------------------------------------------
@@ -65,5 +65,5 @@
 A \gls{computer} could compute $\pi$ all day long. In fact, subsets of digits
 of $\pi$'s decimal approximation would make a good source for psuedo-random
-vectors, \gls{rvec} . 
+vectors, \gls{rvec} .
 
 %----------------------------------------------------------------------
@@ -96,41 +96,41 @@
 
 \begin{itemize}
-\item A well-prepared PDF should be 
+\item A well-prepared PDF should be
   \begin{enumerate}
     \item Of reasonable size, {\it i.e.} photos cropped and compressed.
-    \item Scalable, to allow enlargment of text and drawings. 
-  \end{enumerate} 
+    \item Scalable, to allow enlargement of text and drawings.
+  \end{enumerate}
 \item Photos must be bit maps, and so are not scaleable by definition. TIFF and
 BMP are uncompressed formats, while JPEG is compressed. Most photos can be
 compressed without losing their illustrative value.
-\item Drawings that you make should be scalable vector graphics, \emph{not} 
+\item Drawings that you make should be scalable vector graphics, \emph{not}
 bit maps. Some scalable vector file formats are: EPS, SVG, PNG, WMF. These can
-all be converted into PNG or PDF, that pdflatex recognizes. Your drawing 
-package probably can export to one of these formats directly. Otherwise, a 
-common procedure is to print-to-file through a Postscript printer driver to 
-create a PS file, then convert that to EPS (encapsulated PS, which has a 
-bounding box to describe its exact size rather than a whole page). 
+all be converted into PNG or PDF, that pdflatex recognizes. Your drawing
+package probably can export to one of these formats directly. Otherwise, a
+common procedure is to print-to-file through a Postscript printer driver to
+create a PS file, then convert that to EPS (encapsulated PS, which has a
+bounding box to describe its exact size rather than a whole page).
 Programs such as GSView (a Ghostscript GUI) can create both EPS and PDF from
 PS files. Appendix~\ref{AppendixA} shows how to generate properly sized Matlab
 plots and save them as PDF.
 \item It's important to crop your photos and draw your figures to the size that
-you want to appear in your thesis. Scaling photos with the 
-includegraphics command will cause loss of resolution. And scaling down 
+you want to appear in your thesis. Scaling photos with the
+includegraphics command will cause loss of resolution. And scaling down
 drawings may cause any text annotations to become too small.
 \end{itemize}
- 
+
 For more information on \LaTeX\, see the uWaterloo Skills for the
-Academic Workplace \href{https://uwaterloo.ca/information-systems-technology/services/electronic-thesis-preparation-and-submission-support/ethesis-guide/creating-pdf-version-your-thesis/creating-pdf-files-using-latex/latex-ethesis-and-large-documents}{course notes}. 
+Academic Workplace \href{https://uwaterloo.ca/information-systems-technology/services/electronic-thesis-preparation-and-submission-support/ethesis-guide/creating-pdf-version-your-thesis/creating-pdf-files-using-latex/latex-ethesis-and-large-documents}{course notes}.
 \footnote{
 Note that while it is possible to include hyperlinks to external documents,
-it is not wise to do so, since anything you can't control may change over time. 
-It \emph{would} be appropriate and necessary to provide external links to 
-additional resources for a multimedia ``enhanced'' thesis. 
-But also note that if the \package{hyperref} package is not included, 
-as for the print-optimized option in this thesis template, any \cmmd{href} 
+it is not wise to do so, since anything you can't control may change over time.
+It \emph{would} be appropriate and necessary to provide external links to
+additional resources for a multimedia ``enhanced'' thesis.
+But also note that if the \package{hyperref} package is not included,
+as for the print-optimized option in this thesis template, any \cmmd{href}
 commands in your logical document are no longer defined.
 A work-around employed by this thesis template is to define a dummy
-\cmmd{href} command (which does nothing) in the preamble of the document, 
-before the \package{hyperref} package is included. 
+\cmmd{href} command (which does nothing) in the preamble of the document,
+before the \package{hyperref} package is included.
 The dummy definition is then redifined by the
 \package{hyperref} package when it is included.
@@ -138,5 +138,5 @@
 
 The classic book by Leslie Lamport \cite{lamport.book}, author of \LaTeX , is
-worth a look too, and the many available add-on packages are described by 
+worth a look too, and the many available add-on packages are described by
 Goossens \textit{et al} \cite{goossens.book}.
 
@@ -180,7 +180,7 @@
 Export Setup button in the figure Property Editor.
 
-\section{From the Command Line} 
+\section{From the Command Line}
 All figure properties can also be manipulated from the command line. Here's an
-example: 
+example:
 \begin{verbatim}
 x=[0:0.1:pi];
Index: doc/theses/andrew_beach_MMath/unwinding.tex
===================================================================
--- doc/theses/andrew_beach_MMath/unwinding.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/unwinding.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -1,37 +1,54 @@
-\chapter{\texorpdfstring{Unwinding in \CFA}{Unwinding in Cforall}}
+\chapter{Unwinding in \CFA}
 
-Stack unwinding is the process of removing things from the stack. Within
-functions and on function return this is handled directly by the code in the
-function itself as it knows exactly what is on the stack just from the
-current location in the function. Unwinding across stack frames means that it
-is no longer knows exactly what is on the stack or even how much of the stack
-needs to be removed.
+Stack unwinding is the process of removing stack frames (activations) from the
+stack. On function entry and return, unwinding is handled directly by the code
+embedded in the function. Usually, the stack-frame size is known statically
+based on parameters and local variable declarations.  For dynamically-sized
+local variables, a runtime computation is necessary to know the frame
+size. Finally, a function's frame-size may change during execution as local
+variables (static or dynamic sized) go in and out of scope.
+Allocating/deallocating stack space is usually an $O(1)$ operation achieved by
+bumping the hardware stack-pointer up or down as needed.
 
-Even this is fairly simple if nothing needs to happen when the stack unwinds.
-Traditional C can unwind the stack by saving and restoring state (with
-@setjmp@ \& @longjmp@). However many languages define actions that
-have to be taken when something is removed from the stack, such as running
-a variable's destructor or a @try@ statement's @finally@
-clause. Handling this requires walking the stack going through each stack
-frame.
+Unwinding across multiple stack frames is more complex because individual stack
+management code associated with each frame is bypassed. That is, the location
+of a function's frame code is largely unknown and dispersed throughout the
+function, hence the current stack-frame size managed by that code is also
+unknown. Hence, code unwinding across frames does not have direct knowledge
+about what is on the stack, and hence, how much of the stack needs to be
+removed.
 
-For exceptions, this means everything from the point the exception is raised
-to the point it is caught, while checking each frame for handlers during the
-stack walk to find out where it should be caught. This is where the most of
-the expense and complexity of exception handling comes from.
+The traditional unwinding mechanism for C is implemented by saving a snap-shot
+of a function's state with @setjmp@ and restoring that snap-shot with
+@longjmp@. This approach bypasses the need to know stack details by simply
+resetting to a snap-shot of an arbitrary but existing function frame on the
+stack. It is up to the programmer to ensure the snap-shot is valid when it is
+reset, making the code fragile with potential errors that are difficult to
+debug because the stack becomes corrupted.
 
-To do all of this we use libunwind, a low level library that provides tools
-for stack walking and stack unwinding. What follows is an overview of all the
-relivant features of libunwind and then how \CFA uses them to implement its
-exception handling.
+However, many languages define cleanup actions that have to be taken when
+something is deallocated from the stack or blocks end, such as running a
+variable's destructor or a @try@ statement's @finally@ clause. Handling these
+mechanisms requires walking the stack and checking each stack frame for these
+potential actions.
+
+For exceptions, it must be possible to walk the stack frames in search of try
+statements with handlers to perform exception matching. For termination
+exceptions, it must be possible to unwind all stack frames from the throw to
+the matching catch, and each of these frames must be checked for cleanup
+actions. Stack walking is where most of the complexity and expense of
+exception handling comes from.
+
+One of the most popular tools for stack management is libunwind, a low level
+library that provides tools for stack walking and unwinding. What follows is an
+overview of all the relevant features of libunwind and how \CFA uses them to
+implement its exception handling.
 
 \section{libunwind Usage}
-
-\CFA uses two primary functions in libunwind to create most of its
-exceptional control-flow: @_Unwind_RaiseException@ and
-@_Unwind_ForcedUnwind@.
-Their operation is divided into two phases: search and clean-up. The search
-phase -- phase 1 -- is used to scan the stack but not unwinding it. The
-clean-up phase -- phase 2 -- is used for unwinding.
+\CFA uses two primary functions in libunwind to create most of its exceptional
+control-flow: @_Unwind_RaiseException@ and @_Unwind_ForcedUnwind@.  Their
+operation is divided into two phases: search and clean-up. The search phase --
+phase 1 -- is used to scan the stack but not unwind it. The clean-up phase
+-- phase 2 -- is used for unwinding.
 
 The raise-exception function uses both phases. It starts by searching for a
@@ -44,57 +61,57 @@
 A personality function performs three tasks, although not all have to be
 present. The tasks performed are decided by the actions provided.
-@_Unwind_Action@ is a bitmask of possible actions and an argument of
-this type is passed into the personality function.
+@_Unwind_Action@ is a bitmask of possible actions and an argument of this type
+is passed into the personality function.
 \begin{itemize}
-\item@_UA_SEARCH_PHASE@ is passed in search phase and tells the
-personality function to check for handlers. If there is a handler in this
-stack frame, as defined by the language, the personality function should
-return @_URC_HANDLER_FOUND@. Otherwise it should return
-@_URC_CONTINUE_UNWIND@.
-\item@_UA_CLEANUP_PHASE@ is passed in during the clean-up phase and
-means part or all of the stack frame is removed. The personality function
-should do whatever clean-up the language defines
-(such as running destructors/finalizers) and then generally returns
-@_URC_CONTINUE_UNWIND@.
-\item@_UA_HANDLER_FRAME@ means the personality function must install
-a handler. It is also passed in during the clean-up phase and is in addition
-to the clean-up action. libunwind provides several helpers for the personality
-function here. Once it is done, the personality function must return
-@_URC_INSTALL_CONTEXT@.
+\item
+\begin{sloppypar}
+@_UA_SEARCH_PHASE@ is passed in for the search phase and tells the personality
+function to check for handlers. If there is a handler in a stack frame, as
+defined by the language, the personality function returns @_URC_HANDLER_FOUND@;
+otherwise it returns @_URC_CONTINUE_UNWIND@.
+\end{sloppypar}
+\item
+@_UA_CLEANUP_PHASE@ is passed in during the clean-up phase and means part or
+all of the stack frame is removed. The personality function does whatever
+clean-up the language defines (such as running destructors/finalizers) and then
+generally returns @_URC_CONTINUE_UNWIND@.
+\item
+@_UA_HANDLER_FRAME@ means the personality function must install a handler. It
+is also passed in during the clean-up phase and is in addition to the clean-up
+action. libunwind provides several helpers for the personality function. Once
+it is done, the personality function returns @_URC_INSTALL_CONTEXT@.
 \end{itemize}
-The personality function is given a number of other arguments. Some are for
-compatability and there is the @struct _Unwind_Context@ pointer which
-passed to many helpers to get information about the current stack frame.
+The personality function is given a number of other arguments. Some arguments
+are for compatibility, and there is the @struct _Unwind_Context@ pointer which
+is passed to many helpers to get information about the current stack frame.
 
-Forced-unwind only performs the clean-up phase. It takes three arguments:
-a pointer to the exception, a pointer to the stop function and a pointer to
-the stop parameter. It does most of the same things as phase two of
-raise-exception but with some extras.
-The first it passes in an extra action to the personality function on each
-stack frame, @_UA_FORCE_UNWIND@, which means a handler cannot be
+For cancellation, forced-unwind only performs the clean-up phase. It takes
+three arguments: a pointer to the exception, a pointer to the stop function and
+a pointer to the stop parameter. It does most of the same actions as phase two
+of raise-exception but passes in an extra action to the personality function on
+each stack frame, @_UA_FORCE_UNWIND@, which means a handler cannot be
 installed.
 
-The big change is that forced-unwind calls the stop function. Each time it
-steps into a frame, before calling the personality function, it calls the
-stop function. The stop function receives all the same arguments as the
-personality function will and the stop parameter supplied to forced-unwind.
+As well, forced-unwind calls the stop function each time it steps into a frame,
+before calling the personality function. The stop function receives all the
+same arguments as the personality function and the stop parameter supplied to
+forced-unwind.
 
 The stop function is called one more time at the end of the stack after all
-stack frames have been removed. By the standard API this is marked by setting
+stack frames have been removed. The standard API marks this frame by setting
 the stack pointer inside the context passed to the stop function. However both
 GCC and Clang add an extra action for this case @_UA_END_OF_STACK@.
 
-Each time function the stop function is called it can do one or two things.
-When it is not the end of the stack it can return @_URC_NO_REASON@ to
-continue unwinding.
+Each time the stop function is called, it can do one of two things.  When it is
+not the end of the stack it can return @_URC_NO_REASON@ to continue unwinding.
 % Is there a reason that NO_REASON is used instead of CONTINUE_UNWIND?
-Its only other option is to use its own means to transfer control elsewhere
-and never return to its caller. It may always do this and no additional tools
-are provided to do it.
+The other option is to use some other means to transfer control elsewhere and
+never return to its caller. libunwind provides no additional tools for
+alternate transfers of control.
 
-\section{\texorpdfstring{\CFA Implementation}{Cforall Implementation}}
+\section{\CFA Implementation}
 
-To use libunwind, \CFA provides several wrappers, its own storage,
-personality functions, and a stop function.
+To use libunwind, \CFA provides several wrappers, its own storage, personality
+functions, and a stop function.
 
 The wrappers perform three tasks: set-up, clean-up and controlling the
@@ -108,66 +125,60 @@
 The core control code is called every time a throw -- after set-up -- or
 re-throw is run. It uses raise-exception to search for a handler and to run it
-if one is found. If no handler is found and raise-exception returns then
+if one is found. If no handler is found and raise-exception returns, then
 forced-unwind is called to run all destructors on the stack before terminating
 the process.
 
-The stop function is very simple. It checks the end of stack flag to see if
-it is finished unwinding. If so, it calls @exit@ to end the process,
-otherwise it returns with no-reason to continue unwinding.
+The stop function is simple. It checks for the end of stack flag to see if
+unwinding is finished. If so, it calls @exit@ to end the process, otherwise it
+returns with no-reason to continue unwinding.
 % Yeah, this is going to have to change.
 
 The personality routine is more complex because it has to obtain information
-about the function by scanning the LSDA (Language Specific Data Area). This
+about the function by scanning the Language Specific Data Area (LSDA). This
 step allows a single personality function to be used for multiple functions and
-let that personaliity function figure out exactly where in the function
-execution was, what is currently in the stack frame and what handlers should
-be checked.
+lets that personality function figure out exactly where in the function
+execution is, what is currently in the stack frame, and what handlers should be
+checked.
 % Not that we do that yet.
 
-However, generating the LSDA is difficult. It requires knowledge about the
-location of the instruction pointer and stack layout, which varies with
-compiler and optimization levels. So for frames where there are only
-destructors, GCC's attribute cleanup with the @-fexception@ flag is
-sufficient to handle unwinding.
+It is also necessary to generate the LSDA, which is difficult. It requires
+knowledge about the location of the instruction pointer and stack layout, which
+varies with compiler and optimization levels. Fortunately, for frames where
+there are only destructors, GCC's attribute cleanup with the @-fexceptions@ flag
+is sufficient to handle unwinding.
 
-The only functions that require more than that are those that contain
-@try@ statements. A @try@ statement has a @try@
-clause, some number of @catch@ clauses and @catchResume@
-clauses and may have a @finally@ clause. Of these only @try@
-statements with @catch@ clauses need to be transformed and only they
-and the @try@ clause are involved.
+The only functions that require more information are those containing @try@
+statements. Specifically, only @try@ statements with @catch@ clauses need to be
+transformed.  The @try@ statement is converted into a series of closures that
+can access other parts of the function according to scoping rules but can be
+passed around. The @catch@ clauses are converted into two functions: the match
+function and the catch function.
 
-The @try@ statement is converted into a series of closures which can
-access other parts of the function according to scoping rules but can be
-passed around. The @try@ clause is converted into the try functions,
-almost entirely unchanged. The @catch@ clauses are converted into two
-functions; the match function and the catch function.
+Together the match function and the catch function form the code that runs when
+an exception passes out of the guarded block for a try statement. The match
+function is used during the search phase: it is passed an exception and checks
+each handler to see if the raised exception matches the handler exception. It
+returns an index that represents which handler matched or that there is no
+match. The catch function is used during the clean-up phase: it is passed an
+exception and the index of a handler. It casts the exception to the exception
+type declared in that handler and then runs the handler's body.
 
-Together the match function and the catch function form the code that runs
-when an exception passes out of a try block. The match function is used during
-the search phase, it is passed an exception and checks each handler to see if
-it will handle the exception. It returns an index that repersents which
-handler matched or that none of them did. The catch function is used during
-the clean-up phase, it is passed an exception and the index of a handler. It
-casts the exception to the exception type declared in that handler and then
-runs the handler's body.
-
-These three functions are passed to @try_terminate@. This is an
+These three functions are passed to @try_terminate@, which is an
 % Maybe I shouldn't quote that, it isn't its actual name.
-internal hand-written function that has its own personality function and
-custom assembly LSD does the exception handling in \CFA. During normal
-execution all this function does is call the try function and then return.
-It is only when exceptions are thrown that anything interesting happens.
+internal hand-written function that has its own personality function and custom
+assembly LSDA for doing the exception handling in \CFA. During normal
+execution, this function calls the try function and then returns.  It is only
+when exceptions are thrown that anything interesting happens.
 
 During the search phase the personality function gets the pointer to the match
-function and calls it. If the match function returns a handler index the
+function and calls it. If the match function returns a handler index, the
 personality function saves it and reports that the handler has been found,
-otherwise unwinding continues.
-During the clean-up phase the personality function only does anything if the
-handler was found in this frame. If it was then the personality function
-installs the handler, which is setting the instruction pointer in
-@try_terminate@ to an otherwise unused section that calls the catch
-function, passing it the current exception and handler index.
-@try_terminate@ returns as soon as the catch function returns.
+otherwise unwinding continues.  During the clean-up phase, the personality
+function only performs an action when a handler is found in a frame. For each
+found frame, the personality function installs the handler, which sets the
+instruction pointer in @try_terminate@ to an otherwise unused section that
+calls the catch function, passing it the current exception and handler index.
+@try_terminate@ returns as soon as the catch function returns.  At this point
+control has returned to normal control flow.
 
-At this point control has returned to normal control flow.
+\PAB{Maybe a diagram would be helpful?}
Index: doc/theses/andrew_beach_MMath/uw-ethesis-frontpgs.tex
===================================================================
--- doc/theses/andrew_beach_MMath/uw-ethesis-frontpgs.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/uw-ethesis-frontpgs.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -13,22 +13,18 @@
         \vspace*{1.0cm}
 
-        \Huge
-        {\bf Exception Handling in \CFA}
+        {\Huge\bf Exception Handling in \CFA}
 
         \vspace*{1.0cm}
 
-        \normalsize
         by \\
 
         \vspace*{1.0cm}
 
-        \Large
-        Andrew James Beach \\
+        {\Large Andrew James Beach} \\
 
         \vspace*{3.0cm}
 
-        \normalsize
         A thesis \\
-        presented to the University of Waterloo \\ 
+        presented to the University of Waterloo \\
         in fulfillment of the \\
         thesis requirement for the degree of \\
@@ -43,31 +39,35 @@
         \vspace*{1.0cm}
 
-        \copyright\ Andrew James Beach \the\year \\
+        \copyright{} Andrew James Beach \the\year \\
         \end{center}
 \end{titlepage}
 
-% The rest of the front pages should contain no headers and be numbered using Roman numerals starting with `ii'
+% The rest of the front pages should contain no headers and be numbered using
+% Roman numerals starting with `ii'.
 \pagestyle{plain}
 \setcounter{page}{2}
 
-\cleardoublepage % Ends the current page and causes all figures and tables that have so far appeared in the input to be printed.
-% In a two-sided printing style, it also makes the next page a right-hand (odd-numbered) page, producing a blank page if necessary.
+\cleardoublepage % Ends the current page and causes all figures and tables
+% that have so far appeared in the input to be printed. In a two-sided
+% printing style, it also makes the next page a right-hand (odd-numbered)
+% page, producing a blank page if necessary.
 
-\begin{comment} 
+\begin{comment}
 % E X A M I N I N G   C O M M I T T E E (Required for Ph.D. theses only)
 % Remove or comment out the lines below to remove this page
 \begin{center}\textbf{Examining Committee Membership}\end{center}
   \noindent
-The following served on the Examining Committee for this thesis. The decision of the Examining Committee is by majority vote.
+The following served on the Examining Committee for this thesis.
+The decision of the Examining Committee is by majority vote.
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
 Internal-External Member: \=  \kill % using longest text to define tab length
-External Examiner: \>  Bruce Bruce \\ 
+External Examiner: \>  Bruce Bruce \\
 \> Professor, Dept. of Philosophy of Zoology, University of Wallamaloo \\
-\end{tabbing} 
+\end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -79,5 +79,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
   \begin{tabbing}
@@ -87,5 +87,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -95,5 +95,5 @@
 \end{tabbing}
   \bigskip
-  
+
   \noindent
 \begin{tabbing}
@@ -111,10 +111,12 @@
   % December 13th, 2006.  It is designed for an electronic thesis.
  \begin{center}\textbf{Author's Declaration}\end{center}
-  
+
  \noindent
-I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
+I hereby declare that I am the sole author of this thesis. This is a true copy
+of the thesis, including any required final revisions, as accepted by my
+examiners.
 
   \bigskip
-  
+
   \noindent
 I understand that my thesis may be made electronically available to the public.
Index: doc/theses/andrew_beach_MMath/uw-ethesis.tex
===================================================================
--- doc/theses/andrew_beach_MMath/uw-ethesis.tex	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ doc/theses/andrew_beach_MMath/uw-ethesis.tex	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -1,73 +1,95 @@
 %======================================================================
-% University of Waterloo Thesis Template for LaTeX 
-% Last Updated November, 2020 
-% by Stephen Carr, IST Client Services, 
+% University of Waterloo Thesis Template for LaTeX
+% Last Updated November, 2020
+% by Stephen Carr, IST Client Services,
 % University of Waterloo, 200 University Ave. W., Waterloo, Ontario, Canada
 % FOR ASSISTANCE, please send mail to request@uwaterloo.ca
 
 % DISCLAIMER
-% To the best of our knowledge, this template satisfies the current uWaterloo thesis requirements.
-% However, it is your responsibility to assure that you have met all requirements of the University and your particular department.
-
-% Many thanks for the feedback from many graduates who assisted the development of this template.
-% Also note that there are explanatory comments and tips throughout this template.
+% To the best of our knowledge, this template satisfies the current uWaterloo
+% thesis requirements. However, it is your responsibility to assure that you
+% have met all requirements of the University and your particular department.
+
+% Many thanks for the feedback from many graduates who assisted the
+% development of this template. Also note that there are explanatory comments
+% and tips throughout this template.
 %======================================================================
 % Some important notes on using this template and making it your own...
 
-% The University of Waterloo has required electronic thesis submission since October 2006. 
-% See the uWaterloo thesis regulations at
-% https://uwaterloo.ca/graduate-studies/thesis.
-% This thesis template is geared towards generating a PDF version optimized for viewing on an electronic display, including hyperlinks within the PDF.
-
-% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package configuration below. 
-% THIS INFORMATION GETS EMBEDDED IN THE PDF FINAL PDF DOCUMENT.
-% You can view the information if you view properties of the PDF document.
-
-% Many faculties/departments also require one or more printed copies. 
-% This template attempts to satisfy both types of output. 
+% The University of Waterloo has required electronic thesis submission since
+% October 2006. See the uWaterloo thesis regulations at:
+%   https://uwaterloo.ca/graduate-studies/thesis.
+% This thesis template is geared towards generating a PDF version optimized
+% for viewing on an electronic display, including hyperlinks within the PDF.
+
+% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package
+% configuration below. THIS INFORMATION GETS EMBEDDED IN THE FINAL PDF
+% DOCUMENT. You can view the information if you view properties of the PDF.
+
+% Many faculties/departments also require one or more printed copies.
+% This template attempts to satisfy both types of output.
 % See additional notes below.
-% It is based on the standard "book" document class which provides all necessary sectioning structures and allows multi-part theses.
-
-% If you are using this template in Overleaf (cloud-based collaboration service), then it is automatically processed and previewed for you as you edit.
-
-% For people who prefer to install their own LaTeX distributions on their own computers, and process the source files manually, the following notes provide the sequence of tasks:
- 
+% It is based on the standard "book" document class which provides all
+% necessary sectioning structures and allows multi-part theses.
+
+% If you are using this template in Overleaf (cloud-based collaboration
+% service), then it is automatically processed and previewed for you as you
+% edit.
+
+% For people who prefer to install their own LaTeX distributions on their own
+% computers, and process the source files manually, the following notes
+% provide the sequence of tasks:
+
 % E.g. to process a thesis called "mythesis.tex" based on this template, run:
 
 % pdflatex mythesis	-- first pass of the pdflatex processor
 % bibtex mythesis	-- generates bibliography from .bib data file(s)
-% makeindex         -- should be run only if an index is used 
-% pdflatex mythesis	-- fixes numbering in cross-references, bibliographic references, glossaries, index, etc.
-% pdflatex mythesis	-- it takes a couple of passes to completely process all cross-references
-
-% If you use the recommended LaTeX editor, Texmaker, you would open the mythesis.tex file, then click the PDFLaTeX button. Then run BibTeX (under the Tools menu).
-% Then click the PDFLaTeX button two more times. 
-% If you have an index as well,you'll need to run MakeIndex from the Tools menu as well, before running pdflatex
-% the last two times.
-
-% N.B. The "pdftex" program allows graphics in the following formats to be included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
-% Tip: Generate your figures and photos in the size you want them to appear in your thesis, rather than scaling them with \includegraphics options.
-% Tip: Any drawings you do should be in scalable vector graphic formats: SVG, PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in the final PDF as well.
+% makeindex         -- should be run only if an index is used
+% pdflatex mythesis	-- fixes numbering in cross-references, bibliographic
+%                      references, glossaries, index, etc.
+% pdflatex mythesis	-- it takes a couple of passes to completely process all
+%                      cross-references
+
+% If you use the recommended LaTeX editor, Texmaker, you would open the
+% mythesis.tex file, then click the PDFLaTeX button. Then run BibTeX (under
+% the Tools menu). Then click the PDFLaTeX button two more times.
+% If you have an index as well, you'll need to run MakeIndex from the Tools
+% menu as well, before running pdflatex the last two times.
+
+% N.B. The "pdftex" program allows graphics in the following formats to be
+% included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
+% Tip: Generate your figures and photos in the size you want them to appear
+% in your thesis, rather than scaling them with \includegraphics options.
+% Tip: Any drawings you do should be in scalable vector graphic formats: SVG,
+% PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in the
+% final PDF as well.
 % Tip: Photographs should be cropped and compressed so as not to be too large.
 
-% To create a PDF output that is optimized for double-sided printing: 
-% 1) comment-out the \documentclass statement in the preamble below, and un-comment the second \documentclass line.
-% 2) change the value assigned below to the boolean variable "PrintVersion" from " false" to "true".
-
-%======================================================================
+% To create a PDF output that is optimized for double-sided printing:
+% 1) comment-out the \documentclass statement in the preamble below, and
+%    un-comment the second \documentclass line.
+% 2) change the value assigned below to the boolean variable "PrintVersion"
+%    from "false" to "true".
+
+% ======================================================================
 %   D O C U M E N T   P R E A M B L E
-% Specify the document class, default style attributes, and page dimensions, etc.
+% Specify the document class, default style attributes, page dimensions, etc.
 % For hyperlinked PDF, suitable for viewing on a computer, use this:
 \documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
 
-% For PDF, suitable for double-sided printing, change the PrintVersion variable below to "true" and use this \documentclass line instead of the one above:
+% For PDF, suitable for double-sided printing, change the PrintVersion
+% variable below to "true" and use this \documentclass line instead of the
+% one above:
 %\documentclass[letterpaper,12pt,titlepage,openright,twoside,final]{book}
 
+\usepackage{etoolbox}
+
 % Some LaTeX commands I define for my own nomenclature.
-% If you have to, it's easier to make changes to nomenclature once here than in a million places throughout your thesis!
+% If you have to, it's easier to make changes to nomenclature once here than
+% in a million places throughout your thesis!
 \newcommand{\package}[1]{\textbf{#1}} % package names in bold text
-\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font 
-\newcommand{\href}[1]{#1} % does nothing, but defines the command so the print-optimized version will ignore \href tags (redefined by hyperref pkg).
-%\newcommand{\texorpdfstring}[2]{#1} % does nothing, but defines the command
+\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font
+\newcommand{\href}[1]{#1} % does nothing, but defines the command so the
+% print-optimized version will ignore \href tags (redefined by hyperref pkg).
 % Anything defined here may be redefined by packages added below...
 
@@ -76,28 +98,35 @@
 \newboolean{PrintVersion}
 \setboolean{PrintVersion}{false}
-% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for hard copies by overriding some options of the hyperref package, called below.
+% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for
+% hard copies by overriding some options of the hyperref package, called below.
 
 %\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
-\usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
-\usepackage[pdftex]{graphicx} % For including graphics N.B. pdftex graphics driver 
+% Lots of math symbols and environments
+\usepackage{amsmath,amssymb,amstext}
+% For including graphics N.B. pdftex graphics driver
+\usepackage[pdftex]{graphicx}
+% Removes large sections of the document.
+\usepackage{comment}
 
 % Hyperlinks make it very easy to navigate an electronic document.
-% In addition, this is where you should specify the thesis title and author as they appear in the properties of the PDF document.
+% In addition, this is where you should specify the thesis title and author as
+% they appear in the properties of the PDF document.
 % Use the "hyperref" package
 % N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE
 \usepackage[pdftex,pagebackref=true]{hyperref} % with basic options
 %\usepackage[pdftex,pagebackref=true]{hyperref}
-		% N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing.
+% N.B. pagebackref=true provides links back from the References to the body
+% text. This can cause trouble for printing.
 \hypersetup{
     plainpages=false,       % needed if Roman numbers in frontpages
-    unicode=false,          % non-Latin characters in Acrobat’s bookmarks
-    pdftoolbar=true,        % show Acrobat’s toolbar?
-    pdfmenubar=true,        % show Acrobat’s menu?
+    unicode=false,          % non-Latin characters in Acrobat's bookmarks
+    pdftoolbar=true,        % show Acrobat's toolbar?
+    pdfmenubar=true,        % show Acrobat's menu?
     pdffitwindow=false,     % window fit to page when opened
     pdfstartview={FitH},    % fits the width of the page to the window
-%    pdftitle={uWaterloo\ LaTeX\ Thesis\ Template},    % title: CHANGE THIS TEXT!
+%    pdftitle={uWaterloo\ LaTeX\ Thesis\ Template}, % title: CHANGE THIS TEXT!
 %    pdfauthor={Author},    % author: CHANGE THIS TEXT! and uncomment this line
 %    pdfsubject={Subject},  % subject: CHANGE THIS TEXT! and uncomment this line
-%    pdfkeywords={keyword1} {key2} {key3}, % list of keywords, and uncomment this line if desired
+%    pdfkeywords={keyword1} {key2} {key3}, % optional list of keywords
     pdfnewwindow=true,      % links in new window
     colorlinks=true,        % false: boxed links; true: colored links
@@ -107,5 +136,6 @@
     urlcolor=cyan           % color of external links
 }
-\ifthenelse{\boolean{PrintVersion}}{   % for improved print quality, change some hyperref options
+% for improved print quality, change some hyperref options
+\ifthenelse{\boolean{PrintVersion}}{
 \hypersetup{	% override some previously defined hyperref options
 %    colorlinks,%
@@ -116,37 +146,52 @@
 }{} % end of ifthenelse (no else)
 
-\usepackage[automake,toc,abbreviations]{glossaries-extra} % Exception to the rule of hyperref being the last add-on package
-% If glossaries-extra is not in your LaTeX distribution, get it from CTAN (http://ctan.org/pkg/glossaries-extra),
-% although it's supposed to be in both the TeX Live and MikTeX distributions. There are also documentation and 
-% installation instructions there.
+% Exception to the rule of hyperref being the last add-on package
+\usepackage[automake,toc,abbreviations]{glossaries-extra}
+% If glossaries-extra is not in your LaTeX distribution, get it from CTAN
+% (http://ctan.org/pkg/glossaries-extra), although it's supposed to be in
+% both the TeX Live and MikTeX distributions. There are also documentation
+% and installation instructions there.
 
 % Setting up the page margins...
-\setlength{\textheight}{9in}\setlength{\topmargin}{-0.45in}\setlength{\headsep}{0.25in}
-% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the
-% top, bottom, and outside page edges and a 1.125 in. (81pt) gutter margin (on binding side). 
-% While this is not an issue for electronic viewing, a PDF may be printed, and so we have the same page layout for both printed and electronic versions, we leave the gutter margin in.
-% Set margins to minimum permitted by uWaterloo thesis regulations:
+\setlength{\textheight}{9in}
+\setlength{\topmargin}{-0.45in}
+\setlength{\headsep}{0.25in}
+% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at
+% the top, bottom, and outside page edges and a 1.125 in. (81pt) gutter margin
+% (on binding side). While this is not an issue for electronic viewing, a PDF
+% may be printed, and so we have the same page layout for both printed and
+% electronic versions, we leave the gutter margin in. Set margins to minimum
+% permitted by uWaterloo thesis regulations:
 \setlength{\marginparwidth}{0pt} % width of margin notes
 % N.B. If margin notes are used, you must adjust \textwidth, \marginparwidth
 % and \marginparsep so that the space left between the margin notes and page
 % edge is less than 15 mm (0.6 in.)
-\setlength{\marginparsep}{0pt} % width of space between body text and margin notes
-\setlength{\evensidemargin}{0.125in} % Adds 1/8 in. to binding side of all
+% width of space between body text and margin notes
+\setlength{\marginparsep}{0pt}
+% Adds 1/8 in. to binding side of all
 % even-numbered pages when the "twoside" printing option is selected
-\setlength{\oddsidemargin}{0.125in} % Adds 1/8 in. to the left of all pages when "oneside" printing is selected, and to the left of all odd-numbered pages when "twoside" printing is selected
-\setlength{\textwidth}{6.375in} % assuming US letter paper (8.5 in. x 11 in.) and side margins as above
+\setlength{\evensidemargin}{0.125in}
+% Adds 1/8 in. to the left of all pages when "oneside" printing is selected,
+% and to the left of all odd-numbered pages when "twoside" printing is selected
+\setlength{\oddsidemargin}{0.125in}
+% assuming US letter paper (8.5 in. x 11 in.) and side margins as above
+\setlength{\textwidth}{6.375in}
 \raggedbottom
 
-% The following statement specifies the amount of space between paragraphs. Other reasonable specifications are \bigskipamount and \smallskipamount.
+% The following statement specifies the amount of space between paragraphs.
+% Other reasonable specifications are \bigskipamount and \smallskipamount.
 \setlength{\parskip}{\medskipamount}
 
-% The following statement controls the line spacing.  
-% The default spacing corresponds to good typographic conventions and only slight changes (e.g., perhaps "1.2"), if any, should be made.
+% The following statement controls the line spacing.
+% The default spacing corresponds to good typographic conventions and only
+% slight changes (e.g., perhaps "1.2"), if any, should be made.
 \renewcommand{\baselinestretch}{1} % this is the default line space setting
 
 % By default, each chapter will start on a recto (right-hand side) page.
-% We also force each section of the front pages to start on a recto page by inserting \cleardoublepage commands.
-% In many cases, this will require that the verso (left-hand) page be blank, and while it should be counted, a page number should not be printed.
-% The following statements ensure a page number is not printed on an otherwise blank verso page.
+% We also force each section of the front pages to start on a recto page by
+% inserting \cleardoublepage commands. In many cases, this will require that
+% the verso (left-hand) page be blank, and while it should be counted, a page
+% number should not be printed. The following statements ensure a page number
+% is not printed on an otherwise blank verso page.
 \let\origdoublepage\cleardoublepage
 \newcommand{\clearemptydoublepage}{%
@@ -154,19 +199,36 @@
 \let\cleardoublepage\clearemptydoublepage
 
-% Define Glossary terms (This is properly done here, in the preamble and could also be \input{} from a separate file...)
+% Define Glossary terms (This is properly done here, in the preamble and
+% could also be \input{} from a separate file...)
 \input{glossaries}
 \makeglossaries
 
-\usepackage{comment}
 % cfa macros used in the document
 %\usepackage{cfalab}
+% I'm going to bring it back eventually.
+\makeatletter
+% Combines all \CC* commands:
+\newrobustcmd*\Cpp[1][\xspace]{\cfalab@Cpp#1}
+\newcommand\cfalab@Cpp{C\kern-.1em\hbox{+\kern-.25em+}}
+% Optional arguments do not work with pdf string. (Some fix-up required.)
+\pdfstringdefDisableCommands{\def\Cpp{C++}}
+\makeatother
+
 \input{common}
-\CFAStyle						% CFA code-style for all languages
-\lstset{language=CFA,basicstyle=\linespread{0.9}\tt}	% CFA default lnaguage
+% CFA code-style for all languages
+\CFAStyle
+% CFA default language
+\lstset{language=CFA,basicstyle=\linespread{0.9}\tt}
+% Annotations from Peter:
+\newcommand{\PAB}[1]{{\color{blue}PAB: #1}}
+% Change the style of abbreviations:
+\renewcommand{\abbrevFont}{}
 
 %======================================================================
 %   L O G I C A L    D O C U M E N T
 % The logical document contains the main content of your thesis.
-% Being a large document, it is a good idea to divide your thesis into several files, each one containing one chapter or other significant chunk of content, so you can easily shuffle things around later if desired.
+% Being a large document, it is a good idea to divide your thesis into several
+% files, each one containing one chapter or other significant chunk of content,
+% so you can easily shuffle things around later if desired.
 %======================================================================
 \begin{document}
@@ -175,18 +237,20 @@
 % FRONT MATERIAL
 % title page,declaration, borrowers' page, abstract, acknowledgements,
-% dedication, table of contents, list of tables, list of figures, nomenclature, etc.
-%----------------------------------------------------------------------
-\input{uw-ethesis-frontpgs} 
+% dedication, table of contents, list of tables, list of figures,
+% nomenclature, etc.
+%----------------------------------------------------------------------
+\input{uw-ethesis-frontpgs}
 
 %----------------------------------------------------------------------
 % MAIN BODY
 % We suggest using a separate file for each chapter of your thesis.
-% Start each chapter file with the \chapter command.
-% Only use \documentclass or \begin{document} and \end{document} commands in this master document.
+% Start each chapter file with the \chapter command. Only use \documentclass,
+% \begin{document} and \end{document} commands in this master document.
 % Tip: Putting each sentence on a new line is a way to simplify later editing.
 %----------------------------------------------------------------------
 \input{existing}
 \input{features}
-\input{unwinding}
+\input{implement}
+%\input{unwinding}
 \input{future}
 
@@ -198,13 +262,19 @@
 % Bibliography
 
-% The following statement selects the style to use for references.  
-% It controls the sort order of the entries in the bibliography and also the formatting for the in-text labels.
+% The following statement selects the style to use for references.
+% It controls the sort order of the entries in the bibliography and also the
+% formatting for the in-text labels.
 \bibliographystyle{plain}
-% This specifies the location of the file containing the bibliographic information.  
-% It assumes you're using BibTeX to manage your references (if not, why not?).
-\cleardoublepage % This is needed if the "book" document class is used, to place the anchor in the correct page, because the bibliography will start on its own page.
-% Use \clearpage instead if the document class uses the "oneside" argument
-\phantomsection  % With hyperref package, enables hyperlinking from the table of contents to bibliography             
-% The following statement causes the title "References" to be used for the bibliography section:
+% This specifies the location of the file containing the bibliographic
+% information. It assumes you're using BibTeX to manage your references (if
+% not, why not?).
+\cleardoublepage % This is needed if the "book" document class is used, to
+% place the anchor in the correct page, because the bibliography will start
+% on its own page.
+% Use \clearpage instead if the document class uses the "oneside" argument.
+\phantomsection  % With hyperref package, enables hyperlinking from the table
+% of contents to bibliography.
+% The following statement causes the title "References" to be used for the
+% bibliography section:
 \renewcommand*{\bibname}{References}
 
@@ -213,9 +283,11 @@
 
 \bibliography{uw-ethesis,pl}
-% Tip: You can create multiple .bib files to organize your references. 
-% Just list them all in the \bibliogaphy command, separated by commas (no spaces).
-
-% The following statement causes the specified references to be added to the bibliography even if they were not cited in the text. 
-% The asterisk is a wildcard that causes all entries in the bibliographic database to be included (optional).
+% Tip: You can create multiple .bib files to organize your references. Just
+% list them all in the \bibliography command, separated by commas (no spaces).
+
+% The following statement causes the specified references to be added to the
+% bibliography even if they were not cited in the text. The asterisk is a
+% wildcard that causes all entries in the bibliographic database to be
+% included (optional).
 % \nocite{*}
 %----------------------------------------------------------------------
@@ -225,11 +297,14 @@
 % The \appendix statement indicates the beginning of the appendices.
 \appendix
-% Add an un-numbered title page before the appendices and a line in the Table of Contents
+% Add an un-numbered title page before the appendices and a line in the Table
+% of Contents
 % \chapter*{APPENDICES}
 % \addcontentsline{toc}{chapter}{APPENDICES}
-% Appendices are just more chapters, with different labeling (letters instead of numbers).
+% Appendices are just more chapters, with different labeling (letters instead
+% of numbers).
 % \input{appendix-matlab_plots.tex}
 
-% GLOSSARIES (Lists of definitions, abbreviations, symbols, etc. provided by the glossaries-extra package)
+% GLOSSARIES (Lists of definitions, abbreviations, symbols, etc.
+% provided by the glossaries-extra package)
 % -----------------------------
 \printglossaries
Index: libcfa/src/bits/weakso_locks.hfa
===================================================================
--- libcfa/src/bits/weakso_locks.hfa	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ libcfa/src/bits/weakso_locks.hfa	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -15,4 +15,6 @@
 // Update Count     :
 //
+
+#pragma once
 
 #include "bits/locks.hfa"
Index: libcfa/src/memory.cfa
===================================================================
--- libcfa/src/memory.cfa	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ libcfa/src/memory.cfa	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jun  2 16:48:00 2020
 // Last Modified By : Andrew Beach
-// Last Modified On : Tue Jun  3 12:30:00 2020
-// Update Count     : 0
+// Last Modified On : Mon Feb  1 16:10:00 2021
+// Update Count     : 1
 //
 
@@ -56,9 +56,8 @@
 }
 
-forall(T & | sized(T) | { void ^?{}(T &); })
+forall(T & | sized(T))
 void ?{}(counter_ptr(T) & this, counter_ptr(T) that) {
 	// `that` is a copy but it should have neither a constructor
 	// nor destructor run on it so it shouldn't need adjustment.
-	internal_decrement(this);
 	internal_copy(this, that);
 }
@@ -66,5 +65,7 @@
 forall(T & | sized(T), Args... | { void ?{}(T&, Args); })
 void ?{}(counter_ptr(T) & this, Args args) {
-	this.data = (counter_data(T)*)new(args);
+	this.data = malloc();
+	this.data->counter = 1;
+	(this.data->object){args};
 }
 
@@ -126,5 +127,6 @@
 forall(T & | sized(T), Args... | { void ?{}(T &, Args); })
 void ?{}(unique_ptr(T) & this, Args args) {
-	this.data = (T *)new(args);
+	this.data = malloc();
+	(*this.data){args};
 }
 
Index: libcfa/src/memory.hfa
===================================================================
--- libcfa/src/memory.hfa	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ libcfa/src/memory.hfa	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jun  2 16:48:00 2020
 // Last Modified By : Andrew Beach
-// Last Modified On : Tue Jun  3 12:29:00 2020
-// Update Count     : 0
+// Last Modified On : Fri Jan 29 15:52:00 2021
+// Update Count     : 1
 //
 
@@ -17,73 +17,86 @@
 
 // Internal data object.
-forall(T & | sized(T)) {
-	struct counter_data {
-		unsigned int counter;
-		T object;
-	};
+forall(T & | sized(T))
+struct counter_data {
+	unsigned int counter;
+	T object;
+};
 
-	forall(Args... | { void ?{}(T &, Args); })
-	void ?{}(counter_data(T) & this, Args args);
+forall(T & | sized(T), Args... | { void ?{}(T &, Args); })
+void ?{}(counter_data(T) & this, Args args);
 
-	forall( | { void ^?{}(T &); })
-	void ^?{}(counter_data(T) & this);
-}
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ^?{}(counter_data(T) & this);
 
 // This is one of many pointers keeping this alive.
-forall(T & | sized(T)) {
-	struct counter_ptr {
-		counter_data(T) * data;
-	};
+forall(T & | sized(T))
+struct counter_ptr {
+	counter_data(T) * data;
+};
 
-	void ?{}(counter_ptr(T) & this);
-	void ?{}(counter_ptr(T) & this, zero_t);
-	forall( | { void ^?{}(T &); })
-	void ?{}(counter_ptr(T) & this, counter_ptr(T) that);
-	forall(Args... | { void ?{}(T&, Args); })
-	void ?{}(counter_ptr(T) & this, Args args);
+forall(T & | sized(T))
+void ?{}(counter_ptr(T) & this);
+forall(T & | sized(T))
+void ?{}(counter_ptr(T) & this, zero_t);
+forall(T & | sized(T))
+void ?{}(counter_ptr(T) & this, counter_ptr(T) that);
+forall(T & | sized(T), Args... | { void ?{}(T&, Args); })
+void ?{}(counter_ptr(T) & this, Args args);
 
-	forall( | { void ^?{}(T &); })
-	void ^?{}(counter_ptr(T) & this);
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ^?{}(counter_ptr(T) & this);
 
-	T & *?(counter_ptr(T) & this);
+forall(T & | sized(T))
+T & *?(counter_ptr(T) & this);
 
-	forall( | { void ^?{}(T &); })
-	void ?=?(counter_ptr(T) & this, counter_ptr(T) that);
-	forall( | { void ^?{}(T &); })
-	void ?=?(counter_ptr(T) & this, zero_t);
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ?=?(counter_ptr(T) & this, counter_ptr(T) that);
+forall(T & | sized(T) | { void ^?{}(T &); })
+void ?=?(counter_ptr(T) & this, zero_t);
 
-	int ?==?(counter_ptr(T) const & this, counter_ptr(T) const & that);
-	int ?!=?(counter_ptr(T) const & this, counter_ptr(T) const & that);
-	int ?==?(counter_ptr(T) const & this, zero_t);
-	int ?!=?(counter_ptr(T) const & this, zero_t);
-}
+forall(T & | sized(T))
+int ?==?(counter_ptr(T) const & this, counter_ptr(T) const & that);
+forall(T & | sized(T))
+int ?!=?(counter_ptr(T) const & this, counter_ptr(T) const & that);
+forall(T & | sized(T))
+int ?==?(counter_ptr(T) const & this, zero_t);
+forall(T & | sized(T))
+int ?!=?(counter_ptr(T) const & this, zero_t);
 
 // This is the only pointer that keeps this alive.
-forall(T &) {
-	struct unique_ptr {
-		T * data;
-	};
+forall(T &)
+struct unique_ptr {
+	T * data;
+};
 
-	void ?{}(unique_ptr(T) & this);
-	void ?{}(unique_ptr(T) & this, zero_t);
-	void ?{}(unique_ptr(T) & this, unique_ptr(T) that) = void;
-	forall( | sized(T), Args... | { void ?{}(T &, Args); })
-	void ?{}(unique_ptr(T) & this, Args args);
+forall(T &)
+void ?{}(unique_ptr(T) & this);
+forall(T &)
+void ?{}(unique_ptr(T) & this, zero_t);
+forall(T &)
+void ?{}(unique_ptr(T) & this, unique_ptr(T) that) = void;
+forall(T & | sized(T), Args... | { void ?{}(T &, Args); })
+void ?{}(unique_ptr(T) & this, Args args);
 
-	forall( | { void ^?{}(T &); })
-	void ^?{}(unique_ptr(T) & this);
+forall(T & | { void ^?{}(T &); })
+void ^?{}(unique_ptr(T) & this);
 
-	T & *?(unique_ptr(T) & this);
+forall(T & )
+T & *?(unique_ptr(T) & this);
 
-	void ?=?(unique_ptr(T) & this, unique_ptr(T) that) = void;
-	forall( | { void ^?{}(T &); })
-	void ?=?(unique_ptr(T) & this, zero_t);
+forall(T &)
+void ?=?(unique_ptr(T) & this, unique_ptr(T) that) = void;
+forall(T & | { void ^?{}(T &); })
+void ?=?(unique_ptr(T) & this, zero_t);
 
-	forall( | { void ^?{}(T &); })
-	void move(unique_ptr(T) & this, unique_ptr(T) & that);
+forall(T & | { void ^?{}(T &); })
+void move(unique_ptr(T) & this, unique_ptr(T) & that);
 
-	int ?==?(unique_ptr(T) const & this, unique_ptr(T) const & that);
-	int ?!=?(unique_ptr(T) const & this, unique_ptr(T) const & that);
-	int ?==?(unique_ptr(T) const & this, zero_t);
-	int ?!=?(unique_ptr(T) const & this, zero_t);
-}
+forall(T &)
+int ?==?(unique_ptr(T) const & this, unique_ptr(T) const & that);
+forall(T &)
+int ?!=?(unique_ptr(T) const & this, unique_ptr(T) const & that);
+forall(T &)
+int ?==?(unique_ptr(T) const & this, zero_t);
+forall(T &)
+int ?!=?(unique_ptr(T) const & this, zero_t);
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ src/Parser/parser.yy	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Jan 11 21:32:10 2021
-// Update Count     : 4633
+// Last Modified On : Wed Feb  3 18:30:12 2021
+// Update Count     : 4700
 //
 
@@ -41,5 +41,5 @@
 
 %{
-#define YYDEBUG_LEXER_TEXT (yylval)						// lexer loads this up each time
+#define YYDEBUG_LEXER_TEXT( yylval )					// lexer loads this up each time
 #define YYDEBUG 1										// get the pretty debugging code to compile
 #define YYERROR_VERBOSE									// more information in syntax errors
@@ -63,5 +63,5 @@
 extern TypedefTable typedefTable;
 
-stack< LinkageSpec::Spec > linkageStack;
+stack<LinkageSpec::Spec> linkageStack;
 
 bool appendStr( string & to, string & from ) {
@@ -187,5 +187,5 @@
 	ConstantExpr * constant = dynamic_cast<ConstantExpr *>(type->expr.get());
 	if ( constant && (constant->get_constant()->get_value() == "0" || constant->get_constant()->get_value() == "1") ) {
-    	type = new ExpressionNode( new CastExpr( maybeMoveBuild< Expression >(type), new BasicType( Type::Qualifiers(), BasicType::SignedInt ) ) );
+		type = new ExpressionNode( new CastExpr( maybeMoveBuild<Expression>(type), new BasicType( Type::Qualifiers(), BasicType::SignedInt ) ) );
 	} // if
 	return new ForCtrl(
@@ -440,10 +440,10 @@
 
 %type<decl> type_qualifier type_qualifier_name forall type_qualifier_list_opt type_qualifier_list
-%type<decl> type_specifier type_specifier_nobody
+%type<decl> type_specifier type_specifier_nobody enum_specifier_nobody
 
 %type<decl> variable_declarator variable_ptr variable_array variable_function
 %type<decl> variable_abstract_declarator variable_abstract_ptr variable_abstract_array variable_abstract_function
 
-%type<decl> attribute_list_opt attribute_list attribute_name_list attribute attribute_name
+%type<decl> attribute_list_opt attribute_list attribute_opt attribute attribute_name_list attribute_name
 
 // initializers
@@ -578,5 +578,5 @@
 		{ $$ = $2; }
 	| '(' compound_statement ')'						// GCC, lambda expression
-		{ $$ = new ExpressionNode( new StmtExpr( dynamic_cast< CompoundStmt * >(maybeMoveBuild< Statement >($2) ) ) ); }
+		{ $$ = new ExpressionNode( new StmtExpr( dynamic_cast<CompoundStmt *>(maybeMoveBuild<Statement>($2) ) ) ); }
 	| type_name '.' identifier							// CFA, nested type
 		{ SemanticError( yylloc, "Qualified name is currently unimplemented." ); $$ = nullptr; }
@@ -610,12 +610,15 @@
 		{
 			// create a GenericExpr wrapper with one association pair
-			$$ = new GenericExpr( nullptr, { { maybeMoveBuildType($1), maybeMoveBuild<Expression>($3) } } );
+			$$ = new GenericExpr( nullptr, { { maybeMoveBuildType($1), maybeMoveBuild<Expression>( $3 ) } } );
 		}
 	| DEFAULT ':' assignment_expression
-		{ $$ = new GenericExpr( nullptr, { { maybeMoveBuild<Expression>($3) } } ); }
+		{ $$ = new GenericExpr( nullptr, { { maybeMoveBuild<Expression>( $3 ) } } ); }
 	;
 
 postfix_expression:
 	primary_expression
+	| postfix_expression '[' assignment_expression ',' comma_expression ']'
+		// { $$ = new ExpressionNode( build_binary_val( OperKinds::Index, $1, new ExpressionNode( build_binary_val( OperKinds::Index, $3, $5 ) ) ) ); }
+		{ SemanticError( yylloc, "New array subscript is currently unimplemented." ); $$ = nullptr; }
 	| postfix_expression '[' assignment_expression ']'
 		// CFA, comma_expression disallowed in this context because it results in a common user error: subscripting a
@@ -743,5 +746,5 @@
 			switch ( $1 ) {
 			  case OperKinds::AddressOf:
-				$$ = new ExpressionNode( new AddressExpr( maybeMoveBuild< Expression >( $2 ) ) );
+				$$ = new ExpressionNode( new AddressExpr( maybeMoveBuild<Expression>( $2 ) ) );
 				break;
 			  case OperKinds::PointTo:
@@ -749,5 +752,5 @@
 				break;
 			  case OperKinds::And:
-				$$ = new ExpressionNode( new AddressExpr( new AddressExpr( maybeMoveBuild< Expression >( $2 ) ) ) );
+				$$ = new ExpressionNode( new AddressExpr( new AddressExpr( maybeMoveBuild<Expression>( $2 ) ) ) );
 				break;
 			  default:
@@ -762,9 +765,9 @@
 	  	{ $$ = new ExpressionNode( build_unary_ptr( OperKinds::Decr, $2 ) ); }
 	| SIZEOF unary_expression
-		{ $$ = new ExpressionNode( new SizeofExpr( maybeMoveBuild< Expression >( $2 ) ) ); }
+		{ $$ = new ExpressionNode( new SizeofExpr( maybeMoveBuild<Expression>( $2 ) ) ); }
 	| SIZEOF '(' type_no_function ')'
 		{ $$ = new ExpressionNode( new SizeofExpr( maybeMoveBuildType( $3 ) ) ); }
 	| ALIGNOF unary_expression							// GCC, variable alignment
-		{ $$ = new ExpressionNode( new AlignofExpr( maybeMoveBuild< Expression >( $2 ) ) ); }
+		{ $$ = new ExpressionNode( new AlignofExpr( maybeMoveBuild<Expression>( $2 ) ) ); }
 	| ALIGNOF '(' type_no_function ')'					// GCC, type alignment
 		{ $$ = new ExpressionNode( new AlignofExpr( maybeMoveBuildType( $3 ) ) ); }
@@ -794,7 +797,7 @@
 		{ $$ = new ExpressionNode( build_keyword_cast( $2, $5 ) ); }
 	| '(' VIRTUAL ')' cast_expression					// CFA
-		{ $$ = new ExpressionNode( new VirtualCastExpr( maybeMoveBuild< Expression >( $4 ), maybeMoveBuildType( nullptr ) ) ); }
+		{ $$ = new ExpressionNode( new VirtualCastExpr( maybeMoveBuild<Expression>( $4 ), maybeMoveBuildType( nullptr ) ) ); }
 	| '(' VIRTUAL type_no_function ')' cast_expression	// CFA
-		{ $$ = new ExpressionNode( new VirtualCastExpr( maybeMoveBuild< Expression >( $5 ), maybeMoveBuildType( $3 ) ) ); }
+		{ $$ = new ExpressionNode( new VirtualCastExpr( maybeMoveBuild<Expression>( $5 ), maybeMoveBuildType( $3 ) ) ); }
 	| '(' RETURN type_no_function ')' cast_expression	// CFA
 		{ SemanticError( yylloc, "Return cast is currently unimplemented." ); $$ = nullptr; }
@@ -977,5 +980,5 @@
 	assignment_expression
 	| comma_expression ',' assignment_expression
-		{ $$ = new ExpressionNode( new CommaExpr( maybeMoveBuild< Expression >( $1 ), maybeMoveBuild< Expression >( $3 ) ) ); }
+		{ $$ = new ExpressionNode( new CommaExpr( maybeMoveBuild<Expression>( $1 ), maybeMoveBuild<Expression>( $3 ) ) ); }
 	;
 
@@ -1102,5 +1105,5 @@
 	constant_expression							{ $$ = $1; }
 	| constant_expression ELLIPSIS constant_expression	// GCC, subrange
-		{ $$ = new ExpressionNode( new RangeExpr( maybeMoveBuild< Expression >( $1 ), maybeMoveBuild< Expression >( $3 ) ) ); }
+		{ $$ = new ExpressionNode( new RangeExpr( maybeMoveBuild<Expression>( $1 ), maybeMoveBuild<Expression>( $3 ) ) ); }
 	| subrange											// CFA, subrange
 	;
@@ -1247,7 +1250,7 @@
 		{ $$ = new StatementNode( build_computedgoto( $3 ) ); }
 		// A semantic check is required to ensure fallthru appears only in the body of a choose statement.
-    | fall_through_name ';'								// CFA
+	| fall_through_name ';'								// CFA
 		{ $$ = new StatementNode( build_branch( BranchStmt::FallThrough ) ); }
-    | fall_through_name identifier_or_type_name ';'		// CFA
+	| fall_through_name identifier_or_type_name ';'		// CFA
 		{ $$ = new StatementNode( build_branch( $2, BranchStmt::FallThrough ) ); }
 	| fall_through_name DEFAULT ';'						// CFA
@@ -1448,7 +1451,7 @@
 asm_operand:											// GCC
 	string_literal '(' constant_expression ')'
-		{ $$ = new ExpressionNode( new AsmExpr( nullptr, $1, maybeMoveBuild< Expression >( $3 ) ) ); }
+		{ $$ = new ExpressionNode( new AsmExpr( nullptr, $1, maybeMoveBuild<Expression>( $3 ) ) ); }
 	| '[' IDENTIFIER ']' string_literal '(' constant_expression ')'
-		{ $$ = new ExpressionNode( new AsmExpr( $2, $4, maybeMoveBuild< Expression >( $6 ) ) ); }
+		{ $$ = new ExpressionNode( new AsmExpr( $2, $4, maybeMoveBuild<Expression>( $6 ) ) ); }
 	;
 
@@ -1736,4 +1739,10 @@
 	| sue_type_specifier_nobody
 	| type_type_specifier
+	;
+
+enum_specifier_nobody:									// type specifier - {...}
+		// Preclude SUE declarations in restricted scopes (see type_specifier_nobody)
+	basic_type_specifier
+	| sue_type_specifier_nobody
 	;
 
@@ -2004,9 +2013,4 @@
 	;
 
-fred:
-	// empty
-		{ yyy = false; }
-	;
-
 aggregate_type:											// struct, union
 	aggregate_key attribute_list_opt
@@ -2014,5 +2018,5 @@
 	  '{' field_declaration_list_opt '}' type_parameters_opt
 		{ $$ = DeclarationNode::newAggregate( $1, nullptr, $7, $5, true )->addQualifiers( $2 ); }
-	| aggregate_key attribute_list_opt identifier fred
+	| aggregate_key attribute_list_opt identifier
 		{
 			typedefTable.makeTypedef( *$3, forall || typedefTable.getEnclForall() ? TYPEGENname : TYPEDEFname ); // create typedef
@@ -2020,6 +2024,6 @@
 		}
 	  '{' field_declaration_list_opt '}' type_parameters_opt
-		{ $$ = DeclarationNode::newAggregate( $1, $3, $9, $7, true )->addQualifiers( $2 ); }
-	| aggregate_key attribute_list_opt type_name fred
+		{ $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 ); }
+	| aggregate_key attribute_list_opt type_name
 		{
 			// for type_name can be a qualified type name S.T, in which case only the last name in the chain needs a typedef (other names in the chain should already have one)
@@ -2028,5 +2032,5 @@
 		}
 	  '{' field_declaration_list_opt '}' type_parameters_opt
-		{ $$ = DeclarationNode::newAggregate( $1, $3->type->symbolic.name, $9, $7, true )->addQualifiers( $2 ); }
+		{ $$ = DeclarationNode::newAggregate( $1, $3->type->symbolic.name, $8, $6, true )->addQualifiers( $2 ); }
 	| aggregate_type_nobody
 	;
@@ -2040,5 +2044,5 @@
 
 aggregate_type_nobody:									// struct, union - {...}
-	aggregate_key attribute_list_opt identifier fred
+	aggregate_key attribute_list_opt identifier
 		{
 			typedefTable.makeTypedef( *$3, forall || typedefTable.getEnclForall() ? TYPEGENname : TYPEDEFname );
@@ -2046,5 +2050,5 @@
 			$$ = DeclarationNode::newAggregate( $1, $3, nullptr, nullptr, false )->addQualifiers( $2 );
 		}
-	| aggregate_key attribute_list_opt type_name fred
+	| aggregate_key attribute_list_opt type_name
 		{
 			forall = false;								// reset
@@ -2184,24 +2188,35 @@
 	;
 
+// Cannot use attribute_list_opt because of ambiguity with enum_specifier_nobody, which already parses attributes.
+// Hence, only a single attribute is allowed after the "ENUM".
 enum_type:												// enum
-	ENUM attribute_list_opt '{' enumerator_list comma_opt '}'
+	ENUM attribute_opt '{' enumerator_list comma_opt '}'
 		{ $$ = DeclarationNode::newEnum( nullptr, $4, true )->addQualifiers( $2 ); }
-	| ENUM attribute_list_opt identifier
+	| ENUM attribute_opt identifier
 		{ typedefTable.makeTypedef( *$3 ); }
 	  '{' enumerator_list comma_opt '}'
 		{ $$ = DeclarationNode::newEnum( $3, $6, true )->addQualifiers( $2 ); }
-	| ENUM attribute_list_opt type_name
+	| ENUM attribute_opt typedef						// enum cannot be generic
 	  '{' enumerator_list comma_opt '}'
-		{ $$ = DeclarationNode::newEnum( $3->type->symbolic.name, $5, true )->addQualifiers( $2 ); }
+		{ $$ = DeclarationNode::newEnum( $3->name, $5, true )->addQualifiers( $2 ); }
+	| ENUM enum_specifier_nobody '{' enumerator_list comma_opt '}'
+		// { $$ = DeclarationNode::newEnum( nullptr, $4, true ); }
+		{ SemanticError( yylloc, "Typed enumeration is currently unimplemented." ); $$ = nullptr; }
+	| ENUM enum_specifier_nobody declarator '{' enumerator_list comma_opt '}'
+		// {
+		// 	typedefTable.makeTypedef( *$3->name );
+		// 	$$ = DeclarationNode::newEnum( nullptr, $5, true );
+		// }
+		{ SemanticError( yylloc, "Typed enumeration is currently unimplemented." ); $$ = nullptr; }
 	| enum_type_nobody
 	;
 
 enum_type_nobody:										// enum - {...}
-	ENUM attribute_list_opt identifier
+	ENUM attribute_opt identifier
 		{
 			typedefTable.makeTypedef( *$3 );
 			$$ = DeclarationNode::newEnum( $3, 0, false )->addQualifiers( $2 );
 		}
-	| ENUM attribute_list_opt type_name
+	| ENUM attribute_opt type_name						// enum cannot be generic
 		{
 			typedefTable.makeTypedef( *$3->type->symbolic.name );
@@ -2220,6 +2235,8 @@
 	// empty
 		{ $$ = nullptr; }
-	| '=' constant_expression
-		{ $$ = $2; }
+	// | '=' constant_expression
+	// 	{ $$ = $2; }
+	| '=' initializer
+		{ $$ = $2->get_expression(); }					// FIX ME: enum only deals with constant_expression
 	;
 
@@ -2403,5 +2420,5 @@
 		{ $$ = $3; }
 	| '[' push constant_expression ELLIPSIS constant_expression pop ']' // GCC, multiple array elements
-		{ $$ = new ExpressionNode( new RangeExpr( maybeMoveBuild< Expression >( $3 ), maybeMoveBuild< Expression >( $5 ) ) ); }
+		{ $$ = new ExpressionNode( new RangeExpr( maybeMoveBuild<Expression>( $3 ), maybeMoveBuild<Expression>( $5 ) ) ); }
 	| '.' '[' push field_name_list pop ']'				// CFA, tuple field selector
 		{ $$ = $4; }
@@ -2441,8 +2458,9 @@
 type_parameter:											// CFA
 	type_class identifier_or_type_name
-		{   typedefTable.addToScope( *$2, TYPEDEFname, "9" );
-			if ( $1 == TypeDecl::Otype ) { SemanticError( yylloc, "otype keyword is deprecated" ); }
-			if ( $1 == TypeDecl::Dtype ) { SemanticError( yylloc, "dtype keyword is deprecated" ); }
-			if ( $1 == TypeDecl::Ttype ) { SemanticError( yylloc, "ttype keyword is deprecated" ); }
+		{
+			typedefTable.addToScope( *$2, TYPEDEFname, "9" );
+			if ( $1 == TypeDecl::Otype ) { SemanticError( yylloc, "otype keyword is deprecated, use T " ); }
+			if ( $1 == TypeDecl::Dtype ) { SemanticError( yylloc, "dtype keyword is deprecated, use T &" ); }
+			if ( $1 == TypeDecl::Ttype ) { SemanticError( yylloc, "ttype keyword is deprecated, use T ..." ); }
 		}
 	  type_initializer_opt assertion_list_opt
@@ -2738,5 +2756,5 @@
 subrange:
 	constant_expression '~' constant_expression			// CFA, integer subrange
-		{ $$ = new ExpressionNode( new RangeExpr( maybeMoveBuild< Expression >( $1 ), maybeMoveBuild< Expression >( $3 ) ) ); }
+		{ $$ = new ExpressionNode( new RangeExpr( maybeMoveBuild<Expression>( $1 ), maybeMoveBuild<Expression>( $3 ) ) ); }
 	;
 
@@ -2762,4 +2780,10 @@
 	| attribute_list attribute
 		{ $$ = $2->addQualifiers( $1 ); }
+	;
+
+attribute_opt:
+	// empty
+		{ $$ = nullptr; }
+	| attribute
 	;
 
@@ -3166,4 +3190,7 @@
 	| '[' ']' multi_array_dimension
 		{ $$ = DeclarationNode::newArray( 0, 0, false )->addArray( $3 ); }
+	| '[' push assignment_expression pop ',' comma_expression ']'
+		{ $$ = DeclarationNode::newArray( $3, 0, false )->addArray( DeclarationNode::newArray( $6, 0, false ) ); }
+		// { SemanticError( yylloc, "New array dimension is currently unimplemented." ); $$ = nullptr; }
 	| multi_array_dimension
 	;
Index: tests/.expect/attributes.nast.x64.txt
===================================================================
--- tests/.expect/attributes.nast.x64.txt	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tests/.expect/attributes.nast.x64.txt	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -6,5 +6,5 @@
 
 }
-struct __attribute__ ((unused)) __anonymous0 {
+struct __anonymous0 {
 };
 static inline void _X12_constructorFv_S12__anonymous0_autogen___1(struct __anonymous0 *_X4_dstS12__anonymous0_1);
@@ -26,4 +26,5 @@
     return _X4_retS12__anonymous0_1;
 }
+__attribute__ ((unused)) struct __anonymous0 _X5DummyS12__anonymous0_1;
 struct __attribute__ ((unused)) Agn1;
 struct __attribute__ ((unused)) Agn2 {
Index: tests/.expect/attributes.nast.x86.txt
===================================================================
--- tests/.expect/attributes.nast.x86.txt	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tests/.expect/attributes.nast.x86.txt	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -6,5 +6,5 @@
 
 }
-struct __attribute__ ((unused)) __anonymous0 {
+struct __anonymous0 {
 };
 static inline void _X12_constructorFv_S12__anonymous0_autogen___1(struct __anonymous0 *_X4_dstS12__anonymous0_1);
@@ -26,4 +26,5 @@
     return _X4_retS12__anonymous0_1;
 }
+__attribute__ ((unused)) struct __anonymous0 _X5DummyS12__anonymous0_1;
 struct __attribute__ ((unused)) Agn1;
 struct __attribute__ ((unused)) Agn2 {
Index: tests/.expect/attributes.oast.x64.txt
===================================================================
--- tests/.expect/attributes.oast.x64.txt	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tests/.expect/attributes.oast.x64.txt	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -6,5 +6,5 @@
 
 }
-struct __attribute__ ((unused)) __anonymous0 {
+struct __anonymous0 {
 };
 static inline void _X12_constructorFv_S12__anonymous0_autogen___1(struct __anonymous0 *_X4_dstS12__anonymous0_1);
@@ -26,4 +26,5 @@
     return _X4_retS12__anonymous0_1;
 }
+__attribute__ ((unused)) struct __anonymous0 _X5DummyS12__anonymous0_1;
 struct __attribute__ ((unused)) Agn1;
 struct __attribute__ ((unused)) Agn2 {
Index: tests/.expect/attributes.oast.x86.txt
===================================================================
--- tests/.expect/attributes.oast.x86.txt	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tests/.expect/attributes.oast.x86.txt	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -6,5 +6,5 @@
 
 }
-struct __attribute__ ((unused)) __anonymous0 {
+struct __anonymous0 {
 };
 static inline void _X12_constructorFv_S12__anonymous0_autogen___1(struct __anonymous0 *_X4_dstS12__anonymous0_1);
@@ -26,4 +26,5 @@
     return _X4_retS12__anonymous0_1;
 }
+__attribute__ ((unused)) struct __anonymous0 _X5DummyS12__anonymous0_1;
 struct __attribute__ ((unused)) Agn1;
 struct __attribute__ ((unused)) Agn2 {
Index: tests/.expect/smart-pointers.txt
===================================================================
--- tests/.expect/smart-pointers.txt	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
+++ tests/.expect/smart-pointers.txt	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -0,0 +1,1 @@
+done
Index: tests/attributes.cfa
===================================================================
--- tests/attributes.cfa	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tests/attributes.cfa	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -10,6 +10,6 @@
 // Created On       : Mon Feb  6 16:07:02 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov  6 17:51:12 2018
-// Update Count     : 17
+// Last Modified On : Mon Jan 25 21:26:41 2021
+// Update Count     : 20
 // 
 
@@ -22,5 +22,5 @@
 
 // aggregate_name
-struct __attribute__(( unused )) {};
+struct __attribute__(( unused )) {} Dummy;
 struct __attribute__(( unused )) Agn1;
 struct __attribute__(( unused )) Agn2 {};
Index: tests/smart-pointers.cfa
===================================================================
--- tests/smart-pointers.cfa	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tests/smart-pointers.cfa	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -2,5 +2,5 @@
 
 #include <memory.hfa>
-#include <stdlib.hfa>
+#include <assert.h>
 
 void counter_test(void) {
@@ -53,7 +53,21 @@
 }
 
+void declare_test(void) {
+	counter_ptr(int) ptr_i0 = 3;
+	counter_ptr(char) ptr_c0 = 'a';
+	counter_ptr(float) ptr_f0 = 3.5f;
+	counter_ptr(double) ptr_d0 = 3.5;
+
+	unique_ptr(int) ptr_i1 = 3;
+	unique_ptr(char) ptr_c1 = 'a';
+	unique_ptr(float) ptr_f1 = 3.5f;
+	unique_ptr(double) ptr_d1 = 3.5;
+}
+
 int main(int argc, char * argv[]) {
 	counter_test();
 	unique_test();
 	pointer_equality();
+
+	printf("done\n");
 }
Index: tools/prettyprinter/Makefile.am
===================================================================
--- tools/prettyprinter/Makefile.am	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tools/prettyprinter/Makefile.am	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -11,6 +11,6 @@
 ## Created On       : Wed Jun 28 12:07:10 2017
 ## Last Modified By : Peter A. Buhr
-## Last Modified On : Mon Apr 16 09:43:23 2018
-## Update Count     : 20
+## Last Modified On : Thu Jan 28 08:48:22 2021
+## Update Count     : 23
 ###############################################################################
 
@@ -20,5 +20,5 @@
 BUILT_SOURCES = parser.hh
 
-AM_YFLAGS = -d -t -v
+AM_YFLAGS = -d -t -v -Wno-yacc
 
 SRC = lex.ll \
@@ -34,3 +34,3 @@
 pretty_CXXFLAGS = -Wno-deprecated -Wall -DYY_NO_INPUT -O2 -g -std=c++14
 
-MAINTAINERCLEANFILES = parser.output
+MOSTLYCLEANFILES = parser.output
Index: tools/prettyprinter/ParserTypes.h
===================================================================
--- tools/prettyprinter/ParserTypes.h	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tools/prettyprinter/ParserTypes.h	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -13,11 +13,11 @@
 // Created On       : Sun Dec 16 15:00:49 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jul 22 10:13:09 2017
-// Update Count     : 175
+// Last Modified On : Tue Jan 26 23:05:34 2021
+// Update Count     : 176
 // 
 
 #pragma once
 
-int yylex();
+extern "C" int yylex();
 
 #include <string>
Index: tools/prettyprinter/parser.yy
===================================================================
--- tools/prettyprinter/parser.yy	(revision c292244d4c299c228d7e40cd8a91071f5066c92f)
+++ tools/prettyprinter/parser.yy	(revision ef0b45633df5650457989c598232acaf4a0d8a66)
@@ -10,6 +10,6 @@
 // Created On       : Sat Dec 15 13:44:21 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Apr 15 21:40:30 2018
-// Update Count     : 1052
+// Last Modified On : Tue Jan 26 22:50:03 2021
+// Update Count     : 1053
 // 
 
@@ -17,4 +17,5 @@
 #define YYDEBUG_LEXER_TEXT( yylval )					// lexer loads this up each time
 #define YYDEBUG 1										// get the pretty debugging code to compile
+#define YYERROR_VERBOSE									// more information in syntax errors
 
 #include <iostream>