Index: doc/generic_types/evaluation/Makefile
===================================================================
--- doc/generic_types/evaluation/Makefile	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/Makefile	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -1,2 +1,3 @@
+CC = gcc
 CFA = cfa
 DEPFLAGS = -MMD -MP
@@ -6,8 +7,9 @@
 endif
 CXXFLAGS = $(CFLAGS) --std=c++14
+MAKEFILE_NAME = ${firstword ${MAKEFILE_LIST}}
 
-.PHONY: all clean distclean run-c run-cpp run-cfa run
+.PHONY : all clean run-c run-cpp run-cppv run-cfa run
 
-all: c-bench cpp-bench cfa-bench cpp-vbench
+all : c-bench cpp-bench cpp-vbench cfa-bench
 
 # rewrite object generation to auto-determine deps
@@ -17,94 +19,79 @@
 
 c-%.o : c-%.c
-c-%.o : c-%.c c-%.d
 	$(COMPILE.c) $(OUTPUT_OPTION) -c $<
 
 cpp-%.o : cpp-%.cpp
-cpp-%.o : cpp-%.cpp cpp-%.d
 	$(COMPILE.cpp) $(OUTPUT_OPTION) -c $<
 
 cfa-%.o : cfa-%.c
-cfa-%.o : cfa-%.c cfa-%.d
 	$(COMPILE.cfa) $(OUTPUT_OPTION) -c $<
 
-COBJS = c-stack.o c-pair.o c-print.o
-CPPOBJS = 
-CPPVOBJS = cpp-vstack.o
-CFAOBJS = cfa-stack.o cfa-pair.o cfa-print.o
+COBJS = c-stack.o c-pair.o c-print.o c-bench.o
+CPPOBJS = cpp-bench.o
+CPPVOBJS = cpp-vstack.o cpp-vbench.o
+CFAOBJS = cfa-stack.o cfa-pair.o cfa-print.o cfa-bench.o
 
-CFILES = c-bench.c bench.h $(COBJS:.o=.h) $(COBJS:.o=.c)
-CPPFILES = cpp-bench.cpp bench.hpp cpp-stack.hpp cpp-pair.hpp cpp-print.hpp
-CPPVFILES = cpp-vbench.cpp bench.hpp object.hpp $(CPPVOBJS:.o=.hpp) $(CPPVOBJS:.o=.cpp) cpp-vprint.hpp
-CFAFILES = cfa-bench.c bench.h $(CFAOBJS:.o=.h) $(CFAOBJS:.o=.c)
+${COBJS} ${CPPOBJS} ${CPPVOBJS} ${CFAOBJS} : ${MAKEFILE_NAME}
 
-c-bench: c-bench.c c-bench.d $(COBJS)
-	$(COMPILE.c) -o $@ $< $(COBJS) $(LDFLAGS)
+CFILES = bench.h $(patsubst c-bench.h,,$(COBJS:.o=.h)) $(COBJS:.o=.c)
+CPPFILES = bench.hpp cpp-stack.hpp cpp-pair.hpp cpp-print.hpp $(CPPOBJS:.o=.cpp)
+CPPVFILES = bench.hpp object.hpp cpp-vprint.hpp $(patsubst cpp-vbench.hpp,,$(CPPVOBJS:.o=.hpp)) $(CPPVOBJS:.o=.cpp)
+CFAFILES = bench.h $(patsubst cfa-bench.h,,$(CFAOBJS:.o=.h)) $(CFAOBJS:.o=.c)
 
-cpp-bench: cpp-bench.cpp cpp-bench.d $(CPPOBJS)
-	$(COMPILE.cpp) -o $@ $< $(CPPOBJS) $(LDFLAGS)
+c-bench : $(COBJS) c-bench.o
+	$(COMPILE.c) $(LDFLAGS) $^ -o $@
 
-cpp-vbench: cpp-vbench.cpp cpp-vbench.d $(CPPVOBJS)
-	$(COMPILE.cpp) -o $@ $< $(CPPVOBJS) $(LDFLAGS)
+cpp-bench : $(CPPOBJS) cpp-bench.o
+	$(COMPILE.cpp) $(LDFLAGS) $^ -o $@
 
-cfa-bench: cfa-bench.c cfa-bench.d $(CFAOBJS)
-	$(COMPILE.cfa) -o $@ $< $(CFAOBJS) $(LDFLAGS)
+cpp-vbench : $(CPPVOBJS) cpp-vbench.o
+	$(COMPILE.cpp) $(LDFLAGS) $^ -o $@
 
-clean:
-	-rm $(COBJS) c-bench
-	-rm $(CPPOBJS) cpp-bench
-	-rm $(CPPVOBJS) cpp-vbench
-	-rm $(CFAOBJS) cfa-bench
+cfa-bench : $(CFAOBJS) cfa-bench.o
+	$(COMPILE.cfa) $(LDFLAGS) $^ -o $@
 
-distclean: clean
-	-rm $(COBJS:.o=.d) c-bench.d
-	-rm $(CPPOBJS:.o=.d) cpp-bench.d
-	-rm $(CPPVOBJS:.o=.d) cpp-vbench.d
-	-rm $(CFAOBJS:.o=.d) cfa-bench.d
+# include dependency files
+-include $(COBJS:.o=.d)
+-include $(CPPOBJS:.o=.d)
+-include $(CPPVOBJS:.o=.d)
+-include $(CFAOBJS:.o=.d)
 
-run-c: c-bench
+clean :
+	rm -f $(COBJS) $(COBJS:.o=.d) c-bench
+	rm -f $(CPPOBJS) $(CPPOBJS:.o=.d) cpp-bench
+	rm -f $(CPPVOBJS) $(CPPVOBJS:.o=.d) cpp-vbench
+	rm -f $(CFAOBJS) $(CFAOBJS:.o=.d) cfa-bench
+
+run-c : c-bench
 	@echo
 	@echo '## C ##'
-	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./c-bench
+	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./$<
 	@printf 'source_size:\t%8d lines\n' `cat $(CFILES) | wc -l`
 	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s c-bench`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
 
-run-cfa: cfa-bench
+run-cpp : cpp-bench
+	@echo
+	@echo '## C++ ##'
+	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./$<
+	@printf 'source_size:\t%8d lines\n' `cat $(CPPFILES) | wc -l`
+	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPFILES) | fgrep '/***/' -c`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
+
+run-cppv : cpp-vbench
+	@echo
+	@echo '## C++obj ##'
+	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./$<
+	@printf 'source_size:\t%8d lines\n' `cat $(CPPVFILES) | wc -l`
+	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPVFILES) | fgrep '/***/' -c`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
+
+run-cfa : cfa-bench
 	@echo
 	@echo '## Cforall ##'
-	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./cfa-bench
+	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./$<
 	@printf 'source_size:\t%8d lines\n' `cat $(CFAFILES) | wc -l`
 	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CFAFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s cfa-bench`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
 
-run-cpp: cpp-bench
-	@echo
-	@echo '## C++ ##'
-	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./cpp-bench
-	@printf 'source_size:\t%8d lines\n' `cat $(CPPFILES) | wc -l`
-	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s cpp-bench`
-
-run-cppv: cpp-vbench
-	@echo
-	@echo '## C++obj ##'
-	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./cpp-vbench
-	@printf 'source_size:\t%8d lines\n' `cat $(CPPVFILES) | wc -l`
-	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPVFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s cpp-vbench`
-
-run: run-c run-cfa run-cpp run-cppv
-
-# so make doesn't fail without dependency files
-%.d: ;
-
-# so make won't delete dependency files
-.PRECIOUS: %.d
-
-# include dependency files
--include: $(COBJS:.o=.d)
--include: $(CPPOBJS:.o=.d)
--include: $(CFAOBJS:.o=.d)
--include: c-bench.d
--include: cpp-bench.d
--include: cfa-bench.d
+run : run-c run-cfa run-cpp run-cppv
Index: doc/generic_types/evaluation/c-bench.c
===================================================================
--- doc/generic_types/evaluation/c-bench.c	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/c-bench.c	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -39,5 +39,5 @@
 
 int main(int argc, char** argv) {
-	FILE * out = fopen("c-out.txt", "w");
+	FILE * out = fopen("/dev/null", "w");
 	int maxi = 0, vali = 42;
 	struct stack si = new_stack(), ti;
Index: doc/generic_types/evaluation/c-stack.c
===================================================================
--- doc/generic_types/evaluation/c-stack.c	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/c-stack.c	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -11,10 +11,8 @@
 void copy_stack(struct stack* s, const struct stack* t, void* (*copy)(const void*)) {
 	struct stack_node** crnt = &s->head;
-	struct stack_node* next = t->head;
-	while ( next ) {
+	for ( struct stack_node* next = t->head; next; next = next->next ) {
 		*crnt = malloc(sizeof(struct stack_node)); /***/
 		**crnt = (struct stack_node){ copy(next->value) }; /***/
 		crnt = &(*crnt)->next;
-		next = next->next;
 	}
 	*crnt = 0;
@@ -22,6 +20,5 @@
 
 void clear_stack(struct stack* s, void (*free_el)(void*)) {
-	struct stack_node* next = s->head;
-	while ( next ) {
+    for ( struct stack_node* next = s->head; next; ) {
 		struct stack_node* crnt = next;
 		next = crnt->next;
Index: doc/generic_types/evaluation/cfa-bench.c
===================================================================
--- doc/generic_types/evaluation/cfa-bench.c	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/cfa-bench.c	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -6,5 +6,5 @@
 
 int main( int argc, char *argv[] ) {
-	FILE * out = fopen( "cfa-out.txt", "w" );
+	FILE * out = fopen( "/dev/null", "w" );
 	int maxi = 0, vali = 42;
 	stack(int) si, ti;
Index: doc/generic_types/evaluation/cfa-stack.c
===================================================================
--- doc/generic_types/evaluation/cfa-stack.c	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/cfa-stack.c	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -11,10 +11,8 @@
 forall(otype T) void ?{}(stack(T)* s, stack(T) t) {
 	stack_node(T)** crnt = &s->head;
-	stack_node(T)* next = t.head;
-	while ( next ) {
+	for ( stack_node(T)* next = t.head; next; next = next->next ) {
 		*crnt = ((stack_node(T)*)malloc()){ next->value }; /***/
 		stack_node(T)* acrnt = *crnt;
 		crnt = &acrnt->next;
-		next = next->next;
 	}
 	*crnt = 0;
@@ -46,6 +44,5 @@
 
 forall(otype T) void clear(stack(T)* s) {
-	stack_node(T)* next = s->head;
-	while ( next ) {
+    for ( stack_node(T)* next = s->head; next; ) {
 		stack_node(T)* crnt = next;
 		next = crnt->next;
Index: doc/generic_types/evaluation/cpp-bench.cpp
===================================================================
--- doc/generic_types/evaluation/cpp-bench.cpp	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/cpp-bench.cpp	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -7,5 +7,5 @@
 
 int main(int argc, char** argv) {
-	std::ofstream out{"cpp-out.txt"};
+	std::ofstream out{"/dev/null"};
 	int maxi = 0, vali = 42;
 	stack<int> si, ti;
Index: doc/generic_types/evaluation/cpp-stack.hpp
===================================================================
--- doc/generic_types/evaluation/cpp-stack.hpp	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/cpp-stack.hpp	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -13,9 +13,7 @@
 	void copy(const stack<T>& o) {
 		node** crnt = &head;
-		node* next = o.head;
-		while ( next ) {
+		for ( node* next = o.head; next; next = next->next ) {
 			*crnt = new node{ next->value }; /***/
 			crnt = &(*crnt)->next;
-			next = next->next;
 		}
 		*crnt = nullptr;
@@ -23,6 +21,5 @@
 public:
 	void clear() {
-		node* next = head;
-		while ( next ) {
+	    for ( node* next = head; next; ) {
 			node* crnt = next;
 			next = crnt->next;
Index: doc/generic_types/evaluation/cpp-vbench.cpp
===================================================================
--- doc/generic_types/evaluation/cpp-vbench.cpp	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/cpp-vbench.cpp	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -7,5 +7,5 @@
 
 int main(int argc, char** argv) {
-	std::ofstream out{"cpp-vout.txt"};
+	std::ofstream out{"/dev/null"};
 	integer maxi{ 0 }, vali{ 42 };
 	stack si, ti;
Index: doc/generic_types/evaluation/cpp-vstack.cpp
===================================================================
--- doc/generic_types/evaluation/cpp-vstack.cpp	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/evaluation/cpp-vstack.cpp	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -6,9 +6,7 @@
 void stack::copy(const stack& o) {
 	node** crnt = &head;
-	node* next = o.head;
-	while ( next ) {
+	for ( node* next = o.head; next; next = next->next ) {
 		*crnt = new node{ *next->value };
 		crnt = &(*crnt)->next;
-		next = next->next;
 	}
 	*crnt = nullptr;
@@ -35,6 +33,5 @@
 
 void stack::clear() {
-	node* next = head;
-	while ( next ) {
+    for ( node* next = head; next; ) {
 		node* crnt = next;
 		next = crnt->next;
Index: doc/generic_types/generic_types.tex
===================================================================
--- doc/generic_types/generic_types.tex	(revision b3d70eba692f70343957b265f1ac7c17022df555)
+++ doc/generic_types/generic_types.tex	(revision 6a8ac0b3751e2c2cb5570243c8d6385e68ea5893)
@@ -232,5 +232,5 @@
 int comp( const void * t1, const void * t2 ) { return *(double *)t1 < *(double *)t2 ? -1 :
 				*(double *)t2 < *(double *)t1 ? 1 : 0; }
-double key = 5.0, vals[10] = { /* 10 floating-point values */ };
+double key = 5.0, vals[10] = { /* 10 sorted floating-point values */ };
 double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp );	$\C{// search sorted array}$
 \end{lstlisting}
@@ -354,5 +354,5 @@
 One of the known shortcomings of standard C is that it does not provide reusable type-safe abstractions for generic data structures and algorithms.
 Broadly speaking, there are three approaches to implement abstract data-structures in C.
-One approach is to write bespoke data structures for each context in which they are needed.
+One approach is to write bespoke data-structures for each context in which they are needed.
 While this approach is flexible and supports integration with the C type-checker and tooling, it is also tedious and error-prone, especially for more complex data structures.
 A second approach is to use @void *@--based polymorphism, \eg the C standard-library functions @bsearch@ and @qsort@; an approach which does allow reuse of code for common functionality.
@@ -542,5 +542,5 @@
 \end{lstlisting}
 where the tuple variable-name serves the same purpose as the parameter name(s).
-Tuple variables can be composed of any types, except for array types, since array sizes are generally unknown.
+Tuple variables can be composed of any types, except for array types, since array sizes are generally unknown in C.
 
 One way to access the tuple-variable components is with assignment or composition:
@@ -552,9 +552,9 @@
 \begin{lstlisting}
 [int, int] * p = &qr;						$\C{// tuple pointer}$
-int rem = qr.1;								$\C{// access remainder}$
-int quo = div( 13, 5 ).0;					$\C{// access quotient}$
-p->0 = 5;									$\C{// change quotient}$
-bar( qr.1, qr );							$\C{// pass remainder and quotient/remainder}$
-rem = [42, div( 13, 5 )].0.1;				$\C{// access 2nd component of 1st component of tuple expression}$
+int rem = qr`.1`;							$\C{// access remainder}$
+int quo = div( 13, 5 )`.0`;					$\C{// access quotient}$
+p`->0` = 5;									$\C{// change quotient}$
+bar( qr`.1`, qr );							$\C{// pass remainder and quotient/remainder}$
+rem = [42, div( 13, 5 )]`.0.1`;				$\C{// access 2nd component of 1st component of tuple expression}$
 \end{lstlisting}
 
@@ -616,5 +616,5 @@
 This semantics means mass assignment differs from C cascading assignment (\eg @a = b = c@) in that conversions are applied in each individual assignment, which prevents data loss from the chain of conversions that can happen during a cascading assignment.
 For example, @[y, x] = 3.14@ performs the assignments @y = 3.14@ and @x = 3.14@, yielding @y == 3.14@ and @x == 3@;
-whereas C cascading assignment @y = x = 3.14@ performs the assignments @x = 3.14@ and @y = x@, yielding @3@ in @y@ and @x@.
+whereas, C cascading assignment @y = x = 3.14@ performs the assignments @x = 3.14@ and @y = x@, yielding @3@ in @y@ and @x@.
 Finally, tuple assignment is an expression where the result type is the type of the left-hand side of the assignment, just like all other assignment expressions in C.
 This example shows mass, multiple, and cascading assignment used in one expression:
@@ -742,5 +742,5 @@
 \end{lstlisting}
 Hence, function parameter and return lists are flattened for the purposes of type unification allowing the example to pass expression resolution.
-This relaxation is possible by extending the thunk scheme described by \citet{Bilson03}.
+This relaxation is possible by extending the thunk scheme described by~\citet{Bilson03}.
 Whenever a candidate's parameter structure does not exactly match the formal parameter's structure, a thunk is generated to specialize calls to the actual function:
 \begin{lstlisting}
@@ -748,5 +748,5 @@
 \end{lstlisting}
 so the thunk provides flattening and structuring conversions to inferred functions, improving the compatibility of tuples and polymorphism.
-These thunks take advantage of GCC C nested-functions to produce closures that have the usual function pointer signature.
+These thunks take advantage of GCC C nested-functions to produce closures that have the usual function-pointer signature.
 
 
@@ -829,5 +829,5 @@
 \subsection{Implementation}
 
-Tuples are implemented in the \CFA translator via a transformation into generic types.
+Tuples are implemented in the \CFA translator via a transformation into \emph{generic types}.
 For each $N$, the first time an $N$-tuple is seen in a scope a generic type with $N$ type parameters is generated, \eg:
 \begin{lstlisting}
@@ -1086,5 +1086,5 @@
 Finally, we demonstrate that \CFA performance for some idiomatic cases is better than C and close to \CC, showing the design is practically applicable.
 
-There is ongoing work on a wide range of \CFA feature extensions, including reference types, exceptions, concurrent primitives and modules.
+There is ongoing work on a wide range of \CFA feature extensions, including reference types, arrays with size, exceptions, concurrent primitives and modules.
 (While all examples in the paper compile and run, a public beta-release of \CFA will take another 8--12 months to finalize these additional extensions.)
 In addition, there are interesting future directions for the polymorphism design.
@@ -1092,5 +1092,5 @@
 \CFA polymorphic functions use dynamic virtual-dispatch; 
 the runtime overhead of this approach is low, but not as low as inlining, and it may be beneficial to provide a mechanism for performance-sensitive code.
-Two promising approaches are an @inline@ annotation at polymorphic function call sites to create a template-specialization of the function (provided the code is visible) or placing an @inline@ annotation on polymorphic function-definitions to instantiate a specialized version for some set of types.
+Two promising approaches are an @inline@ annotation at polymorphic function call sites to create a template-specialization of the function (provided the code is visible) or placing an @inline@ annotation on polymorphic function-definitions to instantiate a specialized version for some set of types (\CC template specialization).
 These approaches are not mutually exclusive and allow performance optimizations to be applied only when necessary, without suffering global code-bloat.
 In general, we believe separate compilation, producing smaller code, works well with loaded hardware-caches, which may offset the benefit of larger inlined-code.
@@ -1117,5 +1117,5 @@
 Throughout, @/***/@ designates a counted redundant type annotation.
 
-\medskip\noindent
+\smallskip\noindent
 \CFA
 \begin{lstlisting}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt]
