Index: doc/theses/mike_brooks_MMath/array.tex
===================================================================
--- doc/theses/mike_brooks_MMath/array.tex	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ doc/theses/mike_brooks_MMath/array.tex	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -6,21 +6,32 @@
 
 Arrays in C are possibly the single most misunderstood and incorrectly used feature in the language, resulting in the largest proportion of runtime errors and security violations.
-This chapter describes the new \CFA language and library features that introduce a length-checked array-type to the \CFA standard library~\cite{Cforall}, \eg:
-\begin{cfa}
-@array( float, 99 )@ x;					$\C{// x contains 99 floats}$
+This chapter describes the new \CFA language and library features that introduce a length-checked array-type to the \CFA standard library~\cite{Cforall}.
+
+Specifically, a new \CFA array is declared:
+\begin{cfa}
+@array( float, 99 )@ x;					$\C[2.75in]{// x contains 99 floats}$
+\end{cfa}
+using generic type @array@ with arguments @float@ and @99@.
+A function @f@ is declared with an @array@ parameter of length @42@.
+\begin{cfa}
 void f( @array( float, 42 )@ & p ) {}	$\C{// p accepts 42 floats}$
 f( x );									$\C{// statically rejected: types are different, 99 != 42}$
 
-forall( T, [N] )
-void g( @array( T, N )@ & p, int i ) {
+test2.cfa:3:1 error: Invalid application of existing declaration(s) in expression.
+Applying untyped:  Name: f ... to:  Name: x
+\end{cfa}
+The call @f( x )@ is invalid because the @array@ lengths @99@ and @42@ do not match.
+
+Next, function @g@ introduces a @forall@ prefix on type parameter @T@ and arbitrary \emph{dimension parameter} @N@, the new feature that represents a count of elements managed by the type system.
+\begin{cfa}
+forall( T, @[N]@ )
+void g( array( T, @N@ ) & p, int i ) {
 	T elem = p[i];						$\C{// dynamically checked: requires 0 <= i < N}$
 }
 g( x, 0 );								$\C{// T is float, N is 99, dynamic subscript check succeeds}$
-g( x, 1000 );							$\C{// T is float, N is 99, dynamic subscript check fails}$
-\end{cfa}
-This example declares variable @x@, with generic type @array@ using arguments @float@ and @99@.
-Function @f@ is declared with an @array@ parameter of length @42@.
-The call @f( x )@ is invalid because the @array@ lengths @99@ and @42@ do not match.
-Next, function @g@ introduces a @forall@ prefix on type parameter @T@ and arbitrary \emph{dimension parameter} @N@, the new feature that represents a count of elements managed by the type system.
+g( x, 1000 );							$\C{// T is float, N is 99, dynamic subscript check fails}\CRT$
+
+Cforall Runtime error: subscript 1000 exceeds dimension range [0,99) $for$ array 0x555555558020.
+\end{cfa}
 The call @g( x, 0 )@ is valid because @g@ accepts any length of array, where the type system infers @float@ for @T@ and length @99@ for @N@.
 Inferring values for @T@ and @N@ is implicit without programmer involvement.
@@ -35,23 +46,23 @@
 forall( [N] )
 void declDemo() {
-	float x1[N];				$\C{// built-in type ("C array")}$
-	array(float, N) x2;			$\C{// type from library}$
+	float x1[N];						$\C{// built-in type ("C array")}$
+	array(float, N) x2;					$\C{// type from library}$
 }
 \end{cfa}
 Both of the locally-declared array variables, @x1@ and @x2@, have 42 elements, each element being a @float@.
 The two variables have identical size and layout; they both encapsulate 42-float, stack \vs heap allocations with no additional ``bookkeeping'' allocations or headers.
-Providing this explicit generic approach required a significant extension to the \CFA type system to support a full-feature, safe, efficient (space and time) array-type, which forms the foundation for more complex array forms in \CFA.
+Providing this explicit generic approach requires a significant extension to the \CFA type system to support a full-feature, safe, efficient (space and time) array-type, which forms the foundation for more complex array forms in \CFA.
 
 Admittedly, the @array@ library type (type for @x2@) is syntactically different from its C counterpart.
 A future goal (TODO xref) is to provide a built-in array type with syntax approaching C's (type for @x1@);
 then, the library @array@ type can be removed giving \CFA a largely uniform array type.
-At present, the built-in array is only partially supported, so the generic @array@ is used exclusively in the discussion;
+At present, the C syntax @array@ is only partially supported, so the generic @array@ is used exclusively in the discussion;
 feature support and C compatibility are revisited in Section ? TODO.
 
-Offering an @array@ type, as a distinct alternative to the C array, is consistent with \CFA's goal of backwards compatibility, \ie virtually all existing C (gcc) programs can be compiled by \CFA with only a small number of changes, similar to \CC (g++).
+Offering the @array@ type, as a distinct alternative to the C array, is consistent with \CFA's goal of backwards compatibility, \ie virtually all existing C (@gcc@) programs can be compiled by \CFA with only a small number of changes, similar to \CC (@g++@).
 However, a few compatibility-breaking changes to the behaviour of the C array are necessary, both as an implementation convenience and to fix C's lax treatment of arrays.
 Hence, the @array@ type is an opportunity to start from a clean slate and show a cohesive selection of features, making it unnecessary to deal with every inherited complexity introduced by the C array TODO xref.
 
-My contributions are:
+My contributions in this chapter are:
 \begin{enumerate}
 \item A type system enhancement that lets polymorphic functions and generic types be parameterized by a numeric value: @forall( [N] )@.
@@ -100,11 +111,11 @@
 \end{figure}
 
-\VRef[Figure]{f:fHarness} shows a harness that uses the @f@ function illustrating how dynamic values are fed into the @array@ type.
-Here, the dimension of the @x@, @y@, and @result@ arrays is specified from a command-line value and these arrays are allocated on the stack.
+\VRef[Figure]{f:fHarness} shows a harness that uses function @f@ to illustrate how dynamic values are fed into the @array@ type.
+Here, the dimension of arrays @x@, @y@, and @result@ is specified from a command-line value, @dim@, and these arrays are allocated on the stack.
 Then the @x@ array is initialized with decreasing values, and the @y@ array with amounts offset by constant @0.005@, giving relative differences within tolerance initially and diverging for later values.
 The program main is run (see figure bottom) with inputs @5@ and @7@ for sequence lengths.
-The loops follow the familiar pattern of using the variable @n@ to iterate through the arrays.
-Most importantly, the type system implicitly captures @n@ at the call of @f@ and makes it available throughout @f@ as @N@.
-The example shows @n@ adapting into a type-system managed length at the declarations of @x@, @y@, and @result@, @N@ adapting in the same way at @f@'s loop bound, and a pass-thru use of @n@ at @f@'s declaration of @ret@.
+The loops follow the familiar pattern of using the variable @dim@ to iterate through the arrays.
+Most importantly, the type system implicitly captures @dim@ at the call of @f@ and makes it available throughout @f@ as @N@.
+The example shows @dim@ adapting into a type-system managed length at the declarations of @x@, @y@, and @result@, @N@ adapting in the same way at @f@'s loop bound, and a pass-thru use of @dim@ at @f@'s declaration of @ret@.
 Except for the lifetime-management issue of @result@, \ie explicit @free@, this program has eliminated both the syntactic and semantic problems associated with C arrays and their usage.
 These benefits cannot be overstated.
@@ -141,5 +152,5 @@
 \CC has a (mistaken) belief that references are not objects, but pointers are objects.
 In the \CC example, the arrays fall back on C arrays, which have a duality with references with respect to automatic dereferencing.
-The \CFA array is a contiguous object with an address, which can stored as a reference or pointer.
+The \CFA array is a contiguous object with an address, which can be stored as a reference or pointer.
 \item
 C/\CC arrays cannot be copied, while \CFA arrays can be copied, making them a first-class object (although array copy is often avoided for efficiency).
@@ -151,5 +162,5 @@
 
 @template< typename T, size_t N >@
-void copy( T ret[N], T x[N] ) {
+void copy( T ret[@N@], T x[@N@] ) {
 	for ( int i = 0; i < N; i += 1 ) ret[i] = x[i];
 }
@@ -167,5 +178,5 @@
 int main() {
 	@forall( T, [N] )@   // nested function
-	void copy( array( T, N ) & ret, array( T, N ) & x ) {
+	void copy( array( T, @N@ ) & ret, array( T, @N@ ) & x ) {
 		for ( i; 10 ) ret[i] = x[i];
 	}
@@ -185,9 +196,9 @@
 
 Continuing the discussion of \VRef[Figure]{f:fHarness}, the example has @f@ expecting two arrays of the same length.
-A compile-time error occurs when attempting to call @f@ with arrays of differing lengths.
+As stated previously, a compile-time error occurs when attempting to call @f@ with arrays of differing lengths.
 % removing leading whitespace
 \lstinput[tabsize=1]{52-53}{hello-array.cfa}
 \lstinput[tabsize=1,aboveskip=0pt]{62-64}{hello-array.cfa}
-As is common practice in C, the programmer is free to cast, \ie to assert knowledge not shared with the type system.
+C allows casting to assert knowledge not shared with the type system.
 \lstinput{70-74}{hello-array.cfa}
 
@@ -197,13 +208,11 @@
 \lstinput{10-15}{hello-accordion.cfa}
 This structure's layout has the starting offset of @municipalities@ varying in @NprovTerty@, and the offset of @total_pt@ and @total_mun@ varying in both generic parameters.
-For a function that operates on a @CanadaPop@ structure, the type system handles this variation transparently.
+For a function that operates on a @CanPop@ structure, the type system handles this variation transparently.
 \lstinput{40-45}{hello-accordion.cfa}
-\VRef[Figure]{f:checkHarness} shows program results where different offset values being used.
-The output values show that @summarize@ and its caller agree on both the offsets (where the callee starts reading @cost_contribs@ and where the callee writes @total_cost@).
-Yet the call site just says, ``pass the request.''
+\VRef[Figure]{f:checkHarness} shows the @CanPop@ harness and results with different array sizes, if the municipalities changed after a census.
 
 \begin{figure}
 \lstinput{60-68}{hello-accordion.cfa}
-\lstinput{70-72}{hello-accordion.cfa}
+\lstinput{70-75}{hello-accordion.cfa}
 \caption{\lstinline{check} Harness}
 \label{f:checkHarness}
@@ -232,10 +241,10 @@
 In general, storage layout is hidden by subscripting, and only appears when passing arrays among different programming languages or accessing specific hardware.
 
-\VRef[Figure]{f:FixedVariable} shows two C90 approaches for manipulating contiguous arrays.
+\VRef[Figure]{f:FixedVariable} shows two C90 approaches for manipulating a contiguous matrix.
 Note, C90 does not support VLAs.
-The fixed-dimension approach uses the type system;
+The fixed-dimension approach (left) uses the type system;
 however, it requires all dimensions except the first to be specified at compile time, \eg @m[][6]@, allowing all subscripting stride calculations to be generated with constants.
 Hence, every matrix passed to @fp1@ must have exactly 6 columns but the row size can vary.
-The variable-dimension approach ignores (violates) the type system, \ie argument and parameters types do not match, and manually performs pointer arithmetic for subscripting in the macro @sub@.
+The variable-dimension approach (right) ignores (violates) the type system, \ie argument and parameter types do not match, and subscripting is performed manually using pointer arithmetic in the macro @sub@.
 
 \begin{figure}
@@ -258,5 +267,5 @@
 	...  printf( "%d ", @sub( m, r, c )@ );  ...
 }
-int vm1[4][4], vm2[6][8]; // no VLA
+int vm1[@4@][@4@], vm2[@6@][@8@]; // no VLA
 // initialize matrixes
 fp2( 4, 4, vm1 );
@@ -290,5 +299,5 @@
 The language decides if the matrix and array-of-array are laid out the same or differently.
 For example, an array-of-array may be an array of row pointers to arrays of columns, so the rows may not be contiguous in memory nor even the same length (triangular matrix).
-Regardless, there is usually a uniform subscripting syntax masking the memory layout, even though the two array forms could be differentiated at the subscript level, \eg @m[1,2]@ \vs @aa[1][2]@.
+Regardless, there is usually a uniform subscripting syntax masking the memory layout, even though a language could differentiate between the two forms using subscript syntax, \eg @m[1,2]@ \vs @aa[1][2]@.
 Nevertheless, controlling memory layout can make a difference in what operations are allowed and in performance (caching/NUMA effects).
 
@@ -301,5 +310,5 @@
 The focus of this work is on the contiguous multidimensional arrays in C.
 The reason is that programmers are often forced to use the more complex array-of-array form when a contiguous array would be simpler, faster, and safer.
-Nevertheless, the C array-of-array form continues to be useful for special circumstances.
+Nevertheless, the C array-of-array form is still important for special circumstances.
 
 \VRef[Figure]{f:ContiguousNon-contiguous} shows the extensions made in C99 for manipulating contiguous \vs non-contiguous arrays.\footnote{C90 also supported non-contiguous arrays.}
@@ -313,5 +322,5 @@
 While this contiguous-array capability is a step forward, it is still the programmer's responsibility to manually manage the number of dimensions and their sizes, both at the function definition and call sites.
 That is, the array does not automatically carry its structure and sizes for use in computing subscripts.
-While the non-contiguous style in @faa@ looks very similar to @fc@, the compiler only understands the unknown-sized array of row pointers, and it relies on the programmer to traverse the columns in a row correctly.
+While the non-contiguous style in @faa@ looks very similar to @fc@, the compiler only understands the unknown-sized array of row pointers, and it relies on the programmer to traverse the columns in a row correctly with a correctly bounded loop index.
 Specifically, there is no requirement that the rows are the same length, like a poem with different length lines.
 
@@ -365,5 +374,5 @@
 this model has no awareness of dimensions, just the ability to access memory at a distance from a reference point (base-displacement addressing), \eg @x + 23@ or @x[23]@ $\Rightarrow$ 23rd element from the start of @x@.
 A programmer must manually build any notion of dimensions using other tools;
-hence, this style is not offering multidimensional arrays \see{\VRef[Figure]{f:FixedVariable}}.
+hence, this style is not offering multidimensional arrays \see{\VRef[Figure]{f:FixedVariable} right example}.
 \end{enumerate}
 
@@ -381,5 +390,5 @@
 A C/\CFA array interface includes the resulting memory layout.
 The defining requirement of a type-2 system is the ability to slice a column from a column-finest matrix.
-The required memory shape of such a slice is set, before any discussion of implementation.
+The required memory shape of such a slice is fixed, before any discussion of implementation.
 The implementation presented here is how the \CFA array library wrangles the C type system, to make it do memory steps that are consistent with this layout.
 TODO: do I have/need a presentation of just this layout, just the semantics of -[all]?
@@ -389,6 +398,6 @@
 Beyond what C's array type offers, the new array brings direct support for working with a noncontiguous array slice, allowing a program to work with dimension subscripts given in a non-physical order.
 
-The following examples use an @array( float, 5, 7) m@, loaded with values incremented by $0.1$, when stepping across the length-7 finely-strided column dimension, and stepping across the length-5 coarsely-strided row dimension.
-\par\noindent
+The following examples use the matrix declaration @array( float, 5, 7 ) m@, loaded with values incremented by $0.1$, when stepping across the length-7 finely-strided column dimension, and stepping across the length-5 coarsely-strided row dimension.
+\par
 \mbox{\lstinput{121-126}{hello-md.cfa}}
 \par\noindent
@@ -405,10 +414,10 @@
 Specifically, declaring the parameter @r@ with type @array@ means that @r@ is contiguous, which is unnecessarily restrictive.
 That is, @r@ need only be of a container type that offers a subscript operator (of type @ptrdiff_t@ $\rightarrow$ @float@) with managed length @N@.
-The new-array library provides the trait @ix@, so-defined.
+The new-array library provides the trait @ar@, so-defined.
 With it, the original declaration can be generalized with the same body.
 \lstinput{43-44}{hello-md.cfa}
 \lstinput[aboveskip=0pt]{145-145}{hello-md.cfa}
 The nontrivial slicing in this example now allows passing a \emph{noncontiguous} slice to @print1d@, where the new-array library provides a ``subscript by all'' operation for this purpose.
-In a multi-dimensional subscript operation, any dimension given as @all@ is a placeholder, \ie ``not yet subscripted by a value'', waiting for such a value, implementing the @ix@ trait.
+In a multi-dimensional subscript operation, any dimension given as @all@ is a placeholder, \ie ``not yet subscripted by a value'', waiting for such a value, implementing the @ar@ trait.
 \lstinput{150-151}{hello-md.cfa}
 
@@ -471,23 +480,21 @@
 In both cases, value 2 selects from the coarser dimension (rows of @x@),
 while the value 3 selects from the finer dimension (columns of @x@).
-The figure illustrates the value of each subexpression, comparing how numeric subscripting proceeds from @x@, vs from @x[all]@.
+The figure illustrates the value of each subexpression, comparing how numeric subscripting proceeds from @x@, \vs from @x[all]@.
 Proceeding from @x@ gives the numeric indices as coarse then fine, while proceeding from @x[all]@ gives them fine then coarse.
 These two starting expressions, which are the example's only multidimensional subexpressions
 (those that received zero numeric indices so far), are illustrated with vertical steps where a \emph{first} numeric index would select.
 
-The figure's presentation offers an intuition answering, What is an atomic element of @x[all]@?
-From there, @x[all]@ itself is simply a two-dimensional array, in the strict C sense, of these strange building blocks.
+The figure's presentation offers an intuition answering the question: What is an atomic element of @x[all]@?
+From there, @x[all]@ itself is simply a two-dimensional array, in the strict C sense, of these building blocks.
 An atom (like the bottommost value, @x[all][3][2]@), is the contained value (in the square box)
 and a lie about its size (the wedge above it, growing upward).
-An array of these atoms (like the intermediate @x[all][3]@) is just a contiguous arrangement of them,
-done according to their size, as announced.  Call such an array a column.
-A column is almost ready to be arranged into a matrix; it is the \emph{contained value} of the next-level building block,
-but another lie about size is required.
-At first, an atom needed to be arranged as if it were bigger,
-but now a column needs to be arranged as if it is smaller (the wedge above it, shrinking upward).
+An array of these atoms (like the intermediate @x[all][3]@) is just a contiguous arrangement of them, done according to their size;
+call such an array a column.
+A column is almost ready to be arranged into a matrix;
+it is the \emph{contained value} of the next-level building block, but another lie about size is required.
+At first, an atom needs to be arranged as if it were bigger, but now a column needs to be arranged as if it were smaller (the wedge above it, shrinking upward).
 These lying columns, arranged contiguously according to their size (as announced) form the matrix @x[all]@.
-Because @x[all]@ takes indices, first for the fine stride, then for the coarse stride,
-it achieves the requirement of representing the transpose of @x@.
-Yet every time the programmer presents an index, a mere C-array subscript is achieving the offset calculation.
+Because @x[all]@ takes indices, first for the fine stride, then for the coarse stride, it achieves the requirement of representing the transpose of @x@.
+Yet every time the programmer presents an index, a C-array subscript is achieving the offset calculation.
 
 In the @x[all]@ case, after the finely strided subscript is done (column 3 is selected),
@@ -495,8 +502,8 @@
 compared with where analogous rows appear when the row-level option is presented for @x@.
 
-These size lies create an appearance of overlap.
-For example, in @x[all]@, the shaded band touches atoms 2.0, 2.1, 2.2, 2.3, 1.4, 1.5 and 1.6.
+\PAB{I don't understand this paragraph: These size lies create an appearance of overlap.
+For example, in \lstinline{x[all]}, the shaded band touches atoms 2.0, 2.1, 2.2, 2.3, 1.4, 1.5 and 1.6.
 But only the atom 2.3 is storing its value there.
-The rest are lying about (conflicting) claims on this location, but never exercising these alleged claims.
+The rest are lying about (conflicting) claims on this location, but never exercising these alleged claims.}
 
 Lying is implemented as casting.
@@ -504,48 +511,46 @@
 This structure uses one type in its internal field declaration and offers a different type as the return of its subscript operator.
 The field within is a plain-C array of the fictional type, which is 7 floats long for @x[all][3][2]@ and 1 float long for @x[all][3]@.
-The subscript operator presents what's really inside, by casting to the type below the wedge of lie.
+The subscript operator presents what is really inside, by casting to the type below the wedge of the lie.
 
 %  Does x[all] have to lie too?  The picture currently glosses over how it it advertises a size of 7 floats.  I'm leaving that as an edge case benignly misrepresented in the picture.  Edge cases only have to be handled right in the code.
 
-Casting, overlapping and lying are unsafe.
-The mission here is to implement a style-2 feature that the type system helps the programmer use safely.
-The offered style-2 system is allowed to be internally unsafe,
-just as C's implementation of a style-3 system (upon a style-4 system) is unsafe within,
-even when the programmer is using it without casts or pointer arithmetic.
-Having a style-2 system relieves the programmer from resorting to unsafe pointer arithmetic when working with noncontiguous slices.
-
-The choice to implement this style-2 system upon C's style-3 arrays, rather than its style-4 pointer arithmetic,
-reduces the attack surface of unsafe code.
-My casting is unsafe, but I do not do any pointer arithmetic.
-When a programmer works in the common-case style-3 subset (in the no-@[all]@ top of Figure~\ref{fig:subscr-all}),
-my casts are identities, and the C compiler is doing its usual displacement calculations.
-If I had implemented my system upon style-4 pointer arithmetic,
-then this common case would be circumventing C's battle-hardened displacement calculations in favour of my own.
-
-\noindent END: Paste looking for a home
+Casting, overlapping, and lying are unsafe.
+The mission is to implement a style-1 feature in the type system for safe use by a programmer.
+The offered style-1 system is allowed to be internally unsafe,
+just as C's implementation of a style-2 system (upon a style-3 system) is unsafe within, even when the programmer is using it without casts or pointer arithmetic.
+Having a style-1 system relieves the programmer from resorting to unsafe pointer arithmetic when working with noncontiguous slices.
+
+% PAB: repeat from previous paragraph.
+% The choice to implement this style-1 system upon C's style-2 arrays, rather than its style-3 pointer arithmetic, reduces the attack surface of unsafe code.
+% My casting is unsafe, but I do not do any pointer arithmetic.
+% When a programmer works in the common-case style-2 subset (in the no-@[all]@ top of Figure~\ref{fig:subscr-all}), my casts are identities, and the C compiler is doing its usual displacement calculations.
+% If I had implemented my system upon style-3 pointer arithmetic, then this common case would be circumventing C's battle-hardened displacement calculations in favour of my own.
+
+% \noindent END: Paste looking for a home
 
 The new-array library defines types and operations that ensure proper elements are accessed soundly in spite of the overlapping.
-The private @arpk@ structure (array with explicit packing) is generic over these two types (and more): the contained element, what it is masquerading as.
-This structure's public interface is the @array(...)@ construction macro and the two subscript operators.
-Construction by @array@ initializes the masquerading-as type information to be equal to the contained-element information.
-Subscripting by @all@ rearranges the order of masquerading-as types to achieve, in general, nontrivial striding.
-Subscripting by a number consumes the masquerading-as size of the contained element type, does normal array stepping according to that size, and returns there element found there, in unmasked form.
-
-The @arpk@ structure and its @-[i]@ operator are thus defined as:
-\begin{cfa}
-forall( ztype(N),			$\C{// length of current dimension}$
-	dtype(S) | sized(S),	$\C{// masquerading-as}$
-	dtype E_im,				$\C{// immediate element, often another array}$
-	dtype E_base			$\C{// base element, e.g. float, never array}$
+The @arpk@ structure and its @-[i]@ operator are defined as:
+\begin{cfa}
+forall(
+	[N],					$\C{// length of current dimension}$
+	S & | sized(S),			$\C{// masquerading-as}$
+	Timmed &,				$\C{// immediate element, often another array}$
+	Tbase &					$\C{// base element, e.g. float, never array}$
 ) { // distribute forall to each element
 	struct arpk {
 		S strides[N];		$\C{// so that sizeof(this) is N of S}$
 	};
-	// expose E_im, stride by S
-	E_im & ?[?]( arpk(N, S, E_im, E_base) & a, ptrdiff_t i ) {
-		return (E_im &) a.strides[i];
+	// expose Timmed, stride by S
+	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, long int i ) {
+		subcheck( a, i, 0, N );
+		return (Timmed &)a.strides[i];
 	}
 }
 \end{cfa}
+The private @arpk@ structure (array with explicit packing) is generic over four types: dimension length, masquerading-as, ...
+This structure's public interface is hidden behind the @array(...)@ macro and the subscript operator.
+Construction by @array@ initializes the masquerading-as type information to be equal to the contained-element information.
+Subscripting by @all@ rearranges the order of masquerading-as types to achieve, in general, nontrivial striding.
+Subscripting by a number consumes the masquerading-as size of the contained element type, does normal array stepping according to that size, and returns the element found there, in unmasked form.
 
 An instantiation of the @arpk@ generic is given by the @array(E_base, N0, N1, ...)@ expansion, which is @arpk( N0, Rec, Rec, E_base )@, where @Rec@ is @array(E_base, N1, ...)@.
@@ -568,9 +573,9 @@
 This section provides a demonstration of the effect.
 
-The experiment compares the \CFA array system with the padded-room system [TODO:xref] most typically exemplified by Java arrays, but also reflected in the C++ pattern where restricted vector usage models a checked array.
+The experiment compares the \CFA array system with the padded-room system [TODO:xref] most typically exemplified by Java arrays, but also reflected in the \CC pattern where restricted vector usage models a checked array.
 The essential feature of this padded-room system is the one-to-one correspondence between array instances and the symbolic bounds on which dynamic checks are based.
-The experiment compares with the C++ version to keep access to generated assembly code simple.
-
-As a control case, a simple loop (with no reused dimension sizes) is seen to get the same optimization treatment in both the \CFA and C++ versions.
+The experiment compares with the \CC version to keep access to generated assembly code simple.
+
+As a control case, a simple loop (with no reused dimension sizes) is seen to get the same optimization treatment in both the \CFA and \CC versions.
 When the programmer treats the array's bound correctly (making the subscript ``obviously fine''), no dynamic bound check is observed in the program's optimized assembly code.
 But when the bounds are adjusted, such that the subscript is possibly invalid, the bound check appears in the optimized assembly, ready to catch an occurrence of the mistake.
@@ -589,6 +594,9 @@
 \section{Comparison with other arrays}
 
+
+\subsection{Rust}
+
 \CFA's array is the first lightweight application of dependently-typed bound tracking to an extension of C.
-Other extensions of C that apply dependently-typed bound tracking are heavyweight, in that the bound tracking is part of a linearly typed ownership system that further helps guarantee statically the validity of every pointer deference.
+Other extensions of C that apply dependently-typed bound tracking are heavyweight, in that the bound tracking is part of a linearly-typed ownership-system, which further helps guarantee statically the validity of every pointer dereference.
 These systems, therefore, ask the programmer to convince the type checker that every pointer dereference is valid.
 \CFA imposes the lighter-weight obligation, with the more limited guarantee, that initially-declared bounds are respected thereafter.
@@ -598,34 +606,105 @@
 The \CFA array, applied to accordion structures [TOD: cross-reference] \emph{implies} the necessary pointer arithmetic, generated automatically, and not appearing at all in a user's program.
 
-\subsection{Safety in a padded room}
-
-Java's array [TODO:cite] is a straightforward example of assuring safety against undefined behaviour, at a cost of expressiveness for more applied properties.
-Consider the array parameter declarations in:
-
+
+\subsection{Java}
+
+Java arrays are arrays-of-arrays because all objects are references \see{\VRef{toc:mdimpl}}.
+For each array, Java implicitly stores the array dimension in a descriptor, supporting array length, subscript checking, and allowing dynamically-sized array-parameter declarations.
+\begin{cquote}
 \begin{tabular}{rl}
 C      &  @void f( size_t n, size_t m, float x[n][m] );@ \\
-Java   &  @void f( float[][] a );@
+Java   &  @void f( float x[][] );@
 \end{tabular}
-
-Java's safety against undefined behaviour assures the callee that, if @x@ is non-null, then @a.length@ is a valid access (say, evaluating to the number $\ell$) and if @i@ is in $[0, \ell)$ then @x[i]@ is a valid access.
-If a value of @i@ outside this range is used, a runtime error is guaranteed.
-In these respects, C offers no guarantees at all.
-Notably, the suggestion that @n@ is the intended size of the first dimension of @x@ is documentation only.
-Indeed, many might prefer the technically equivalent declarations @float x[][m]@ or @float (*a)[m]@ as emphasizing the ``no guarantees'' nature of an infrequently used language feature, over using the opportunity to explain a programmer intention.
-Moreover, even if @x[0][0]@ is valid for the purpose intended, C's basic infamous feature is the possibility of an @i@, such that @x[i][0]@ is not valid for the same purpose, and yet, its evaluation does not produce an error.
-
-Java's lack of expressiveness for more applied properties means these outcomes are possible:
-\begin{itemize}
-\item @x[0][17]@ and @x[2][17]@ are valid accesses, yet @x[1][17]@ is a runtime error, because @x[1]@ is a null pointer
-\item the same observation, now because @x[1]@ refers to an array of length 5
-\item execution times vary, because the @float@ values within @x@ are sometimes stored nearly contiguously, and other times, not at all
-\end{itemize}
-C's array has none of these limitations, nor do any of the ``array language'' comparators discussed in this section.
-
-This Java level of safety and expressiveness is also exemplified in the C family, with the commonly given advice [TODO:cite example], for C++ programmers to use @std::vector@ in place of the C++ language's array, which is essentially the C array.
-The advice is that, while a vector is also more powerful (and quirky) than an array, its capabilities include options to preallocate with an upfront size, to use an available bound-checked accessor (@a.at(i)@ in place of @x[i]@), to avoid using @push_back@, and to use a vector of vectors.
-Used with these restrictions, out-of-bound accesses are stopped, and in-bound accesses never exercise the vector's ability to grow, which is to say, they never make the program slow to reallocate and copy, and they never invalidate the program's other references to the contained values.
-Allowing this scheme the same referential integrity assumption that \CFA enjoys [TODO:xref], this scheme matches Java's safety and expressiveness exactly.
-[TODO: decide about going deeper; some of the Java expressiveness concerns have mitigations, up to even more tradeoffs.]
+\end{cquote}
+However, in the C prototype, the parameters @n@ and @m@ are documentation only, indicating the intended sizes of the first and second dimensions of @x@.
+\VRef[Figure]{f:JavaVsCTriangularMatrix} compares a triangular matrix (array-of-arrays) in dynamically safe Java to unsafe C.
+Each dynamically sized row in Java stores its dimension, while C requires the programmer to manage these sizes explicitly (@rlnth@).
+All subscripting in Java has bounds checking, while C has none.
+Both Java and C require explicit null checking, otherwise there is a runtime failure.
+
+\begin{figure}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{ll@{}}
+\begin{java}
+int m[][] = {  // triangular matrix
+	new int [4],
+	new int [3],
+	new int [2],
+	new int [1],
+	null
+};
+
+for ( int r = 0; r < m.length; r += 1 ) {
+	if ( m[r] == null ) continue;
+	for ( int c = 0; c < m[r].length; c += 1 ) {
+		m[r][c] = c + r; // subscript checking
+	}
+
+}
+
+for ( int r = 0; r < m.length; r += 1 ) {
+	if ( m[r] == null ) {
+		System.out.println( "null row" );
+		continue;
+	}
+	for ( int c = 0; c < m[r].length; c += 1 ) {
+		System.out.print( m[r][c] + " " );
+	}
+	System.out.println();
+
+}
+\end{java}
+&
+\begin{cfa}
+int * m[5] = {  // triangular matrix
+	calloc( 4, sizeof(int) ),
+	calloc( 3, sizeof(int) ),
+	calloc( 2, sizeof(int) ),
+	calloc( 1, sizeof(int) ),
+	NULL
+};
+int rlnth = 4;
+for ( int r = 0; r < 5; r += 1 ) {
+	if ( m[r] == NULL ) continue;
+	for ( int c = 0; c < rlnth; c += 1 ) {
+		m[r][c] = c + r; // no subscript checking
+	}
+	rlnth -= 1;
+}
+rlnth = 4;
+for ( int r = 0; r < 5; r += 1 ) {
+	if ( m[r] == NULL ) {
+		printf( "null row\n" );
+		continue;
+	}
+	for ( int c = 0; c < rlnth; c += 1 ) {
+		printf( "%d ", m[r][c] );
+	}
+	printf( "\n" );
+	rlnth -= 1;
+}
+\end{cfa}
+\end{tabular}
+\caption{Java (left) \vs C (right) Triangular Matrix}
+\label{f:JavaVsCTriangularMatrix}
+\end{figure}
+
+The downside of the arrays-of-arrays approach is performance due to pointer chasing versus pointer arithmetic for contiguous arrays.
+Furthermore, there is the cost of managing the implicit array descriptor.
+It is unlikely that a JIT can dynamically rewrite an arrays-of-arrays form into a contiguous form.
+
+
+\subsection{\CC}
+
+Because C arrays are difficult and dangerous, the mantra for \CC programmers is to use @std::vector@ in place of the C array.
+While the vector size can grow and shrink dynamically, \vs a dynamic size that is fixed at declaration with VLAs, the cost of this extra feature is mitigated by preallocating the maximum size (like the VLA) at the declaration (one dynamic call) to avoid using @push_back@.
+\begin{c++}
+vector< vector< int > > m( 5, vector<int>(8) ); // initialize size of 5 x 8 with 6 dynamic allocations
+\end{c++}
+Multidimensional arrays are arrays-of-arrays with associated costs.
+Each @vector@ array has an array descriptor containing the dimension, which allows bound checking using @x.at(i)@ in place of @x[i]@.
+Used with these restrictions, out-of-bound accesses are caught, and in-bound accesses never exercise the vector's ability to grow, preventing costly reallocation and copying, and never invalidate references to contained values.
+This scheme matches Java's safety and expressiveness exactly, but with the inherent costs.
+
 
 \subsection{Levels of dependently typed arrays}
@@ -655,5 +734,5 @@
 it can also do these other cool checks, but watch how I can mess with its conservativeness and termination
 
-Two current, state-of-the-art array languages, Dex\cite{arr:dex:long} and Futhark\cite{arr:futhark:tytheory}, offer offer novel contributions concerning similar, restricted dependent types for tracking array length.
+Two current, state-of-the-art array languages, Dex\cite{arr:dex:long} and Futhark\cite{arr:futhark:tytheory}, offer novel contributions concerning similar, restricted dependent types for tracking array length.
 Unlike \CFA, both are garbage-collected functional languages.
 Because they are garbage-collected, referential integrity is built-in, meaning that the heavyweight analysis, that \CFA aims to avoid, is unnecessary.
@@ -727,5 +806,5 @@
 [TODO: introduce Ada in the comparators]
 
-In Ada and Dex, an array is conceived as a function whose domain must satisfy only certain structural assumptions, while in C, C++, Java, Futhark and \CFA today, the domain is a prefix of the natural numbers.
+In Ada and Dex, an array is conceived as a function whose domain must satisfy only certain structural assumptions, while in C, \CC, Java, Futhark and \CFA today, the domain is a prefix of the natural numbers.
 The generality has obvious aesthetic benefits for programmers working on scheduling resources to weekdays, and for programmers who prefer to count from an initial number of their own choosing.
 
Index: doc/theses/mike_brooks_MMath/programs/hello-accordion.cfa
===================================================================
--- doc/theses/mike_brooks_MMath/programs/hello-accordion.cfa	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ doc/theses/mike_brooks_MMath/programs/hello-accordion.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -9,5 +9,5 @@
 
 forall( T, @[NprovTerty]@, @[Nmunicipalities]@ )
-struct CanadaPop {
+struct CanPop {
 	array( T, @NprovTerty@ ) provTerty; $\C{// nested VLA}$
 	array( T, @Nmunicipalities@ ) municipalities; $\C{// nested VLA}$
@@ -19,8 +19,8 @@
 
 forall( T, [NprovTerty], [Nmunicipalities] )
-	void ?{}( T &, CanadaPop( T, NprovTerty, Nmunicipalities ) & this ) {}
+	void ?{}( T &, CanPop( T, NprovTerty, Nmunicipalities ) & this ) {}
 
 forall( T &, [NprovTerty], [Nmunicipalities] )
-	void ^?{}( CanadaPop( T, NprovTerty, Nmunicipalities ) & this ) {}
+	void ^?{}( CanPop( T, NprovTerty, Nmunicipalities ) & this ) {}
 
 
@@ -39,5 +39,5 @@
 
 forall( T, [NprovTerty], [Nmunicipalities] )
-void check( CanadaPop( T, NprovTerty, Nmunicipalities ) & pop ) with( pop ) {
+void check( CanPop( T, NprovTerty, Nmunicipalities ) & pop ) with( pop ) {
 	total_pt = total_mun = 0;
 	for ( i; NprovTerty ) total_pt += provTerty[i];
@@ -60,5 +60,5 @@
 int main( int argc, char * argv[] ) {
 	const int npt = ato( argv[1] ), nmun = ato( argv[2] );
-	@CanadaPop( int, npt, nmun ) pop;@
+	@CanPop( int, npt, nmun ) pop;@
 	// read in population numbers
 	@check( pop );@
@@ -71,4 +71,7 @@
 Total province/territory: 36,991,981
 Total municipalities: 36,991,981
+$\$$ ./a.out  13  3654
+Total province/territory: 36,991,981
+Total municipalities: 36,991,981
 */
 
Index: doc/theses/mike_brooks_MMath/programs/hello-array.cfa
===================================================================
--- doc/theses/mike_brooks_MMath/programs/hello-array.cfa	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ doc/theses/mike_brooks_MMath/programs/hello-array.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -9,5 +9,5 @@
 
 forall( [@N@] )								$\C{// array dimension}$
-array( bool, @N@) & f( array( float, @N@ ) & x, array( float, @N@ ) & y ) {
+array( bool, @N@ ) & f( array( float, @N@ ) & x, array( float, @N@ ) & y ) {
 	array( bool, @N@ ) & ret = *@alloc@();	$\C{// sizeof ret  used by alloc}$
 	for ( i; @N@ ) {
@@ -29,13 +29,13 @@
 
 int main( int argc, char * argv[] ) {
-	const int @n@ = ato( argv[1] );			$\C{// deduce conversion type}$
-	array( float, @n@ ) x, y;				$\C{// VLAs}$
-	for ( i; n ) {							$\C{// initialize arrays}$
+	const int @dim@ = ato( argv[1] );		$\C{// deduce conversion type}$
+	array( float, @dim@ ) x, y;				$\C{// VLAs}$
+	for ( i; dim ) {						$\C{// initialize arrays}$
 		x[i] = 3.14 / (i + 1);
 		y[i] = x[i] + 0.005 ;
 	}
-	array( bool, @n@ ) & result = @f( x, y )@; $\C{// call}$
+	array( bool, @dim@ ) & result = @f( x, y )@; $\C{// call}$
 	sout | "result: " | nonl;				$\C{// print result}$
-	for ( i; n )
+	for ( i; dim )
 		sout | result[i] | nonl;
 	sout | nl;
Index: doc/theses/mike_brooks_MMath/programs/hello-md.cfa
===================================================================
--- doc/theses/mike_brooks_MMath/programs/hello-md.cfa	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ doc/theses/mike_brooks_MMath/programs/hello-md.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -138,5 +138,5 @@
 
 
-print1d_cstyle( m[ 2 ] );  $\C{// row 2:  2.0  2.1  2.2  2.3  2.4  2.5  2.6}$
+print1d_cstyle( @m[ 2 ]@ );  $\C{// row 2:  2.0  2.1  2.2  2.3  2.4  2.5  2.6}$
 
 
Index: doc/theses/mike_brooks_MMath/uw-ethesis-frontpgs.tex
===================================================================
--- doc/theses/mike_brooks_MMath/uw-ethesis-frontpgs.tex	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ doc/theses/mike_brooks_MMath/uw-ethesis-frontpgs.tex	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -140,4 +140,6 @@
 
 I would like to thank all the little people who made this thesis possible.
+
+Finally, a special thank you to Huawei Canada for funding this work.
 \cleardoublepage
 \phantomsection    % allows hyperref to link to the correct page
Index: doc/theses/mike_brooks_MMath/uw-ethesis.tex
===================================================================
--- doc/theses/mike_brooks_MMath/uw-ethesis.tex	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ doc/theses/mike_brooks_MMath/uw-ethesis.tex	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -105,4 +105,5 @@
 \lstnewenvironment{c++}[1][]{\lstset{language=[GNU]C++,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
 \lstnewenvironment{pascal}[1][]{\lstset{language=pascal,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
+\lstnewenvironment{java}[1][]{\lstset{language=java,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
 \lstset{inputpath={programs}}
 
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ doc/user/user.tex	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Tue Jul  9 10:43:40 2024
-%% Update Count     : 6887
+%% Last Modified On : Fri Jul 26 06:56:11 2024
+%% Update Count     : 6955
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -1598,5 +1598,5 @@
 and implicitly opened \emph{after} a function-body open, to give them higher priority:
 \begin{cfa}
-void f( S & s, char ®c® ) with ( s ) ®with( §\emph{\R{params}}§ )® { // syntax not allowed, illustration only
+void f( S & s, char ®c® ) with ( s ) ®with( §\emph{\R{params}}§ )® { // syntax disallowed, illustration only
 	s.c = ®c;®  i = 3;  d = 5.5;
 }
@@ -3313,93 +3313,49 @@
 for example, the following is incorrect:
 \begin{cfa}
-* [ int x ] f () fp; §\C{// routine name "f" is not allowed}§
-\end{cfa}
-
-
-\section{Named and Default Arguments}
-
-Named\index{named arguments}\index{arguments!named} and default\index{default arguments}\index{arguments!default} arguments~\cite{Hardgrave76}\footnote{
+* [ int x ] f () fp; §\C{// routine name "f" is disallowed}§
+\end{cfa}
+
+
+\section{Default and Named Parameters}
+
+Default\index{default parameter}\index{parameter!default} and named\index{named parameter}\index{parameter!named} parameters~\cite{Hardgrave76}\footnote{
 Francez~\cite{Francez77} proposed a further extension to the named-parameter passing style, which specifies what type of communication (by value, by reference, by name) the argument is passed to the routine.}
 are two mechanisms to simplify routine call.
-Both mechanisms are discussed with respect to \CFA.
-\begin{description}
-\item[Named (or Keyword) Arguments:]
-provide the ability to specify an argument to a routine call using the parameter name rather than the position of the parameter.
-For example, given the routine:
-\begin{cfa}
-void p( int x, int y, int z ) {...}
-\end{cfa}
-a positional call is:
-\begin{cfa}
-p( 4, 7, 3 );
-\end{cfa}
-whereas a named (keyword) call may be:
-\begin{cfa}
-p( z : 3, x : 4, y : 7 );  §\C{// rewrite \(\Rightarrow\) p( 4, 7, 3 )}§
-\end{cfa}
-Here the order of the arguments is unimportant, and the names of the parameters are used to associate argument values with the corresponding parameters.
-The compiler rewrites a named call into a positional call.
-The advantages of named parameters are:
-\begin{itemize}
-\item
-Remembering the names of the parameters may be easier than the order in the routine definition.
-\item
-Parameter names provide documentation at the call site (assuming the names are descriptive).
-\item
-Changes can be made to the order or number of parameters without affecting the call (although the call must still be recompiled).
-\end{itemize}
-
-Unfortunately, named arguments do not work in C-style programming-languages because a routine prototype is not required to specify parameter names, nor do the names in the prototype have to match with the actual definition.
-For example, the following routine prototypes and definition are all valid.
-\begin{cfa}
-void p( int, int, int ); §\C{// equivalent prototypes}§
-void p( int x, int y, int z );
-void p( int y, int x, int z );
-void p( int z, int y, int x );
-void p( int q, int r, int s ) {} §\C{// match with this definition}§
-\end{cfa}
-Forcing matching parameter names in routine prototypes with corresponding routine definitions is possible, but goes against a strong tradition in C programming.
-Alternatively, prototype definitions can be eliminated by using a two-pass compilation, and implicitly creating header files for exports.
-The former is easy to do, while the latter is more complex.
-
-Furthermore, named arguments do not work well in a \CFA-style programming-languages because they potentially introduces a new criteria for type matching.
-For example, it is technically possible to disambiguate between these two overloaded definitions of ©f© based on named arguments at the call site:
-\begin{cfa}
-int f( int i, int j );
-int f( int x, double y );
-
-f( j : 3, i : 4 ); §\C{// 1st f}§
-f( x : 7, y : 8.1 ); §\C{// 2nd f}§
-f( 4, 5 );  §\C{// ambiguous call}§
-\end{cfa}
-However, named arguments compound routine resolution in conjunction with conversions:
-\begin{cfa}
-f( i : 3, 5.7 ); §\C{// ambiguous call ?}§
-\end{cfa}
-Depending on the cost associated with named arguments, this call could be resolvable or ambiguous.
-Adding named argument into the routine resolution algorithm does not seem worth the complexity.
-Therefore, \CFA does \emph{not} attempt to support named arguments.
-
-\item[Default Arguments]
-provide the ability to associate a default value with a parameter so it can be optionally specified in the argument list.
-For example, given the routine:
-\begin{cfa}
-void p( int x = 1, int y = 2, int z = 3 ) {...}
-\end{cfa}
-the allowable positional calls are:
-\begin{cfa}
-p(); §\C{// rewrite \(\Rightarrow\) p( 1, 2, 3 )}§
-p( 4 ); §\C{// rewrite \(\Rightarrow\) p( 4, 2, 3 )}§
-p( 4, 4 ); §\C{// rewrite \(\Rightarrow\) p( 4, 4, 3 )}§
-p( 4, 4, 4 ); §\C{// rewrite \(\Rightarrow\) p( 4, 4, 4 )}§
-// empty arguments
-p(  , 4, 4 ); §\C{// rewrite \(\Rightarrow\) p( 1, 4, 4 )}§
-p( 4,  , 4 ); §\C{// rewrite \(\Rightarrow\) p( 4, 2, 4 )}§
-p( 4, 4,   ); §\C{// rewrite \(\Rightarrow\) p( 4, 4, 3 )}§
-p( 4,  ,   ); §\C{// rewrite \(\Rightarrow\) p( 4, 2, 3 )}§
-p(  , 4,   ); §\C{// rewrite \(\Rightarrow\) p( 1, 4, 3 )}§
-p(  ,  , 4 ); §\C{// rewrite \(\Rightarrow\) p( 1, 2, 4 )}§
-p(  ,  ,   ); §\C{// rewrite \(\Rightarrow\) p( 1, 2, 3 )}§
-\end{cfa}
+
+
+\subsection{Default}
+
+A default parameter associates a default value with a parameter so it can be optionally specified in the argument list.
+For example, given the routine prototype:
+\begin{cfa}
+void f( int x ®= 1®, int y ®= 2®, int z ®= 3® );
+\end{cfa}
+allowable calls are:
+\begin{cquote}
+\setlength{\tabcolsep}{0.75in}
+\begin{tabular}{@{}ll@{}}
+\textbf{positional arguments} & \textbf{empty arguments} \\
+\begin{cfa}
+f();			§\C[0.75in]{// rewrite \(\Rightarrow\) f( 1, 2, 3 )}§
+f( 4 );			§\C{// rewrite \(\Rightarrow\) f( 4, 2, 3 )}§
+f( 4, 4 );		§\C{// rewrite \(\Rightarrow\) f( 4, 4, 3 )}§
+f( 4, 4, 4 );	§\C{// rewrite \(\Rightarrow\) f( 4, 4, 4 )}\CRT§
+
+
+
+\end{cfa}
+&
+\begin{cfa}
+f( ®?®, 4, 4 );		§\C[1.0in]{// rewrite \(\Rightarrow\) f( 1, 4, 4 )}§
+f( 4, ®?®, 4 );		§\C{// rewrite \(\Rightarrow\) f( 4, 2, 4 )}§
+f( 4, 4, ®?® );		§\C{// rewrite \(\Rightarrow\) f( 4, 4, 3 )}§
+f( 4, ®?®, ®?® );	§\C{// rewrite \(\Rightarrow\) f( 4, 2, 3 )}§
+f( ®?®, 4, ®?® );	§\C{// rewrite \(\Rightarrow\) f( 1, 4, 3 )}§
+f( ®?®, ®?®, 4 );	§\C{// rewrite \(\Rightarrow\) f( 1, 2, 4 )}§
+f( ®?®, ®?®, ®?® );	§\C{// rewrite \(\Rightarrow\) f( 1, 2, 3 )}\CRT§
+\end{cfa}
+\end{tabular}
+\end{cquote}
+where the ©?© selects the default value as the argument.
 Here the missing arguments are inserted from the default values in the parameter list.
 The compiler rewrites missing default values into explicit positional arguments.
@@ -3408,5 +3364,5 @@
 \item
 Routines with a large number of parameters are often very generalized, giving a programmer a number of different options on how a computation is performed.
-For many of these kinds of routines, there are standard or default settings that work for the majority of computations.
+For many of these routines, there are standard or default settings that work for the majority of computations.
 Without default values for parameters, a programmer is forced to specify these common values all the time, resulting in long argument lists that are error prone.
 \item
@@ -3422,41 +3378,12 @@
 Instead, a default value is used, which may not be the programmer's intent.
 
-Default values may only appear in a prototype versus definition context:
-\begin{cfa}
-void p( int x, int y = 2, int z = 3 ); §\C{// prototype: allowed}§
-void p( int, int = 2, int = 3 ); §\C{// prototype: allowed}§
-void p( int x, int y = 2, int z = 3 ) {} §\C{// definition: not allowed}§
+Default parameters may only appear in a prototype versus definition context:
+\begin{cfa}
+void f( int x, int y = 2, int z = 3 );	§\C{// prototype: allowed}§
+void f( int, int = 2, int = 3 );		§\C{// prototype: allowed}§
+void f( int x, int y = 2, int z = 3 ) ®{}® §\C{// definition: disallowed}§
 \end{cfa}
 The reason for this restriction is to allow separate compilation.
-Multiple prototypes with different default values is an error.
-\end{description}
-
-Ellipse (``...'') arguments present problems when used with default arguments.
-The conflict occurs because both named and ellipse arguments must appear after positional arguments, giving two possibilities:
-\begin{cfa}
-p( /* positional */, ... , /* named */ );
-p( /* positional */, /* named */, ... );
-\end{cfa}
-While it is possible to implement both approaches, the first possibly is more complex than the second, \eg:
-\begin{cfa}
-p( int x, int y, int z, ... );
-p( 1, 4, 5, 6, z : 3, y : 2 ); §\C{// assume p( /* positional */, ... , /* named */ );}§
-p( 1, z : 3, y : 2, 4, 5, 6 ); §\C{// assume p( /* positional */, /* named */, ... );}§
-\end{cfa}
-In the first call, it is necessary for the programmer to conceptually rewrite the call, changing named arguments into positional, before knowing where the ellipse arguments begin.
-Hence, this approach seems significantly more difficult, and hence, confusing and error prone.
-In the second call, the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
-
-The problem is exacerbated with default arguments, \eg:
-\begin{cfa}
-void p( int x, int y = 2, int z = 3... );
-p( 1, 4, 5, 6, z : 3 ); §\C{// assume p( /* positional */, ... , /* named */ );}§
-p( 1, z : 3, 4, 5, 6 ); §\C{// assume p( /* positional */, /* named */, ... );}§
-\end{cfa}
-The first call is an error because arguments 4 and 5 are actually positional not ellipse arguments;
-therefore, argument 5 subsequently conflicts with the named argument z : 3.
-In the second call, the default value for y is implicitly inserted after argument 1 and the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
-For these reasons, \CFA requires named arguments before ellipse arguments.
-Finally, while ellipse arguments are needed for a small set of existing C routines, like ©printf©, the extended \CFA type system largely eliminates the need for ellipse arguments \see{\VRef{s:Overloading}}, making much of this discussion moot.
+Multiple prototypes with different default values is undefined.
 
 Default arguments and overloading \see{\VRef{s:Overloading}} are complementary.
@@ -3466,5 +3393,5 @@
 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{default arguments}}	& \multicolumn{1}{c}{\textbf{overloading}}	\\
 \begin{cfa}
-void p( int x, int y = 2, int z = 3 ) {...}
+void f( int x, int y = 2, int z = 3 ) {...}
 
 
@@ -3472,20 +3399,88 @@
 &
 \begin{cfa}
-void p( int x, int y, int z ) {...}
-void p( int x ) { p( x, 2, 3 ); }
-void p( int x, int y ) { p( x, y, 3 ); }
+void f( int x, int y, int z ) {...}
+void f( int x ) { f( x, 2, 3 ); }
+void f( int x, int y ) { f( x, y, 3 ); }
 \end{cfa}
 \end{tabular}
 \end{cquote}
 the number of required overloaded routines is linear in the number of default values, which is unacceptable growth.
-In general, overloading should only be used over default arguments if the body of the routine is significantly different.
-Furthermore, overloading cannot handle accessing default arguments in the middle of a positional list, via a missing argument, such as:
-\begin{cfa}
-p( 1, /* default */, 5 ); §\C{// rewrite \(\Rightarrow\) p( 1, 2, 5 )}§
-\end{cfa}
-
-Given the \CFA restrictions above, both named and default arguments are backwards compatible.
-\Index*[C++]{\CC{}} only supports default arguments;
-\Index*{Ada} supports both named and default arguments.
+In general, overloading is used over default parameters, if the body of the routine is significantly different.
+Furthermore, overloading cannot handle accessing default arguments in the middle of a positional list.
+\begin{cfa}
+f( 1, ®?®, 5 );							§\C{// rewrite \(\Rightarrow\) f( 1, 2, 5 )}§
+\end{cfa}
+
+
+\subsection{Named (or Keyword)}
+
+A named (keyword) parameter provides the ability to specify an argument to a routine call using the parameter name rather than the position of the parameter.
+For example, given the routine prototype:
+\begin{cfa}
+void f( int ®?®x, int ®?®y, int ®?®z );
+\end{cfa}
+allowable calls are:
+\begin{cfa}
+f( ?x = 3, ?y = 4, ?z = 5 );			§\C{// rewrite \(\Rightarrow\) f( 3, 4, 5 )}§
+f( ?y = 4, ?z = 5, ?x = 3 );			§\C{// rewrite \(\Rightarrow\) f( 3, 4, 5 )}§
+f( ?z = 5, ?x = 3, ?y = 4 );			§\C{// rewrite \(\Rightarrow\) f( 3, 4, 5 )}§
+f( ?x = 3, ?z = 5, ?y = 4 );			§\C{// rewrite \(\Rightarrow\) f( 3, 4, 5 )}§
+\end{cfa}
+Here the ordering of the parameters and arguments is unimportant, and the names of the parameters are used to associate argument values with the corresponding parameters.
+The compiler rewrites a named call into a positional call.
+Note, the syntax ©?x = 3© is necessary for the argument, because ©x = 3© has an existing meaning, \ie assign ©3© to ©x© and pass the value of ©x©.
+The advantages of named parameters are:
+\begin{itemize}
+\item
+Remembering the names of the parameters may be easier than the order in the routine definition.
+\item
+Parameter names provide documentation at the call site (assuming the names are descriptive).
+\item
+Changes can be made to the order or number of parameters without affecting the call (although the call must still be recompiled).
+\end{itemize}
+
+Named parameters may only appear in a prototype versus definition context:
+\begin{cfa}
+void f( int  x, int ?y, int ?z );		§\C{// prototype: allowed}§
+void f( int ?x, int , int ?z );			§\C{// prototype: allowed}§
+void f( int x, int ?y, int ?z ) ®{}®	§\C{// definition: disallowed}§
+\end{cfa}
+The reason for this restriction is to allow separate compilation.
+Multiple prototypes with different positional parameter names is an error.
+
+The named parameter is not part of type resolution;
+only the type of the expression assigned to the named parameter affects type resolution.
+\begin{cfa}
+int f( int ?i, int ?j );
+int f( int ?i, double ?j );
+f( ?j = 3, ?i = 4 );					§\C{// 1st f}§
+f( ?i = 7, ?j = 8.1 );					§\C{// 2nd f}§
+\end{cfa}
+
+
+\subsection{Mixed Default/Named}
+
+Default and named parameters can be intermixed and named parameters can have a default value.
+For example, given the routine prototype:
+\begin{cfa}
+void f( int x, int y ®= 1®, int ®?®z ®= 2® );
+\end{cfa}
+allowable calls are:
+\begin{cfa}
+f( 3 );									§\C{// rewrite \(\Rightarrow\) f( 3, 1, 2 )}§
+f( 3, 4 );								§\C{// rewrite \(\Rightarrow\) f( 3, 4, 2 )}§
+f( 3, ?z = 5 );							§\C{// rewrite \(\Rightarrow\) f( 3, 1, 5 )}§
+f( 3, 4, ?z = 5 );						§\C{// rewrite \(\Rightarrow\) f( 3, 4, 5 )}§
+f( ?z = 5, 3 );							§\C{// rewrite \(\Rightarrow\) f( 3, 1, 5 )}§
+f( 3, ?z = 5, 4 );						§\C{// rewrite \(\Rightarrow\) f( 3, 4, 5 )}§
+\end{cfa}
+Finally, the ellipsis (``...'') parameter must appear after positional and named parameters in a routine prototype.
+\begin{cfa}
+void f( int i = 1, int ?j = 2, ®...® );
+\end{cfa}
+
+\CFA named and default arguments are backwards compatible with C.
+\Index*[C++]{\CC{}} only supports default parameters;
+\Index*{Ada} supports both named and default parameters.
 
 
Index: libcfa/src/collections/array.hfa
===================================================================
--- libcfa/src/collections/array.hfa	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ libcfa/src/collections/array.hfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -1,5 +1,5 @@
 #pragma once
 
-#include <assert.h>
+//#include <assert.h>
 
 
@@ -8,4 +8,12 @@
 #define ztag(n) ttag(n)
 
+#ifdef __CFA_DEBUG__
+#define subcheck( arr, sub, lb, ub ) \
+	if ( (sub) < (lb) || (sub) >= (ub) ) \
+		abort( "subscript %ld exceeds dimension range [%d,%zd) for array %p.\n", \
+			   (sub), (lb), (ub), (arr) )
+#else
+#define subcheck( arr, sub, lb, ub ) do {} while (0)
+#endif
 
 // 
@@ -36,123 +44,132 @@
 //
 forall( [N], S & | sized(S), Timmed &, Tbase & ) {
-
-    //
-    // Single-dim array sruct (with explicit packing and atom)
-    //
-    struct arpk {
-        S strides[N];
-    };
-
-    // About the choice of integral types offered as subscript overloads:
-    // Intent is to cover these use cases:
-    //    a[0]                                                // i : zero_t
-    //    a[1]                                                // i : one_t
-    //    a[2]                                                // i : int
-    //    float foo( ptrdiff_t i ) { return a[i]; }           // i : ptrdiff_t
-    //    float foo( size_t i ) { return a[i]; }              // i : size_t
-    //    forall( [N] ) ... for( i; N ) { total += a[i]; }    // i : typeof( sizeof(42) )
-    //    for( i; 5 ) { total += a[i]; }                      // i : int
-    //
-    // It gets complicated by:
-    // -  CFA does overloading on concrete types, like int and unsigned int, not on typedefed
-    //    types like size_t.  So trying to overload on ptrdiff_t vs int works in 64-bit mode
-    //    but not in 32-bit mode.
-    // -  Given bug of Trac #247, CFA gives sizeof expressions type unsigned long int, when it
-    //    should give them type size_t.
-    //
-    //                          gcc -m32         cfa -m32 given bug         gcc -m64 (and cfa)
-    // ptrdiff_t                int              int                        long int
-    // size_t                   unsigned int     unsigned int               unsigned long int
-    // typeof( sizeof(42) )     unsigned int     unsigned long int          unsigned long int
-    // int                      int              int                        int
-    //
-    // So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
-    //
-    // The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
-    // because assertion satisfaction requires types to match exacly.  Both higher-dimensional
-    // subscripting and operations on slices use asserted subscript operators.  The test case
-    // array-container/array-sbscr-cases covers the combinations.  Mike beleives that commenting out
-    // any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
-    // Mike is open to being shown a smaller set of overloads that still passes the test.
-
-    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, zero_t ) {
-        assert( 0 < N );
-        return (Timmed &) a.strides[0];
-    }
-
-    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, one_t ) {
-        assert( 1 < N );
-        return (Timmed &) a.strides[1];
-    }
-
-    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, unsigned int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, unsigned int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, long int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, long int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, unsigned long int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, unsigned long int i ) {
-        assert( i < N );
-        return (Timmed &) a.strides[i];
-    }
-
-    static inline size_t ?`len( arpk(N, S, Timmed, Tbase) & a ) {
-        return N;
-    }
-
-    static inline void __taglen( tag(arpk(N, S, Timmed, Tbase)), tag(N) ) {}
+	//
+	// Single-dim array struct (with explicit packing and atom)
+	//
+	struct arpk {
+		S strides[N];
+	};
+
+	// About the choice of integral types offered as subscript overloads:
+	// Intent is to cover these use cases:
+	//    a[0]                                                // i : zero_t
+	//    a[1]                                                // i : one_t
+	//    a[2]                                                // i : int
+	//    float foo( ptrdiff_t i ) { return a[i]; }           // i : ptrdiff_t
+	//    float foo( size_t i ) { return a[i]; }              // i : size_t
+	//    forall( [N] ) ... for( i; N ) { total += a[i]; }    // i : typeof( sizeof(42) )
+	//    for( i; 5 ) { total += a[i]; }                      // i : int
+	//
+	// It gets complicated by:
+	// -  CFA does overloading on concrete types, like int and unsigned int, not on typedefed
+	//    types like size_t.  So trying to overload on ptrdiff_t vs int works in 64-bit mode
+	//    but not in 32-bit mode.
+	// -  Given bug of Trac #247, CFA gives sizeof expressions type unsigned long int, when it
+	//    should give them type size_t.
+	//
+	//                          gcc -m32         cfa -m32 given bug         gcc -m64 (and cfa)
+	// ptrdiff_t                int              int                        long int
+	// size_t                   unsigned int     unsigned int               unsigned long int
+	// typeof( sizeof(42) )     unsigned int     unsigned long int          unsigned long int
+	// int                      int              int                        int
+	//
+	// So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
+	//
+	// The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
+	// because assertion satisfaction requires types to match exactly.  Both higher-dimensional
+	// subscripting and operations on slices use asserted subscript operators.  The test case
+	// array-container/array-sbscr-cases covers the combinations.  Mike believes that commenting out
+	// any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
+	// Mike is open to being shown a smaller set of overloads that still passes the test.
+
+	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, zero_t ) {
+		//assert( 0 < N );
+		subcheck( a, 0L, 0, N );
+		return (Timmed &)a.strides[0];
+	}
+
+	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, one_t ) {
+		//assert( 1 < N );
+		subcheck( a, 1L, 0, N );
+		return (Timmed &)a.strides[1];
+	}
+
+	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, int i ) {
+		//assert( i < N );
+		subcheck( a, (long int)i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, int i ) {
+		//assert( i < N );
+		subcheck( a, (long int)i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
+		//assert( i < N );
+		subcheck( a, (long int)i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
+		//assert( i < N );
+		subcheck( a, (unsigned long int)i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, long int i ) {
+		//assert( i < N );
+		subcheck( a, i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, long int i ) {
+		//assert( i < N );
+		subcheck( a, i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
+		//assert( i < N );
+		subcheck( a, i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
+		//assert( i < N );
+		subcheck( a, i, 0, N );
+		return (Timmed &)a.strides[i];
+	}
+
+	static inline size_t ?`len( arpk( N, S, Timmed, Tbase ) & a ) {
+		return N;
+	}
+
+	static inline void __taglen( tag(arpk( N, S, Timmed, Tbase )), tag(N) ) {}
 }
 
 // RAII pattern has workarounds for
 //  - Trac 226:  Simplest handling would be, require immediate element to be otype, let autogen
-//    raii happen.  Performance on even a couple dimensions is unacceptable because of exponential
-//    thunk creation: ?{}() needs all four otype funcs from next level, so does ^?{}(), so do the
-//    other two.  This solution offers ?{}() that needs only ?{}(), and similar for ^?{}.
+//	raii happen.  Performance on even a couple dimensions is unacceptable because of exponential
+//	thunk creation: ?{}() needs all four otype funcs from next level, so does ^?{}(), so do the
+//	other two.  This solution offers ?{}() that needs only ?{}(), and similar for ^?{}.
 
 forall( [N], S & | sized(S), Timmed &, Tbase & | { void ?{}( Timmed & ); } )
-static inline void ?{}( arpk(N, S, Timmed, Tbase) & this ) {    
-    void ?{}( S (&)[N] ) {}
-    ?{}(this.strides);
-
-    for (i; N) ?{}( (Timmed &) this.strides[i] );
+static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this ) {	
+	void ?{}( S (&)[N] ) {}
+	?{}(this.strides);
+
+	for (i; N) ?{}( (Timmed &)this.strides[i] );
 }
 
 forall( [N], S & | sized(S), Timmed &, Tbase & | { void ^?{}( Timmed & ); } )
-static inline void ^?{}( arpk(N, S, Timmed, Tbase) & this ) {
-    void ^?{}( S (&)[N] ) {}
-    ^?{}(this.strides);
-
-    for (i; N ) {
-        ^?{}( (Timmed &) this.strides[N-i-1] );
-    }
+static inline void ^?{}( arpk( N, S, Timmed, Tbase ) & this ) {
+	void ^?{}( S (&)[N] ) {}
+	^?{}(this.strides);
+
+	for (i; N ) {
+		^?{}( (Timmed &)this.strides[N-i-1] );
+	}
 }
 
@@ -165,20 +182,20 @@
 
 forall( [N], ZTags ... , Trslt &, Tatom & | { Trslt mkar_( tag(Tatom), ZTags ); } )
-static inline arpk(N, Trslt, Trslt, Tatom) mkar_( tag(Tatom), tag(N), ZTags ) {}
+static inline arpk( N, Trslt, Trslt, Tatom) mkar_( tag(Tatom), tag(N), ZTags ) {}
 
 // based on https://stackoverflow.com/questions/1872220/is-it-possible-to-iterate-over-arguments-in-variadic-macros
 
-    // Make a FOREACH macro
-    #define FE_0(WHAT)
-    #define FE_1(WHAT, X) WHAT(X)
-    #define FE_2(WHAT, X, ...) WHAT(X)FE_1(WHAT, __VA_ARGS__)
-    #define FE_3(WHAT, X, ...) WHAT(X)FE_2(WHAT, __VA_ARGS__)
-    #define FE_4(WHAT, X, ...) WHAT(X)FE_3(WHAT, __VA_ARGS__)
-    #define FE_5(WHAT, X, ...) WHAT(X)FE_4(WHAT, __VA_ARGS__)
-    //... repeat as needed
-
-    #define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME
-    #define FOR_EACH(action,...) \
-    GET_MACRO(_0,__VA_ARGS__,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)(action,__VA_ARGS__)
+	// Make a FOREACH macro
+	#define FE_0(WHAT)
+	#define FE_1(WHAT, X) WHAT(X)
+	#define FE_2(WHAT, X, ...) WHAT(X)FE_1(WHAT, __VA_ARGS__)
+	#define FE_3(WHAT, X, ...) WHAT(X)FE_2(WHAT, __VA_ARGS__)
+	#define FE_4(WHAT, X, ...) WHAT(X)FE_3(WHAT, __VA_ARGS__)
+	#define FE_5(WHAT, X, ...) WHAT(X)FE_4(WHAT, __VA_ARGS__)
+	//... repeat as needed
+
+	#define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME
+	#define FOR_EACH(action,...) \
+	GET_MACRO(_0,__VA_ARGS__,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)(action,__VA_ARGS__)
 
 #define COMMA_ttag(X) , ttag(X)
@@ -200,5 +217,5 @@
 forall( TA &, TB &, TC &, IxAB, IxBC ... | { TB & ?[?]( TA &, IxAB ); TC & ?[?]( TB &, IxBC ); } )
 static inline TC & ?[?]( TA & this, IxAB ab, IxBC bc ) {
-    return this[ab][bc];
+	return this[ab][bc];
 }
 
@@ -209,15 +226,15 @@
 forall( TA &, TB &, TC &, IxAB_0, IxBC | { TB & ?[?]( TA &, IxAB_0 ); TC & ?[?]( TB &, IxBC ); } )
 static inline TC & ?[?]( TA & this, IxAB_0 ab, IxBC bc ) {
-    return this[ab][bc];
+	return this[ab][bc];
 }
 
 forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxBC | { TB & ?[?]( TA &, IxAB_0, IxAB_1 ); TC & ?[?]( TB &, IxBC ); } )
 static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxBC bc ) {
-    return this[[ab0,ab1]][bc];
+	return this[[ab0,ab1]][bc];
 }
 
 forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxAB_2, IxBC | { TB & ?[?]( TA &, IxAB_0, IxAB_1, IxAB_2 ); TC & ?[?]( TB &, IxBC ); } )
 static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxAB_2 ab2, IxBC bc ) {
-    return this[[ab0,ab1,ab2]][bc];
+	return this[[ab0,ab1,ab2]][bc];
 }
 
@@ -237,14 +254,14 @@
 // Base
 forall( [Nq], Sq & | sized(Sq), Tbase & )
-static inline tag(arpk(Nq, Sq, Tbase, Tbase)) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(Tbase) ) {
-    tag(arpk(Nq, Sq, Tbase, Tbase)) ret;
-    return ret;
+static inline tag(arpk( Nq, Sq, Tbase, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(Tbase ) ) {
+	tag(arpk( Nq, Sq, Tbase, Tbase )) ret;
+	return ret;
 }
 
 // Rec
 forall( [Nq], Sq & | sized(Sq), [N], S & | sized(S), recq &, recr &, Tbase & | { tag(recr) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(recq) ); } )
-static inline tag(arpk(N, S, recr, Tbase)) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(arpk(N, S, recq, Tbase)) ) {
-    tag(arpk(N, S, recr, Tbase)) ret;
-    return ret;
+static inline tag(arpk( N, S, recr, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(arpk( N, S, recq, Tbase )) ) {
+	tag(arpk( N, S, recr, Tbase )) ret;
+	return ret;
 }
 
@@ -252,6 +269,6 @@
 extern struct all_t {} all;
 forall( [N], S & | sized(S), Te &, result &, Tbase & | { tag(result) enq_( tag(Tbase), tag(N), tag(S), tag(Te) ); } )
-static inline result & ?[?]( arpk(N, S, Te, Tbase) & this, all_t ) {
-    return (result&) this;
+static inline result & ?[?]( arpk( N, S, Te, Tbase ) & this, all_t ) {
+	return (result&) this;
 }
 
@@ -263,22 +280,22 @@
 // forall(A &, Tv &, [N])
 // trait ar {
-//     Tv& ?[?]( A&, zero_t );
-//     Tv& ?[?]( A&, one_t  );
-//     Tv& ?[?]( A&, int    );
-//                   ...
-//     size_t ?`len( A& );
-//     void __taglen( tag(C), tag(N) );
+//	 Tv& ?[?]( A&, zero_t );
+//	 Tv& ?[?]( A&, one_t  );
+//	 Tv& ?[?]( A&, int	);
+//				   ...
+//	 size_t ?`len( A& );
+//	 void __taglen( tag(C), tag(N) );
 // };
 
 // working around N's not being accepted as arguments to traits
 
-#define ar(A, Tv, N) {                 \
-    Tv& ?[?]( A&, zero_t );            \
-    Tv& ?[?]( A&, one_t );             \
-    Tv& ?[?]( A&, int );               \
-    Tv& ?[?]( A&, unsigned int );      \
-    Tv& ?[?]( A&, long int );          \
-    Tv& ?[?]( A&, unsigned long int ); \
-    size_t ?`len( A& );                \
-    void __taglen( tag(A), tag(N) );   \
-}
+#define ar( A, Tv, N ) {				\
+	Tv& ?[?]( A&, zero_t );				\
+	Tv& ?[?]( A&, one_t );				\
+	Tv& ?[?]( A&, int );				\
+	Tv& ?[?]( A&, unsigned int );		\
+	Tv& ?[?]( A&, long int );			\
+	Tv& ?[?]( A&, unsigned long int );	\
+	size_t ?`len( A& );					\
+	void __taglen( tag(A), tag(N) );	\
+}
Index: libcfa/src/enum.cfa
===================================================================
--- libcfa/src/enum.cfa	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ libcfa/src/enum.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -25,7 +25,7 @@
 	E pred( E e ) {
 		E lower = lowerBound();
-		if ( fromInstance(e) <= fromInstance(lower ) )
+		if ( fromInstance( e ) <= fromInstance(lower ) )
 			abort( "call to pred() exceeds enumeration lower bound of %d", fromInstance( lower ) );
-		return pred_unsafe(e);
+		return pred_unsafe( e );
 	}
 
Index: src/GenPoly/Box.cpp
===================================================================
--- src/GenPoly/Box.cpp	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ src/GenPoly/Box.cpp	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -673,9 +673,16 @@
 		TypeVarMap const & typeVars,
 		ast::TypeSubstitution const * typeSubs ) {
-	if ( expr->result && isPolyType( expr->result, typeVars, typeSubs ) ) {
-		if ( auto name = expr->func.as<ast::NameExpr>() ) {
-			if ( "*?" == name->name ) {
-				return true;
-			}
+	if ( auto name = expr->func.as<ast::NameExpr>() ) {
+		if ( "*?" == name->name ) {
+			// It's a deref.
+			// Must look under the * (and strip its ptr-ty) because expr's
+			// result could be ar/ptr-decayed.  If expr.inner:T(*)[n], then
+			// expr is a poly deref, even though expr:T*, which is not poly.
+			auto ptrExpr = expr->args.front();
+			auto ptrTy = ptrExpr->result.as<ast::PointerType>();
+			assert(ptrTy); // thing being deref'd must be pointer
+			auto referentTy = ptrTy->base;
+			assert(referentTy);
+			return isPolyType( referentTy, typeVars, typeSubs );
 		}
 	}
@@ -1192,14 +1199,17 @@
 		assert( 2 == expr->args.size() );
 
-		ast::Type const * baseType1 =
-			isPolyPtr( expr->args.front()->result, scopeTypeVars, typeSubs );
-		ast::Type const * baseType2 =
-			isPolyPtr( expr->args.back()->result, scopeTypeVars, typeSubs );
+		ast::Type const * arg1Ty = expr->args.front()->result;
+		ast::Type const * arg2Ty = expr->args.back()->result;
+
+		// two cases: a[i] with first arg poly ptr, i[a] with second arg poly ptr
+		bool isPoly1 = isPolyPtr( arg1Ty, scopeTypeVars, typeSubs ) != nullptr;
+		bool isPoly2 = isPolyPtr( arg2Ty, scopeTypeVars, typeSubs ) != nullptr;
+
 		// If neither argument is a polymorphic pointer, do nothing.
-		if ( !baseType1 && !baseType2 ) {
+		if ( !isPoly1 && !isPoly2 ) {
 			return expr;
 		}
 		// The arguments cannot both be polymorphic pointers.
-		assert( !baseType1 || !baseType2 );
+		assert( !isPoly1 || !isPoly2 );
 		// (So exactly one of the arguments is a polymorphic pointer.)
 
@@ -1210,16 +1220,22 @@
 		ast::UntypedExpr * ret = new ast::UntypedExpr( location,
 				new ast::NameExpr( location, "?+?" ) );
-		if ( baseType1 ) {
+		if ( isPoly1 ) {
+			assert( arg1Ty );
+			auto arg1TyPtr = dynamic_cast<ast::PointerType const * >( arg1Ty );
+			assert( arg1TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location2, "?*?", {
 				expr->args.back(),
-				new ast::SizeofExpr( location1, deepCopy( baseType1 ) ),
+				new ast::SizeofExpr( location1, deepCopy( arg1TyPtr->base ) ),
 			} );
 			ret->args.push_back( expr->args.front() );
 			ret->args.push_back( multiply );
 		} else {
-			assert( baseType2 );
+			assert( isPoly2 );
+			assert( arg2Ty );
+			auto arg2TyPtr = dynamic_cast<ast::PointerType const * >( arg2Ty );
+			assert( arg2TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location1, "?*?", {
 				expr->args.front(),
-				new ast::SizeofExpr( location2, deepCopy( baseType2 ) ),
+				new ast::SizeofExpr( location2, deepCopy( arg2TyPtr->base ) ),
 			} );
 			ret->args.push_back( multiply );
@@ -1234,6 +1250,12 @@
 		assert( 1 == expr->args.size() );
 
+		auto ptrExpr = expr->args.front();
+		auto ptrTy = ptrExpr->result.as<ast::PointerType>();
+		assert(ptrTy); // thing being deref'd must be pointer
+		auto referentTy = ptrTy->base;
+		assert(referentTy);
+
 		// If this isn't for a poly type, then do nothing.
-		if ( !isPolyType( expr->result, scopeTypeVars, typeSubs ) ) {
+		if ( !isPolyType( referentTy, scopeTypeVars, typeSubs ) ) {
 			return expr;
 		}
@@ -1243,4 +1265,8 @@
 		// Fix expression type to remove pointer.
 		ret->result = expr->result;
+		// apply pointer decay
+		if (auto retArTy = ret->result.as<ast::ArrayType>()) {
+			ret->result = new ast::PointerType( retArTy->base );
+		}
 		ret->env = expr->env ? expr->env : ret->env;
 		return ret;
@@ -1291,38 +1317,49 @@
 		return makeIncrDecrExpr(
 			expr->location, expr, baseType, "++?" == varName );
-	// Addition and Subtration Intrinsics:
+	// Addition and Subtraction Intrinsics:
 	} else if ( "?+?" == varName || "?-?" == varName ) {
 		assert( expr->result );
 		assert( 2 == expr->args.size() );
 
-		auto baseType1 =
-			isPolyPtr( expr->args.front()->result, scopeTypeVars, typeSubs );
-		auto baseType2 =
-			isPolyPtr( expr->args.back()->result, scopeTypeVars, typeSubs );
+		ast::Type const * arg1Ty = expr->args.front()->result;
+		ast::Type const * arg2Ty = expr->args.back()->result;
+
+		bool isPoly1 = isPolyPtr( arg1Ty, scopeTypeVars, typeSubs ) != nullptr;
+		bool isPoly2 = isPolyPtr( arg2Ty, scopeTypeVars, typeSubs ) != nullptr;
 
 		CodeLocation const & location = expr->location;
 		CodeLocation const & location1 = expr->args.front()->location;
 		CodeLocation const & location2 = expr->args.back()->location;
-		// LHS op RHS -> (LHS op RHS) / sizeof(LHS)
-		if ( baseType1 && baseType2 ) {
+		// LHS minus RHS -> (LHS minus RHS) / sizeof(LHS)
+		if ( isPoly1 && isPoly2 ) {
+			assert( "?-?" == varName );
+			assert( arg1Ty );
+			auto arg1TyPtr = dynamic_cast<ast::PointerType const * >( arg1Ty );
+			assert( arg1TyPtr );
 			auto divide = ast::UntypedExpr::createCall( location, "?/?", {
 				expr,
-				new ast::SizeofExpr( location, deepCopy( baseType1 ) ),
+				new ast::SizeofExpr( location, deepCopy( arg1TyPtr->base ) ),
 			} );
 			if ( expr->env ) divide->env = expr->env;
 			return divide;
 		// LHS op RHS -> LHS op (RHS * sizeof(LHS))
-		} else if ( baseType1 ) {
+		} else if ( isPoly1 ) {
+			assert( arg1Ty );
+			auto arg1TyPtr = dynamic_cast<ast::PointerType const * >( arg1Ty );
+			assert( arg1TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location2, "?*?", {
 				expr->args.back(),
-				new ast::SizeofExpr( location1, deepCopy( baseType1 ) ),
+				new ast::SizeofExpr( location1, deepCopy( arg1TyPtr->base ) ),
 			} );
 			return ast::mutate_field_index(
 				expr, &ast::ApplicationExpr::args, 1, multiply );
 		// LHS op RHS -> (LHS * sizeof(RHS)) op RHS
-		} else if ( baseType2 ) {
+		} else if ( isPoly2 ) {
+			assert( arg2Ty );
+			auto arg2TyPtr = dynamic_cast<ast::PointerType const * >( arg2Ty );
+			assert( arg2TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location1, "?*?", {
 				expr->args.front(),
-				new ast::SizeofExpr( location2, deepCopy( baseType2 ) ),
+				new ast::SizeofExpr( location2, deepCopy( arg2TyPtr->base ) ),
 			} );
 			return ast::mutate_field_index(
@@ -1588,7 +1625,8 @@
 	/// Change the type of generic aggregate members to char[].
 	void mutateMembers( ast::AggregateDecl * aggr );
-	/// Returns the calculated sizeof expression for type, or nullptr for use
-	/// C sizeof().
+	/// Returns the calculated sizeof/alignof expression for type, or
+	/// nullptr to indicate that the C sizeof()/alignof() should be used.
 	ast::Expr const * genSizeof( CodeLocation const &, ast::Type const * );
+	ast::Expr const * genAlignof( CodeLocation const &, ast::Type const * );
 	/// Enters a new scope for type-variables,
 	/// adding the type variables from the provided type.
@@ -1613,15 +1651,30 @@
 {}
 
+static ast::Type * polyToMonoTypeRec( CodeLocation const & loc,
+		ast::Type const * ty ) {
+	ast::Type * ret;
+	if ( auto aTy = dynamic_cast<ast::ArrayType const *>( ty ) ) {
+		// recursive case
+		auto monoBase = polyToMonoTypeRec( loc, aTy->base );
+		ret = new ast::ArrayType( monoBase, aTy->dimension,
+			aTy->isVarLen, aTy->isStatic, aTy->qualifiers );
+	} else {
+		// base case
+		auto charType = new ast::BasicType( ast::BasicKind::Char );
+		auto size = new ast::NameExpr( loc,
+			sizeofName( Mangle::mangleType( ty ) ) );
+		ret = new ast::ArrayType( charType, size,
+			ast::VariableLen, ast::DynamicDim, ast::CV::Qualifiers() );
+	}
+	return ret;
+}
+
 /// Converts polymorphic type into a suitable monomorphic representation.
-/// Currently: __attribute__(( aligned(8) )) char[size_T];
-ast::Type * polyToMonoType( CodeLocation const & location,
-		ast::Type const * declType ) {
-	auto charType = new ast::BasicType( ast::BasicKind::Char );
-	auto size = new ast::NameExpr( location,
-		sizeofName( Mangle::mangleType( declType ) ) );
-	auto ret = new ast::ArrayType( charType, size,
-		ast::VariableLen, ast::DynamicDim, ast::CV::Qualifiers() );
+/// Simple cases: T -> __attribute__(( aligned(8) )) char[sizeof_T];
+/// Array cases: T[eOut][eIn] ->  __attribute__(( aligned(8) )) char[eOut][eIn][sizeof_T];
+ast::Type * polyToMonoType( CodeLocation const & loc, ast::Type const * ty ) {
+	auto ret = polyToMonoTypeRec( loc, ty );
 	ret->attributes.emplace_back( new ast::Attribute( "aligned",
-		{ ast::ConstantExpr::from_int( location, 8 ) } ) );
+		{ ast::ConstantExpr::from_int( loc, 8 ) } ) );
 	return ret;
 }
@@ -1716,4 +1769,25 @@
 	// Forally, side effects are not safe in this function. But it works.
 	erase_if( mutDecl->attributes, matchAndMove );
+
+	// Change the decl's type.
+	// Upon finishing the box pass, it shall be void*.
+	// At this middle-of-box-pass point, that type is T.
+
+	// example 1
+	// before box:                                  T     t ;
+	// before here:  char _bufxx    [_sizeof_Y1T];  T     t = _bufxx;
+	// after here:   char _bufxx    [_sizeof_Y1T];  T     t = _bufxx;  (no change here - non array case)
+	// after box:    char _bufxx    [_sizeof_Y1T];  void *t = _bufxx;
+
+	// example 2
+	// before box:                                  T     t[42] ;
+	// before here:  char _bufxx[42][_sizeof_Y1T];  T     t[42] = _bufxx;
+	// after here:   char _bufxx[42][_sizeof_Y1T];  T     t     = _bufxx;
+	// after box:    char _bufxx[42][_sizeof_Y1T];  void *t     = _bufxx;
+
+	// Strip all "array of" wrappers
+	while ( auto arrayType = dynamic_cast<ast::ArrayType const *>( mutDecl->type.get() ) ) {
+		mutDecl->type = arrayType->base;
+	}
 
 	mutDecl->init = new ast::SingleInit( decl->location,
@@ -1869,10 +1943,6 @@
 		ast::AlignofExpr const * expr ) {
 	ast::Type const * type = expr->type ? expr->type : expr->expr->result;
-	if ( findGeneric( expr->location, type ) ) {
-		return new ast::NameExpr( expr->location,
-			alignofName( Mangle::mangleType( type ) ) );
-	} else {
-		return expr;
-	}
+	ast::Expr const * gen = genAlignof( expr->location, type );
+	return ( gen ) ? gen : expr;
 }
 
@@ -2095,4 +2165,7 @@
 
 		return true;
+
+	} else if ( auto inst = dynamic_cast<ast::ArrayType const *>( type ) ) {
+		return findGeneric( location, inst->base );
 	}
 	return false;
@@ -2155,6 +2228,20 @@
 		return makeOp( location, "?*?", sizeofBase, dim );
 	} else if ( findGeneric( location, type ) ) {
-		// Generate calculated size for generic type.
+		// Generate reference to _sizeof parameter
 		return new ast::NameExpr( location, sizeofName(
+				Mangle::mangleType( type ) ) );
+	} else {
+		return nullptr;
+	}
+}
+
+ast::Expr const * PolyGenericCalculator::genAlignof(
+		CodeLocation const & location, ast::Type const * type ) {
+	if ( auto * array = dynamic_cast<ast::ArrayType const *>( type ) ) {
+		// alignof array is alignof element
+		return genAlignof( location, array->base );
+	} else if ( findGeneric( location, type ) ) {
+		// Generate reference to _alignof parameter
+		return new ast::NameExpr( location, alignofName(
 				Mangle::mangleType( type ) ) );
 	} else {
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ src/Parser/parser.yy	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jul  9 10:29:01 2024
-// Update Count     : 6713
+// Last Modified On : Fri Jul 26 14:09:30 2024
+// Update Count     : 6733
 //
 
@@ -852,23 +852,4 @@
 	;
 
-argument_expression_list_opt:
-	// empty
-		{ $$ = nullptr; }
-	| argument_expression_list
-	;
-
-argument_expression_list:
-	argument_expression
-	| argument_expression_list_opt ',' argument_expression
-		{ $$ = $1->set_last( $3 ); }
-	;
-
-argument_expression:
-	'@'													// CFA, default parameter
-		{ SemanticError( yylloc, "Default parameter for argument is currently unimplemented." ); $$ = nullptr; }
-		// { $$ = new ExpressionNode( build_constantInteger( *new string( "2" ) ) ); }
-	| assignment_expression
-	;
-
 field_name_list:										// CFA, tuple field selector
 	field
@@ -1116,4 +1097,27 @@
 constant_expression:
 	conditional_expression
+	;
+
+argument_expression_list_opt:
+	// empty
+		{ $$ = nullptr; }
+	| argument_expression_list
+	;
+
+argument_expression_list:
+	argument_expression
+	// | argument_expression_list_opt ',' argument_expression // CFA, allow empty argument
+	| argument_expression_list ',' argument_expression	// no empty argument
+		{ $$ = $1->set_last( $3 ); }
+	;
+
+argument_expression:
+	'?'													// CFA, default parameter
+		// { SemanticError( yylloc, "Argument to default parameter is currently unimplemented." ); $$ = nullptr; }
+		{ $$ = new ExpressionNode( build_constantInteger( yylloc, *new string( "2" ) ) ); }
+	| '?' identifier '=' assignment_expression			// CFA, keyword argument
+		// { SemanticError( yylloc, "keyword argument is currently unimplemented." ); $$ = nullptr; }
+		{ $$ = $4; }
+	| assignment_expression
 	;
 
@@ -3543,4 +3547,7 @@
 	identifier_at
 		{ $$ = DeclarationNode::newName( $1 ); }
+	| '?' identifier
+		// { SemanticError( yylloc, "keyword parameter is currently unimplemented." ); $$ = nullptr; }
+		{ $$ = DeclarationNode::newName( $2 ); }
 	| '(' paren_identifier ')'							// redundant parenthesis
 		{ $$ = $2; }
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ tests/Makefile.am	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -69,5 +69,5 @@
 .PHONY : concurrency list .validate .test_makeflags
 .INTERMEDIATE : .validate .validate.cfa .test_makeflags
-EXTRA_PROGRAMS = avl_test linkonce linking/mangling/anon .dummy_hack # build but do not install
+EXTRA_PROGRAMS = array-collections/boxed avl_test linkonce linking/mangling/anon .dummy_hack # build but do not install
 EXTRA_DIST = test.py \
 	pybin/__init__.py \
@@ -77,4 +77,6 @@
 	pybin/tools.py \
 	long_tests.hfa \
+	array-collections/boxed.hfa \
+	array-collections/boxed.cases.hfa \
 	avltree/avl-private.h \
 	avltree/avl.h \
@@ -104,4 +106,5 @@
 	done
 
+array_collections_boxed_SOURCES = array-collections/boxed.main.cfa array-collections/boxed.bookend.cfa
 avl_test_SOURCES = avltree/avl_test.cfa avltree/avl0.cfa avltree/avl1.cfa avltree/avl2.cfa avltree/avl3.cfa avltree/avl4.cfa avltree/avl-private.cfa
 linkonce_SOURCES = link-once/main.cfa link-once/partner.cfa
Index: tests/array-collections/.expect/boxed.txt
===================================================================
--- tests/array-collections/.expect/boxed.txt	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
+++ tests/array-collections/.expect/boxed.txt	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -0,0 +1,248 @@
+------- 1a (singleton): T x[1], expecting T=short, got sizeof(T)=2, expecting 2-byte elems
+Delta 0--1 expected 2 bytes, actual 2 bytes
+Delta 1--2 expected 2 bytes, actual 2 bytes
+Delta 0--2 expected 4 bytes, actual 4 bytes
+Delta 0--n expected 2 bytes, actual 2 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 1b (singleton): T x[1], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 40 bytes, actual 40 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2a (general): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2b (general): T x[42], expecting T=float, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2c (general): T x[42], expecting T=long long, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2d (general): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3a (user VLA): T x[n], got n=1, expecting T=int, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 4 bytes, actual 4 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3b (user VLA): T x[n], got n=42, expecting T=int, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3c (user VLA): T x[n], got n=1, expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 40 bytes, actual 40 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3d (user VLA): T x[n], got n=42, expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 4a (2-dimensional): T x[42][42], expecting T=char, got sizeof(T)=1, expecting 1-byte atoms
+Delta 0,0--0,1 expected 1 bytes, actual 1 bytes
+Delta 0,1--0,2 expected 1 bytes, actual 1 bytes
+Delta 0,0--0,2 expected 2 bytes, actual 2 bytes
+Delta 0,0--0,n expected 42 bytes, actual 42 bytes
+Delta 0,0--1,0 expected 42 bytes, actual 42 bytes
+Delta 1,0--2,0 expected 42 bytes, actual 42 bytes
+Delta 0,0--2,0 expected 84 bytes, actual 84 bytes
+Delta 0,0--n,0 expected 1764 bytes, actual 1764 bytes
+Delta 0,0--n,n expected 1806 bytes, actual 1806 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 4b (2-dimensional): T x[42][42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte atoms
+Delta 0,0--0,1 expected 40 bytes, actual 40 bytes
+Delta 0,1--0,2 expected 40 bytes, actual 40 bytes
+Delta 0,0--0,2 expected 80 bytes, actual 80 bytes
+Delta 0,0--0,n expected 1680 bytes, actual 1680 bytes
+Delta 0,0--1,0 expected 1680 bytes, actual 1680 bytes
+Delta 1,0--2,0 expected 1680 bytes, actual 1680 bytes
+Delta 0,0--2,0 expected 3360 bytes, actual 3360 bytes
+Delta 0,0--n,0 expected 70560 bytes, actual 70560 bytes
+Delta 0,0--n,n expected 72240 bytes, actual 72240 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 5a (pair): pair(T,T) x[42], expecting T=double, got sizeof(T)=8, expecting 16-byte atoms
+Delta 0--1 expected 16 bytes, actual 16 bytes
+Delta 1--2 expected 16 bytes, actual 16 bytes
+Delta 0--2 expected 32 bytes, actual 32 bytes
+Delta 0--n expected 672 bytes, actual 672 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 5b (pair): pair(T,T) x[42], expecting T=bigun, got sizeof(T)=40, expecting 80-byte atoms
+Delta 0--1 expected 80 bytes, actual 80 bytes
+Delta 1--2 expected 80 bytes, actual 80 bytes
+Delta 0--2 expected 160 bytes, actual 160 bytes
+Delta 0--n expected 3360 bytes, actual 3360 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 6a (raii): T x[42], expecting T=my_mgd_t, got sizeof(T)=4, expecting 4-byte elems
+ctor call 0 targets first + 0 bytes
+ctor call 1 targets first + 4 bytes
+ctor call 40 targets first + 160 bytes
+ctor call 41 targets first + 164 bytes
+dtor call 0 targets first - 0 bytes
+dtor call 1 targets first - 4 bytes
+dtor call 40 targets first - 160 bytes
+dtor call 41 targets first - 164 bytes
+dtor lo off by 0 bytes, hi off by 0 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7a (communication, poly-poly direct, by param T[]): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7b (communication, poly-poly direct, by param T[]): T x[42], expecting T=float, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7c (communication, poly-poly direct, by param T[]): T x[42], expecting T=long long, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7d (communication, poly-poly direct, by param T[]): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 8a (communication, poly-poly direct, by param T(*)[*]): T x[42], expecting T=double, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9a (communication, poly-poly assertion, by param T[]): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9b (communication, poly-poly assertion, by param T[]): T x[42], expecting T=float, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9c (communication, poly-poly assertion, by param T[]): T x[42], expecting T=long long, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9d (communication, poly-poly assertion, by param T[]): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 10a (communication, poly-poly assertion, by param T(*)[*]): T x[42], expecting T=double, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 11a (communication, poly-mono assertion, by param T[]): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 11b (communication, poly-mono assertion, by param T[]): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 12a (communication, poly-mono assertion, by param T(*)[*]): T x[42], expecting T=double, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 13a (communication, mono-poly direct, by param T[]): char x[42], expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 13b (communication, mono-poly direct, by param T[]): bigun x[42], expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 13a (communication, mono-poly direct, by param T(*)[*]): double x[42], expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 15a (operators): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+?[?] rev off by 0
+?+? off by 0
+?+? rev off by 0
+?+=? off by 0
+?-=? off by 0
+?-? +ve off by 0
+bookends were not set
+------- 15b (operators): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+?[?] rev off by 0
+?+? off by 0
+?+? rev off by 0
+?+=? off by 0
+?-=? off by 0
+?-? +ve off by 0
+bookends were not set
Index: tests/array-collections/array-sbscr-types.cfa
===================================================================
--- tests/array-collections/array-sbscr-types.cfa	(revision 38e20a80ff2d2e0f1bf7fef08b0948ab64d1abf0)
+++ tests/array-collections/array-sbscr-types.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -28,4 +28,5 @@
 // generally using ptrdiff_t-typed variables to convey numeric values.
 
+#include <assert.h>
 
 #define show( expr ) printf( "%.1f\n", expr )
Index: tests/array-collections/boxed.bookend.cfa
===================================================================
--- tests/array-collections/boxed.bookend.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
+++ tests/array-collections/boxed.bookend.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -0,0 +1,80 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.bookend.cfa -- stack address recording and acceptance for the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See general test documentation in boxed.main.cfa.
+// See abbreviation definitions in boxed.cases.hfa.
+
+
+
+
+#include "boxed.hfa"
+
+char * ar_lo = (char *) -1;             // array low edge; starts at the highest address so that ar_lo >= ar_hi reads as "not set" (see reportBookends)
+char * ar_hi = 0p;                      // array high edge; starts null; extern in boxed.hfa -- presumably stored by the access logic in boxed.main.cfa, confirm there
+static char * bookend_lo = (char *) -1; // low bookend; assigned by bookendInner; same "not set" sentinel scheme as ar_lo
+static char * bookend_hi = 0p;          // high bookend; assigned by the generated bookendOuter_<TRID> wrappers below
+
+void bookendInner( void ) {             // records the address of one of its own locals as the low bookend
+    char var = 'x';                     // exists only so that its address can be taken
+    (void) var;                         // presumably silences an unused-variable diagnostic -- confirm
+    bookend_lo = & var;                 // in this file the saved address is only printed, compared and subtracted (reportBookends), never dereferenced
+}
+
+#define TC(...) // test-case rows of boxed.cases.hfa expand to nothing here; only test-rig (TR) rows generate code
+#define TR( TRID, SZS, SZV, ETG, ACCS, SPS, OVLD ) \
+    F_SIG( bookendOuter, TRID, SZS, SZV, ACCS, SPS, OVLD ) {                                  \
+        char var = 'x';                                                              \
+        (void) var;                                                                  \
+        bookend_hi = & var;                                                          \
+        return CALL( allocAndAccess, TRID, SZS, n, expectedElmSz, tcid, vart );     \
+    }
+#include "boxed.cases.hfa" // each TR row becomes one bookendOuter_<TRID> definition: save a local's address in bookend_hi, then forward to allocAndAccess_<TRID>
+#undef TC // drop both table macros so a later include of the table can define them differently
+#undef TR
+
+void resetBookends( void ) {            // puts all four edge pointers back to their initial "not set" values
+    bookend_lo = (char *) -1;           // highest address: any recorded low bookend will be below it
+    bookend_hi = 0p;                    // null: any recorded high bookend will be above it
+    ar_lo = (char *) -1;                // same sentinel scheme for the array edges
+    ar_hi = 0p;
+}
+
+void reportBookends( void ) {           // prints whether the recorded array edges lie strictly between the two bookends
+    ptrdiff_t ar_lo_fwd_offs = ar_lo - bookend_lo;   // array low edge minus low bookend; computed before the "not set" checks below
+    ptrdiff_t ar_hi_rev_offs = bookend_hi - ar_hi;   // high bookend minus array high edge
+
+    VPRT( "Bookends are %p and %p\n", bookend_lo, bookend_hi );   // VPRT prints only when VERBOSE is defined (boxed.hfa)
+    VPRT( "Array ends are %p and %p\n", ar_lo, ar_hi );
+    VPRT( "Bookend lo fwd offset %zd\n", bookend_lo - bookend_lo );   // "fwd" offsets are measured from bookend_lo
+    VPRT( "Array lo fwd offset %zd\n", ar_lo_fwd_offs );
+    VPRT( "Array hi fwd offset %zd\n", ar_hi - bookend_lo );
+    VPRT( "Bookend hi fwd offset %zd\n", bookend_hi - bookend_lo );
+    VPRT( "Bookend lo rev offset %zd\n", bookend_hi - bookend_lo );   // "rev" offsets are measured back from bookend_hi
+    VPRT( "Array lo rev offset %zd\n", bookend_hi - ar_lo );
+    VPRT( "Array hi rev offset %zd\n", ar_hi_rev_offs );
+    VPRT( "Bookend hi rev offset %zd\n", bookend_hi - bookend_hi );
+
+    if (bookend_lo >= bookend_hi) {     // true for the reset sentinels; NOTE(review): also assumes the inner frame's local sits below the outer's -- confirm per target
+        printf("bookends were not set\n");
+        return;
+    }
+    if (ar_lo >= ar_hi) {               // true while the array edges still hold their reset sentinels
+        printf("array bounds were not set\n");
+        return;
+    }
+
+    printf("array starts after lo bookend: %s\n", ar_lo_fwd_offs > 0 ? "yes" : "no" );   // strict: an edge equal to the bookend reports "no"
+    printf("array ends before hi bookend: %s\n", ar_hi_rev_offs > 0 ? "yes" : "no" );
+}
Index: tests/array-collections/boxed.cases.hfa
===================================================================
--- tests/array-collections/boxed.cases.hfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
+++ tests/array-collections/boxed.cases.hfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -0,0 +1,116 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.cases.hfa -- tables of test cases for the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See general test documentation in boxed.main.cfa.
+
+/*
+This pair of tables summarizes the handwritten functions of .main.cfa, for automatically wrapping and calling them.
+
+TR        test rig                    one handwritten function and its stack of generated wrappers
+- TRID    test rig identifier         (primary key)
+- SZS     sizing style                how the array's length (in number of elements) is given
+  - NSTAT static number of elements   generated code hardcodes the array length; the outputted C VLA accommodates only varying-sized T
+  - NDYN  dynamic number of elements  generated code uses a parameter for the length; represents a VLA apparent to the CFA programmer
+- SZV     sizing value                concrete size of the test case, except for (TR, NDYN), which has the parameter name
+- ETG     element type generator      how the array's element type relates to T
+  - ID    identity                    array is of T
+  - PAIR  pair                        array is of pair(T,T)
+- ACCS    access style                how the access-side code sees the elements
+  - BUF   buffer                      accessor is working directly with the declared array (buffer) variable
+  - RAII  RAII                        accessor is a constructor/destructor pair
+  - PFST  pointer to first element    accessor is in a support function, who receives the array as parameter T x[]
+  - PARR  pointer to array            accessor is in a support function, who receives the array as parameter T (*x)[length]
+- SPS     support polymorphism style  when passing the array to a support (helper) function, how the call uses type variables and assertions
+  - NA    not applicable              the rig does not use a support function
+  - PPD   poly-poly direct            polymorphic calls polymorphic, directly (C-style name lookup)
+  - PPA   poly-poly assertion         polymorphic calls polymorphic, via assertion
+  - PMA   poly-mono assertion         polymorphic calls monomorphic, via assertion
+  - MPD   mono-poly direct            monomorphic calls polymorphic, directly (C-style name lookup)
+- OVLD    overloading type            type of pointer returned from the function (wrapper), lets caller select an overload by return type
+TC        test case                   one call to (the stack of wrappers of) a handwritten function
+- TRID    test rig identifier         (primary key, pseudo foreign key)
+- TCID    test case identifier        (primary key)
+- SZS     sizing style                (duplicate, join result)
+- SZV     sizing value                (duplicate, join result), except for TC under TR NDYN, which has concrete size of the test case
+- ETG     element type generator      (duplicate, join result)
+- VART    varying type                type to use for T in this call
+*/
+
+// #define TR( TRID,       SZS,   SZV, ETG,   ACCS, SPS, OVLD              )
+// #define TC( TRID, TCID, SZS,   SZV, ETG,                      VART      )
+
+           TR( 1,          NSTAT, 1,   ID,    BUF,  NA,  T                 )
+           TC( 1,    a,    NSTAT, 1,   ID,                       short     )
+           TC( 1,    b,    NSTAT, 1,   ID,                       bigun     )
+
+           TR( 2,          NSTAT, 42,  ID,    BUF,  NA,  T                 )
+           TC( 2,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 2,    b,    NSTAT, 42,  ID,                       float     )
+           TC( 2,    c,    NSTAT, 42,  ID,                       long long )
+           TC( 2,    d,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 3,          NDYN,  n,   ID,    BUF,  NA,  T                 )
+           TC( 3,    a,    NDYN,  1,   ID,                       int       )
+           TC( 3,    b,    NDYN,  42,  ID,                       int       )
+           TC( 3,    c,    NDYN,  1,   ID,                       bigun     )
+           TC( 3,    d,    NDYN,  42,  ID,                       bigun     )
+
+           TR( 4,          NSTAT, 42,  ID,    BUF,  NA,  T                 )
+           TC( 4,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 4,    b,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 5,          NSTAT, 42,  PAIR,  BUF,  NA,  T                 )
+           TC( 5,    a,    NSTAT, 42,  PAIR,                     double    )
+           TC( 5,    b,    NSTAT, 42,  PAIR,                     bigun     )
+
+           TR( 6,          NSTAT, 42,  ID,    RAII, NA,  T                 )
+           TC( 6,    a,    NSTAT, 42,  ID,                       my_mgd_t  )
+
+           TR( 7,          NSTAT, 42,  ID,    PFST, PPD, T                 )
+           TC( 7,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 7,    b,    NSTAT, 42,  ID,                       float     )
+           TC( 7,    c,    NSTAT, 42,  ID,                       long long )
+           TC( 7,    d,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 8,          NSTAT, 42,  ID,    PARR, PPD, T                 )
+           TC( 8,    a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 9,          NSTAT, 42,  ID,    PFST, PPA, T                 )
+           TC( 9,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 9,    b,    NSTAT, 42,  ID,                       float     )
+           TC( 9,    c,    NSTAT, 42,  ID,                       long long )
+           TC( 9,    d,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 10,         NSTAT, 42,  ID,    PARR, PPA, T                 )
+           TC( 10,   a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 11,         NSTAT, 42,  ID,    PFST, PMA, T                 )
+           TC( 11,   a,    NSTAT, 42,  ID,                       char      )
+           TC( 11,   b,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 12,         NSTAT, 42,  ID,    PARR, PMA, T                 )
+           TC( 12,   a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 13,         NSTAT, 42,  ID,    PFST, MPD, char              ) // overload 1
+           TR( 13,         NSTAT, 42,  ID,    PFST, MPD, bigun             ) // overload 2
+           TC( 13,   a,    NSTAT, 42,  ID,                       char      )
+           TC( 13,   b,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 14,         NSTAT, 42,  ID,    PARR, MPD, double            )
+           TC( 14,   a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 15,         NSTAT, 42,  ID,    PFST, PPD, T                 )
+           TC( 15,   a,    NSTAT, 42,  ID,                       char      )
+           TC( 15,   b,    NSTAT, 42,  ID,                       bigun     )
Index: tests/array-collections/boxed.hfa
===================================================================
--- tests/array-collections/boxed.hfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
+++ tests/array-collections/boxed.hfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -0,0 +1,113 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.hfa -- inter-compile unit dependencies and common macros for the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See general test documentation in boxed.main.cfa.
+// See abbreviation definitions in boxed.cases.hfa.
+
+
+#ifdef SUPPRESS_INIT
+#define DECTYVAR(T) T*
+#define INITARR @= {}
+#else
+#define DECTYVAR(T) T
+#define INITARR
+#endif
+
+// ETG definitions
+#define ID(TY) TY
+#define PAIR(TY) pair(TY, TY)
+
+#define DECL(            F_SLUG, TRID,   SZS, SZV, ACCS, SPS, OVLD ) F_SIG( F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD );
+
+#define CALL(            F_SLUG, TRID, SZS, SZV, ... ) CALL__SZS_ ## SZS( F_SLUG, TRID, SZV, __VA_ARGS__ )
+#define CALL__SZS_NSTAT( F_SLUG, TRID,      SZV, ... ) F_NAME_NSTAT( F_SLUG, TRID ) ( __VA_ARGS__      )
+#define CALL__SZS_NDYN(  F_SLUG, TRID,      SZV, ... ) F_NAME_NDYN(  F_SLUG, TRID ) ( __VA_ARGS__, SZV )
+
+#define F_SIG(                      F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__SPS_ ## SPS( F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_NA(              F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_SMPL(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_PPD(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_SMPL(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_PPA(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_ASSN(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_PMA(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_ASSN(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_MPD(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_NONE(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_SMPL(           F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_SMPL__SZS_ ## SZS( F_SLUG, TRID, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_SMPL__SZS_NSTAT(F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) forall( DECTYVAR(T) ) OVLD * F_NAME_NSTAT(F_SLUG, TRID ) ( size_t expectedElmSz, const char * tcid, const char * vart           )
+#define F_SIG__POLY_SMPL__SZS_NDYN( F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) forall( DECTYVAR(T) ) OVLD * F_NAME_NDYN( F_SLUG, TRID ) ( size_t expectedElmSz, const char * tcid, const char * vart, size_t n )
+#define F_SIG__POLY_ASSN(           F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_ASSN__SZS_ ## SZS( F_SLUG, TRID, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_ASSN__SZS_NSTAT(F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) forall( DECTYVAR(T) | { DECL_ACCESS( F_SLUG, TRID, ACCS, SPS, SZV ) } ) OVLD * F_NAME_NSTAT(F_SLUG, TRID) ( size_t expectedElmSz, const char * tcid, const char * vart           )
+#define F_SIG__POLY_NONE(           F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_NONE__SZS_ ## SZS( F_SLUG, TRID, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_NONE__SZS_NSTAT(F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) OVLD * F_NAME_NSTAT(F_SLUG, TRID) ( size_t expectedElmSz, const char * tcid, const char * vart )
+
+#define F_NAME_NSTAT( F_SLUG, TRID ) F_SLUG ## _ ## TRID
+#define F_NAME_NDYN(  F_SLUG, TRID ) F_SLUG ## _ ## TRID
+
+#define DECL_ACCESS( F_SLUG, TRID, ACCS, SPS, SZ ) void F_NAME_NSTAT( access, TRID ) ( size_t, ACCESS_PARM_TY(ACCS, SZ) );
+#define ACCESS_PARM_TY(ACCS, SZ) ACCESS_PARM_TY__ACCS_ ## ACCS( SZ )
+#define ACCESS_PARM_TY__ACCS_PFST(SZ) T *
+#define ACCESS_PARM_TY__ACCS_PARR(SZ) T (*)[SZ]
+
+// Used as the "polymorphic, but not T" element type; the PAIR element-type generator above instantiates it as pair(TY, TY).
+forall(U, V)
+struct pair {
+    U fst;   // first component, of type U
+    V snd;   // second component, of type V
+};
+
+// Used as the "larger than a pointer" element type.
+// Size chosen empirically to give buffers larger than the whole stack frame
+// for a pointer-sized element.
+struct bigun {                 // five long long fields; the test's expected output records sizeof(bigun) as 40
+    long long int a;           // the fields are not used individually in this header
+    long long int b;
+    long long int c;
+    long long int d;
+    long long int e;
+};
+
+// Verbose output is unstable from one compiler-target-optimization to another.
+// So it can't run in the overnight test.  But it helps see what went wrong.
+#ifdef VERBOSE
+#define VPRT(...) printf(__VA_ARGS__)
+#else
+#define VPRT(...)
+#endif
+
+
+
+
+// defined in boxed.bookend.cfa
+
+void bookendInner( void );
+
+#define TC(...)
+#define TR( TRID, SZS, SZV, ETG, ACCS, SPS, OVLD ) DECL( bookendOuter, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#include "boxed.cases.hfa"
+#undef TC
+#undef TR
+
+void resetBookends( void );
+void reportBookends( void );
+
+extern char * ar_hi;
+extern char * ar_lo;
+
+
+
+// defined in boxed.main.cfa
+
+#define TC(...)
+#define TR( TRID, SZS, SZV, ETG, ACCS, SPS, OVLD ) DECL( allocAndAccess, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#include "boxed.cases.hfa"
+#undef TC
+#undef TR
Index: tests/array-collections/boxed.main.cfa
===================================================================
--- tests/array-collections/boxed.main.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
+++ tests/array-collections/boxed.main.cfa	(revision ce02877193b9b7fb9e2bdf7486d493bf45be2bec)
@@ -0,0 +1,457 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.main.cfa -- core logic of the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See abbreviation definitions in boxed.cases.hfa.
+
+/*
+The "array boxed" test deals with an array of T's, when T is dynamically sized.
+
+All cases generate a VLA, because even a single (dynamically sized) T would be
+backed by a VLA.  All cases generate pointer arithmetic on, and casts from,
+void*, because (dynamically sized) T has no corresponding type in generated C.
+These facts are true about boxing in general.  The test ensures that the VLA
+is big enough and that accessed elements are spaced by the correct amounts,
+specifically for cases where the user declares an array of T's, i.e. demands
+several adjacent char-buffer-implemented T's.
+
+The core test logic occurs in the functions named allocAndAccess, below.  It
+allocates an array of T's, then accesses them.  In some cases, the access is
+within the allocAndAccess function, in others, it's within a called helper
+function.  The access logic prints information about the spacing of the
+elements (as it sees them) and it stores the array-edge addresses for
+subsequent validation.
+
+The access output uses n, rather than (n-1), as its "end" address, just to
+keep expectation arithmetic simple.  So the output does discuss addresses of
+elements that do not exist.
+
+The access output uses an expectedElmSz parameter, and calculations from it.
+Care is taken to ensure that we are not merely comparing two executions of the
+same, possibly flawed, math.  First, the value of expectedElmSz is always
+calculated using concrete types, e.g. sizeof(float), while the SUT-produced
+value is from (implied use of) literally sizeof(T), just in a case where we
+have T=float.  Second, the details within the calculation are not the main
+feature of interest, rather, it's _whether_ this calculation is being applied
+in the cases where it should be, instead of, for example, seeming to assume
+sizeof(T)==1 or sizeof(T)==sizeof(size_t), both being bugs that actually
+occurred.
+
+An allocAndAccess function runs in an instrumentation context that observes
+the stack frame that allocAndAccess gets.  This instrumentation verifies that
+the recorded array-edge addresses are within the stack frame.  The bug being
+guarded against is a mistake in the box-pass's generated buffer declaration,
+which causes a function (like allocAndAccess) that declares an array of T's to
+get an incorrectly sized stack frame.  This test was created along with a fix
+of such a bug.
+
+Including the instrumentation context, the call graph is:
+    main
+        run_X
+            bookendOuter_X
+                allocAndAccess_X
+                    bookendInner
+            reportBookends
+The outer and inner "bookend" functions record the addresses of a local
+variable within their respective stack frames, thus giving a lenient
+approximation of the extent of the allocAndAccess stack frame, and
+thereby, of its VLA.  Requiring a sufficiently large VLA, and seeing the
+resulting access stay in bounds (with constant overhead shown under verbose
+output) gives confidence in the actual VLA being of the right size.
+
+For this instrumentation to work, separate compilation (optimization) units
+are required: outer and inner "bookend" functions in one, allocAndAccess in the
+other.  Otherwise, the optimizer sees the full call chain and compresses its
+use of frame pointers / VLA zones, into one ABI frame.  Then, the outer and
+inner reference local variables no longer span the VLA.  So, the "bookend"
+routines are in boxed.bookend.cfa, while everything else is here.
+
+These code elements are boilerplate, and are realized with macros driven by the
+tables in boxed.cases.hfa:
+    boxed.main.cfa      main calls run_X
+    boxed.main.cfa      declaration and definition of run_X, including
+                            calling bookendOuter_X
+                            calling reportBookends
+    boxed.hfa           declaration of bookendOuter_X
+    boxed.bookend.cfa   definition of bookendOuter_X, including
+                            calling allocAndAccess_X
+    boxed.hfa           declaration of allocAndAccess_X
+The definition of allocAndAccess_X is kept bespoke, to keep the actual test
+details readable.  As a result, the list of allocAndAccess_X definitions in
+boxed.main.cfa must be kept aligned with the tables in boxed.cases.hfa.
+A common definition of bookendInner is used across all test cases, so its
+declaration and definition are not table driven.
+
+*/
+
+#include "boxed.hfa"
+
+#define SHOW_ACCESS_1D( N_ELEMS )  /* expects x and expectedElmSz in the expanding scope */     \
+    char * e0 = (char *) & x[0];                                                                \
+    char * e1 = (char *) & x[1];                                                                \
+    char * e2 = (char *) & x[2];                                                                \
+    char * en = (char *) & x[N_ELEMS];      /* "end" is element n, which does not exist */      \
+    /* byte distances between those addresses, as the code under test computes them */          \
+    ptrdiff_t d01 = e1 - e0;                                                                    \
+    ptrdiff_t d12 = e2 - e1;                                                                    \
+    ptrdiff_t d02 = e2 - e0;                                                                    \
+    ptrdiff_t d0n = en - e0;                                                                    \
+    /* expected values are size_t, hence %zu; measured deltas are ptrdiff_t, hence %td */       \
+    printf("Delta 0--1 expected %zu bytes, actual %td bytes\n", 1 * expectedElmSz, d01);        \
+    printf("Delta 1--2 expected %zu bytes, actual %td bytes\n", 1 * expectedElmSz, d12);        \
+    printf("Delta 0--2 expected %zu bytes, actual %td bytes\n", 2 * expectedElmSz, d02);        \
+    printf("Delta 0--n expected %zu bytes, actual %td bytes\n", N_ELEMS * expectedElmSz, d0n);  \
+                                                                                                \
+    VPRT( "Array start %p end %p\n", e0, en );                                                  \
+    /* record the array edges for subsequent validation */                                      \
+    ar_lo = e0;                                                                                 \
+    ar_hi = en;
+
+
+#define SHOW_ACCESS_2D( N_ELEMS )  /* expects x (N_ELEMS x N_ELEMS) and expectedElmSz in scope */    \
+    char * e00 = (char *) & x[0][0];                                                                \
+    char * e01 = (char *) & x[0][1];                                                                \
+    char * e02 = (char *) & x[0][2];                                                                \
+    char * e0n = (char *) & x[0][N_ELEMS];                                                          \
+    /* steps along the outer dimension */                                                           \
+    char * e10 = (char *) & x[1][0];                                                                \
+    char * e20 = (char *) & x[2][0];                                                                \
+    char * en0 = (char *) & x[N_ELEMS][0];                                                          \
+    /* far corner: column n of row n, neither of which exists */                                    \
+    char * enn = (char *) & x[N_ELEMS][N_ELEMS];                                                    \
+    /* byte distances within row 0 */                                                               \
+    ptrdiff_t d_00_01 = e01 - e00;                                                                  \
+    ptrdiff_t d_01_02 = e02 - e01;                                                                  \
+    ptrdiff_t d_00_02 = e02 - e00;                                                                  \
+    ptrdiff_t d_00_0n = e0n - e00;                                                                  \
+    /* byte distances between row starts */                                                         \
+    ptrdiff_t d_00_10 = e10 - e00;                                                                  \
+    ptrdiff_t d_10_20 = e20 - e10;                                                                  \
+    ptrdiff_t d_00_20 = e20 - e00;                                                                  \
+    ptrdiff_t d_00_n0 = en0 - e00;                                                                  \
+                                                                                                    \
+    ptrdiff_t d_00_nn = enn - e00;                                                                  \
+    /* expected values are size_t, hence %zu; measured deltas are ptrdiff_t, hence %td */           \
+    printf("Delta 0,0--0,1 expected %zu bytes, actual %td bytes\n", 1 * 1 * expectedElmSz, d_00_01);        \
+    printf("Delta 0,1--0,2 expected %zu bytes, actual %td bytes\n", 1 * 1 * expectedElmSz, d_01_02);        \
+    printf("Delta 0,0--0,2 expected %zu bytes, actual %td bytes\n", 1 * 2 * expectedElmSz, d_00_02);        \
+    printf("Delta 0,0--0,n expected %zu bytes, actual %td bytes\n", 1 * N_ELEMS * expectedElmSz, d_00_0n);  \
+                                                                                                    \
+    printf("Delta 0,0--1,0 expected %zu bytes, actual %td bytes\n", N_ELEMS * 1 * expectedElmSz, d_00_10);        \
+    printf("Delta 1,0--2,0 expected %zu bytes, actual %td bytes\n", N_ELEMS * 1 * expectedElmSz, d_10_20);        \
+    printf("Delta 0,0--2,0 expected %zu bytes, actual %td bytes\n", N_ELEMS * 2 * expectedElmSz, d_00_20);        \
+    printf("Delta 0,0--n,0 expected %zu bytes, actual %td bytes\n", N_ELEMS * N_ELEMS * expectedElmSz, d_00_n0);  \
+                                                                                                    \
+    printf("Delta 0,0--n,n expected %zu bytes, actual %td bytes\n", N_ELEMS * N_ELEMS * expectedElmSz + \
+                                                                    1       * N_ELEMS * expectedElmSz, d_00_nn);        \
+                                                                                                    \
+    VPRT( "Array start %p end %p\n", e00, enn );                                                    \
+    /* record the array edges for subsequent validation */                                          \
+    ar_lo = e00;                                                                                    \
+    ar_hi = en0; /* one past the end is the start of nonexistent row n (en0), not its end (enn) */
+
+
+
+
+
+// ---------- 1, singleton
+// Declares a one-element array of T, calls bookendInner, reports element spacing; always returns 0p.
+forall( T ) T * allocAndAccess_1 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 1%s (singleton): T x[1], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 1 ] INITARR;       // INITARR is not defined in this file
+    bookendInner();         // called while x is in scope
+    SHOW_ACCESS_1D( 1 )     // also forms the addresses of x[1] and x[2], which do not exist here
+    return 0p;              // size_t values above print with %zu
+}
+
+// ---------- 2, general
+// Declares a 42-element array of T, calls bookendInner, reports element spacing; always returns 0p.
+forall( T ) T * allocAndAccess_2 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 2%s (general): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;      // INITARR is not defined in this file
+    bookendInner();         // called while x is in scope
+    SHOW_ACCESS_1D( 42 )    // prints the deltas and sets ar_lo / ar_hi
+    return 0p;              // size_t values above print with %zu
+}
+
+// ---------- 3, user VLA
+// As the 42-element case, but the array length is the caller-supplied n; always returns 0p.
+forall( T ) T * allocAndAccess_3 ( size_t expectedElmSz, const char * tcid, const char * vart, size_t n ) {
+    printf("------- 3%s (user VLA): T x[n], got n=%zu, expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, n, vart, sizeof(T), expectedElmSz);
+    T x[ n ] INITARR;       // INITARR is not defined in this file
+    bookendInner();         // called while x is in scope
+    SHOW_ACCESS_1D( n )     // prints the deltas and sets ar_lo / ar_hi
+    return 0p;              // n, sizeof(T) and expectedElmSz are size_t, hence %zu
+}
+
+// ---------- 4, 2-dimensional
+// Declares a 42-by-42 array of T, calls bookendInner, reports spacing in both dimensions; always returns 0p.
+forall( T ) T * allocAndAccess_4 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 4%s (2-dimensional): T x[42][42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte atoms\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ][ 42 ] INITARR;    // INITARR is not defined in this file
+    bookendInner();             // called while x is in scope
+    SHOW_ACCESS_2D( 42 )        // prints the deltas and sets ar_lo / ar_hi
+    return 0p;                  // size_t values above print with %zu
+}
+
+// ---------- 5, pair
+// Declares a 42-element array of pair(T,T), calls bookendInner, reports element spacing; always returns 0p.
+forall( T ) T * allocAndAccess_5 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 5%s (pair): pair(T,T) x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte atoms\n", tcid, vart, sizeof(T), expectedElmSz);
+    pair(T,T) x[ 42 ] INITARR;  // the element type is the pair, not T
+    bookendInner();             // called while x is in scope
+    SHOW_ACCESS_1D( 42 )        // prints the deltas and sets ar_lo / ar_hi
+    return 0p;                  // size_t values above print with %zu
+}
+
+// ---------- 6, raii
+// Element type given a user-written constructor and destructor (below) that log their target addresses.
+struct my_mgd_t {
+    float x;  // sole member; the cdtors below never read or write it
+};
+
+// Auxiliary state used in the RAII rig only.  Only to format/excerpt output.  Reset per TC.
+static struct {
+    size_t total_elems;     // size of array being managed
+    size_t ctor_calls;      // number of ctor calls seen so far
+    size_t dtor_calls;      // number of dtor calls seen so far
+    char * ctor_first;      // argument of first ctor call
+    char * dtor_first;      // argument of first dtor call
+    char * dtor_lo;         // lowest dtor argument seen yet
+    char * dtor_hi;         // highest dtor argument seen yet
+} raii;
+
+void ?{}( my_mgd_t & this ) {  // constructor: logs its target address and widens ar_lo / ar_hi to include it
+    if (raii.ctor_first == 0p) raii.ctor_first = (char *) & this;  // first target becomes the base for printed offsets
+    VPRT( "ctor call %zu targets %p\n", raii.ctor_calls, (void *) & this );  // %p takes a void *
+    if (raii.ctor_calls < 2 || raii.total_elems - raii.ctor_calls <= 2)  // excerpt: first two and last two calls only
+        printf( "ctor call %zu targets first + %td bytes\n", raii.ctor_calls, ((char*)&this - raii.ctor_first) );  // size_t count, ptrdiff_t offset
+    // ctor call locations fill the conformed ar_lo/hi
+    if ( (char *) & this < ar_lo ) ar_lo = (char *) & this;
+    if ( (char *) & this > ar_hi ) ar_hi = (char *) & this;  // so ar_hi ends at the highest element's address
+    raii.ctor_calls += 1;
+}
+
+void ^?{}( my_mgd_t & this ) {  // destructor: logs its target address and compares the dtor range with ar_lo / ar_hi
+    // dtor calls count backward
+    if (raii.dtor_first == 0p) raii.dtor_first = (char *) & this;  // first target becomes the base for printed offsets
+    VPRT( "dtor call %zu targets %p\n", raii.dtor_calls, (void *) & this );  // %p takes a void *
+    if (raii.dtor_calls < 2 || raii.total_elems - raii.dtor_calls <= 2)  // excerpt: first two and last two calls only
+        printf( "dtor call %zu targets first - %td bytes\n", raii.dtor_calls, (raii.dtor_first - (char*)&this) );  // size_t count, ptrdiff_t offset
+    // dtor call locations fill auxiliary state; reconciled with the conformed ones on last call
+    if ( (char *) & this < raii.dtor_lo ) raii.dtor_lo = (char *) & this;
+    if ( (char *) & this > raii.dtor_hi ) raii.dtor_hi = (char *) & this;
+    raii.dtor_calls += 1;
+    if (raii.dtor_calls >= raii.total_elems)  // last expected call: both differences print 0 when the ranges agree
+        printf( "dtor lo off by %td bytes, hi off by %td bytes\n", (ar_lo - raii.dtor_lo), (ar_hi - raii.dtor_hi) );
+}
+
+forall( T ) T * allocAndAccess_6 ( size_t expectedElmSz, const char * tcid, const char * vart ) {  // resets the raii rig, then declares T x[42]; always returns 0p
+    raii.total_elems = 42;  // must match the array length declared below
+    raii.ctor_calls = 0;
+    raii.dtor_calls = 0;
+    raii.ctor_first = 0p;
+    raii.dtor_first = 0p;
+    raii.dtor_lo = (char*)-1;  // highest possible address, so the first dtor call lowers it
+    raii.dtor_hi = 0p;         // lowest possible address, so the first dtor call raises it
+    printf("------- 6%s (raii): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;  // size_t values above print with %zu
+    bookendInner();
+    // no SHOW_ACCESS: it happens in the cdtors
+    return 0p;
+}
+
+// ---------- 7, comm, PPD, PFST
+// Polymorphic helper called directly by allocAndAccess_7; receives the array as T x[].
+forall( T* ) void access_7 ( size_t expectedElmSz, T x[] ) { 
+    SHOW_ACCESS_1D(42)  // the length 42 is hard-coded here and must match the caller's array
+}
+forall( T ) T * allocAndAccess_7 ( size_t expectedElmSz, const char * tcid, const char * vart ) {  // declares T x[42], delegates the access to access_7; always returns 0p
+    printf("------- 7%s (communication, poly-poly direct, by param T[]): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // size_t values above print with %zu
+    bookendInner();                 // called while x is in scope
+    access_7( expectedElmSz, x );   // x is passed as T x[]
+    return 0p;
+}
+
+// ---------- 8, comm, PPD, PARR
+// Polymorphic helper called directly by allocAndAccess_8; receives a pointer to the whole 42-element array.
+forall( T* ) void access_8 ( size_t expectedElmSz, T (*temp)[42] ) { 
+    T * x = *temp;      // rebind to the name x that SHOW_ACCESS_1D expects
+    SHOW_ACCESS_1D(42)
+}
+forall( T ) T * allocAndAccess_8 ( size_t expectedElmSz, const char * tcid, const char * vart ) {  // declares T x[42], delegates the access to access_8; always returns 0p
+    printf("------- 8%s (communication, poly-poly direct, by param T(*)[*]): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // size_t values above print with %zu
+    bookendInner();                 // called while x is in scope
+    access_8( expectedElmSz, &x );  // passes the address of the whole array
+    return 0p;
+}
+
+// ---------- 9, comm, PPA, PFST
+// Here access_9 reaches the function through the forall assertion, not through a direct call; always returns 0p.
+forall( T | { void access_9 ( size_t, T x[] ); } )
+T * allocAndAccess_9 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 9%s (communication, poly-poly assertion, by param T[]): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // size_t values above print with %zu
+    bookendInner();                 // called while x is in scope
+    access_9( expectedElmSz, x );   // x is passed as T x[]
+    return 0p;
+}
+forall( T* ) void access_9 ( size_t expectedElmSz, T x[] ) {  // polymorphic function with the signature named in allocAndAccess_9's assertion
+    SHOW_ACCESS_1D(42)  // the length 42 is hard-coded here and must match the caller's array
+}
+
+// ---------- 10, comm, PPA, PARR
+// Here access_10 reaches the function through the forall assertion, taking a pointer to the whole array; always returns 0p.
+forall( T | { void access_10 ( size_t, T (*)[42] ); } )
+T * allocAndAccess_10( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 10%s (communication, poly-poly assertion, by param T(*)[*]): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // size_t values above print with %zu
+    bookendInner();                 // called while x is in scope
+    access_10( expectedElmSz, &x ); // passes the address of the whole array
+    return 0p;
+}
+forall( T* ) void access_10( size_t expectedElmSz, T (*temp)[42] ) {  // polymorphic function with the signature named in allocAndAccess_10's assertion
+    T * x = *temp;      // rebind to the name x that SHOW_ACCESS_1D expects
+    SHOW_ACCESS_1D(42)
+}
+
+// ---------- 11, comm, PMA, PFST_11
+// Here access_11 reaches the function through the forall assertion; the candidates below are monomorphic.  Always returns 0p.
+forall( T | { void access_11( size_t, T * ); } )
+T * allocAndAccess_11 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 11%s (communication, poly-mono assertion, by param T[]): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // size_t values above print with %zu
+    bookendInner();                 // called while x is in scope
+    access_11( expectedElmSz, x );  // x is passed as a pointer to its first element
+    return 0p;
+}
+void access_11 ( size_t expectedElmSz, char x[] ) {  // monomorphic overload for char elements
+    SHOW_ACCESS_1D(42)  // the length 42 is hard-coded here and must match the caller's array
+}
+void access_11 ( size_t expectedElmSz, bigun x[] ) {  // monomorphic overload for bigun elements
+    SHOW_ACCESS_1D(42)  // the length 42 is hard-coded here and must match the caller's array
+}
+
+// ---------- 12, comm, PMA, PARR
+// Here access_12 reaches the function through the forall assertion; the candidate below is monomorphic.  Always returns 0p.
+forall( T | { void access_12 ( size_t, T (*)[42] ); } )
+T * allocAndAccess_12 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    printf("------- 12%s (communication, poly-mono assertion, by param T(*)[*]): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // size_t values above print with %zu
+    bookendInner();                 // called while x is in scope
+    access_12( expectedElmSz, &x ); // passes the address of the whole array
+    return 0p;
+}
+void access_12 ( size_t expectedElmSz, double (*temp)[42] ) {  // monomorphic function for double elements
+    double * x = *temp;     // rebind to the name x that SHOW_ACCESS_1D expects
+    SHOW_ACCESS_1D(42)
+}
+
+// ---------- 13, comm, MPD, PFST
+// Polymorphic helper called directly by the two monomorphic allocAndAccess_13 overloads below.
+forall( T* ) void access_13( size_t expectedElmSz, T x[] ) { 
+    SHOW_ACCESS_1D(42)  // the length 42 is hard-coded here and must match the caller's array
+}
+char * allocAndAccess_13 ( size_t expectedElmSz, const char * tcid, const char * vart ) {  // char overload; vart is unused; always returns 0p
+    printf("------- 13%s (communication, mono-poly direct, by param T[]): char x[42], expecting %zu-byte elems\n", tcid, expectedElmSz);
+    char x[ 42 ] INITARR;           // expectedElmSz is size_t, hence %zu above
+    bookendInner();                 // called while x is in scope
+    access_13( expectedElmSz, x );  // monomorphic array handed to the polymorphic helper
+    return 0p;
+}
+bigun * allocAndAccess_13( size_t expectedElmSz, const char * tcid, const char * vart ) {  // bigun overload; vart is unused; always returns 0p
+    printf("------- 13%s (communication, mono-poly direct, by param T[]): bigun x[42], expecting %zu-byte elems\n", tcid, expectedElmSz);
+    bigun x[ 42 ] INITARR;          // expectedElmSz is size_t, hence %zu above
+    bookendInner();                 // called while x is in scope
+    access_13( expectedElmSz, x );  // monomorphic array handed to the polymorphic helper
+    return 0p;
+}
+
+// ---------- 14, comm, MPD, PARR
+// Polymorphic helper called directly by the monomorphic allocAndAccess_14; receives a pointer to the whole array.
+forall( T* ) void access_14 ( size_t expectedElmSz, T (*temp)[42] ) { 
+    T * x = *temp;      // rebind to the name x that SHOW_ACCESS_1D expects
+    SHOW_ACCESS_1D(42)
+}
+double * allocAndAccess_14 ( size_t expectedElmSz, const char * tcid, const char * vart ) {  // double case; vart is unused; always returns 0p
+    printf("------- 13%s (communication, mono-poly direct, by param T(*)[*]): double x[42], expecting %zu-byte elems\n", tcid, expectedElmSz);  // NOTE(review): label says 13 in case 14; correcting it requires updating the expected-output file in step
+    double x[ 42 ] INITARR;         // expectedElmSz is size_t, hence %zu above
+    bookendInner();                 // called while x is in scope
+    access_14( expectedElmSz, &x ); // passes the address of the whole array
+    return 0p;
+}
+
+// ---------- 15, operators
+// Checks pointer operators on T * against & x[5]; each "off by" line prints 0 when they agree.  expectedElmSz is unused.
+forall( T* ) void access_15 ( size_t expectedElmSz, T x[] ) {
+    // correctness of x and ?[?] established by earlier tests
+    T * x5 = & x[5];
+    // SHOW prints expected minus actual as a signed byte count, so the difference is converted to ptrdiff_t for %td
+    #define SHOW( OP, ACT, EXP ) printf( #OP " off by %td\n", (ptrdiff_t)( ((size_t)(EXP)) - ((size_t)(ACT)) ) )
+    { T * xx = & 5[x];            SHOW( ?[?] rev,  xx, x5 ); }
+    { T * xx = x + 5;             SHOW( ?+?,       xx, x5 ); }
+    { T * xx = 5 + x;             SHOW( ?+? rev,   xx, x5 ); }
+    { T * xx = x;   xx += 5;      SHOW( ?+=?,      xx, x5 ); }
+//  { T * xx = x;   for(5) xx++;  SHOW( ?++,       xx, x5 ); }
+//  { T * xx = x;   for(5) ++xx;  SHOW( ++?,       xx, x5 ); }
+    { T * xx = x5;  xx -= 5;      SHOW( ?-=?,      xx, x  ); }
+//  { T * xx = x5;  for(5) xx--;  SHOW( ?--,       xx, x  ); }
+//  { T * xx = x5;  for(5) --xx;  SHOW( --?,       xx, x  ); }
+    #undef SHOW
+    // pointer difference in elements, not bytes; the disabled cases here and above are kept as found
+    ptrdiff_t expPos5 = x5 - x;
+    ptrdiff_t expNeg5 = x - x5;
+
+    printf( "?-? +ve off by %td\n", ((ptrdiff_t) 5) - expPos5 );
+//  printf( "?-? -ve off by %zd\n", ((ptrdiff_t)-5) - expNeg5 );
+}
+
+forall( T ) T * allocAndAccess_15 ( size_t expectedElmSz, const char * tcid, const char * vart ) {  // declares T x[42], delegates to access_15; always returns 0p
+    printf("------- 15%s (operators): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;  // size_t values above print with %zu
+    // bookends unused
+    access_15( expectedElmSz, x );
+    return 0p;
+}
+
+
+
+
+
+#define TC(...)  // TC rows expand to nothing in this pass
+#define TR( TRID,       SZS,   SZV, ETG,   ACCS, SPS, OVLD              ) \
+    F_SIG( run, TRID, SZS, SZV, ACCS, SPS, OVLD ) {   /* defines one run function per TR row; ETG is unused */  \
+        resetBookends();                                                                \
+        OVLD * retval = CALL( bookendOuter, TRID, SZS, SZV, expectedElmSz, tcid, vart ); \
+        reportBookends();                   /* runs after the bookendOuter call has returned */ \
+        return retval;                                                                  \
+    }
+#include "boxed.cases.hfa"  // table of TR / TC rows that drives the macros above
+#undef TC
+#undef TR
+
+
+#define Q_(x) #x     // stringify the argument exactly as written
+#define Q(x) Q_(x)   // extra level so that a macro argument is expanded before it is stringified
+
+int main() {  // runs every TC row of boxed.cases.hfa, in table order
+    #define TR(...)  // TR rows expand to nothing in this pass
+    #define TC( TRID, TCID, SZS, SZV, ETG, VART ) \
+        { VART * ignore = CALL( run, TRID, SZS, SZV, sizeof(ETG(VART)), Q(TCID), Q(VART) ); (void) ignore; }
+    #include "boxed.cases.hfa"  // each TC row becomes one braced call; the VART * result is discarded
+    #undef TR
+    #undef TC
+}