Index: src/GenPoly/Box.cpp
===================================================================
--- src/GenPoly/Box.cpp	(revision 3cb693c3b7d8140becb455be4fa6fbed8e3ed220)
+++ src/GenPoly/Box.cpp	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
@@ -673,9 +673,16 @@
 		TypeVarMap const & typeVars,
 		ast::TypeSubstitution const * typeSubs ) {
-	if ( expr->result && isPolyType( expr->result, typeVars, typeSubs ) ) {
-		if ( auto name = expr->func.as<ast::NameExpr>() ) {
-			if ( "*?" == name->name ) {
-				return true;
-			}
+	if ( auto name = expr->func.as<ast::NameExpr>() ) {
+		if ( "*?" == name->name ) {
+			// It's a dereference.
+			// Must look under the * (at the operand's pointee type) because expr's
+			// result could be array-to-pointer decayed.  If the operand is T(*)[n],
+			// then expr is a poly deref, even though expr is T*, which is not poly.
+			auto ptrExpr = expr->args.front();
+			auto ptrTy = ptrExpr->result.as<ast::PointerType>();
+			assert(ptrTy); // thing being deref'd must be pointer
+			auto referentTy = ptrTy->base;
+			assert(referentTy);
+			return isPolyType( referentTy, typeVars, typeSubs );
 		}
 	}
@@ -1192,14 +1199,17 @@
 		assert( 2 == expr->args.size() );
 
-		ast::Type const * baseType1 =
-			isPolyPtr( expr->args.front()->result, scopeTypeVars, typeSubs );
-		ast::Type const * baseType2 =
-			isPolyPtr( expr->args.back()->result, scopeTypeVars, typeSubs );
+		ast::Type const * arg1Ty = expr->args.front()->result;
+		ast::Type const * arg2Ty = expr->args.back()->result;
+
+		// two cases: a[i] with first arg poly ptr, i[a] with second arg poly ptr
+		bool isPoly1 = isPolyPtr( arg1Ty, scopeTypeVars, typeSubs ) != nullptr;
+		bool isPoly2 = isPolyPtr( arg2Ty, scopeTypeVars, typeSubs ) != nullptr;
+
 		// If neither argument is a polymorphic pointer, do nothing.
-		if ( !baseType1 && !baseType2 ) {
+		if ( !isPoly1 && !isPoly2 ) {
 			return expr;
 		}
 		// The arguments cannot both be polymorphic pointers.
-		assert( !baseType1 || !baseType2 );
+		assert( !isPoly1 || !isPoly2 );
 		// (So exactly one of the arguments is a polymorphic pointer.)
 
@@ -1210,16 +1220,22 @@
 		ast::UntypedExpr * ret = new ast::UntypedExpr( location,
 				new ast::NameExpr( location, "?+?" ) );
-		if ( baseType1 ) {
+		if ( isPoly1 ) {
+			assert( arg1Ty );
+			auto arg1TyPtr = dynamic_cast<ast::PointerType const * >( arg1Ty );
+			assert( arg1TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location2, "?*?", {
 				expr->args.back(),
-				new ast::SizeofExpr( location1, deepCopy( baseType1 ) ),
+				new ast::SizeofExpr( location1, deepCopy( arg1TyPtr->base ) ),
 			} );
 			ret->args.push_back( expr->args.front() );
 			ret->args.push_back( multiply );
 		} else {
-			assert( baseType2 );
+			assert( isPoly2 );
+			assert( arg2Ty );
+			auto arg2TyPtr = dynamic_cast<ast::PointerType const * >( arg2Ty );
+			assert( arg2TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location1, "?*?", {
 				expr->args.front(),
-				new ast::SizeofExpr( location2, deepCopy( baseType2 ) ),
+				new ast::SizeofExpr( location2, deepCopy( arg2TyPtr->base ) ),
 			} );
 			ret->args.push_back( multiply );
@@ -1234,6 +1250,12 @@
 		assert( 1 == expr->args.size() );
 
+		auto ptrExpr = expr->args.front();
+		auto ptrTy = ptrExpr->result.as<ast::PointerType>();
+		assert(ptrTy); // thing being deref'd must be pointer
+		auto referentTy = ptrTy->base;
+		assert(referentTy);
+
 		// If this isn't for a poly type, then do nothing.
-		if ( !isPolyType( expr->result, scopeTypeVars, typeSubs ) ) {
+		if ( !isPolyType( referentTy, scopeTypeVars, typeSubs ) ) {
 			return expr;
 		}
@@ -1243,4 +1265,8 @@
 		// Fix expression type to remove pointer.
 		ret->result = expr->result;
+		// apply pointer decay
+		if (auto retArTy = ret->result.as<ast::ArrayType>()) {
+			ret->result = new ast::PointerType( retArTy->base );
+		}
 		ret->env = expr->env ? expr->env : ret->env;
 		return ret;
@@ -1291,38 +1317,49 @@
 		return makeIncrDecrExpr(
 			expr->location, expr, baseType, "++?" == varName );
-	// Addition and Subtration Intrinsics:
+	// Addition and Subtraction Intrinsics:
 	} else if ( "?+?" == varName || "?-?" == varName ) {
 		assert( expr->result );
 		assert( 2 == expr->args.size() );
 
-		auto baseType1 =
-			isPolyPtr( expr->args.front()->result, scopeTypeVars, typeSubs );
-		auto baseType2 =
-			isPolyPtr( expr->args.back()->result, scopeTypeVars, typeSubs );
+		ast::Type const * arg1Ty = expr->args.front()->result;
+		ast::Type const * arg2Ty = expr->args.back()->result;
+
+		bool isPoly1 = isPolyPtr( arg1Ty, scopeTypeVars, typeSubs ) != nullptr;
+		bool isPoly2 = isPolyPtr( arg2Ty, scopeTypeVars, typeSubs ) != nullptr;
 
 		CodeLocation const & location = expr->location;
 		CodeLocation const & location1 = expr->args.front()->location;
 		CodeLocation const & location2 = expr->args.back()->location;
-		// LHS op RHS -> (LHS op RHS) / sizeof(LHS)
-		if ( baseType1 && baseType2 ) {
+		// LHS minus RHS -> (LHS minus RHS) / sizeof(LHS)
+		if ( isPoly1 && isPoly2 ) {
+			assert( "?-?" == varName );
+			assert( arg1Ty );
+			auto arg1TyPtr = dynamic_cast<ast::PointerType const * >( arg1Ty );
+			assert( arg1TyPtr );
 			auto divide = ast::UntypedExpr::createCall( location, "?/?", {
 				expr,
-				new ast::SizeofExpr( location, deepCopy( baseType1 ) ),
+				new ast::SizeofExpr( location, deepCopy( arg1TyPtr->base ) ),
 			} );
 			if ( expr->env ) divide->env = expr->env;
 			return divide;
 		// LHS op RHS -> LHS op (RHS * sizeof(LHS))
-		} else if ( baseType1 ) {
+		} else if ( isPoly1 ) {
+			assert( arg1Ty );
+			auto arg1TyPtr = dynamic_cast<ast::PointerType const * >( arg1Ty );
+			assert( arg1TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location2, "?*?", {
 				expr->args.back(),
-				new ast::SizeofExpr( location1, deepCopy( baseType1 ) ),
+				new ast::SizeofExpr( location1, deepCopy( arg1TyPtr->base ) ),
 			} );
 			return ast::mutate_field_index(
 				expr, &ast::ApplicationExpr::args, 1, multiply );
 		// LHS op RHS -> (LHS * sizeof(RHS)) op RHS
-		} else if ( baseType2 ) {
+		} else if ( isPoly2 ) {
+			assert( arg2Ty );
+			auto arg2TyPtr = dynamic_cast<ast::PointerType const * >( arg2Ty );
+			assert( arg2TyPtr );
 			auto multiply = ast::UntypedExpr::createCall( location1, "?*?", {
 				expr->args.front(),
-				new ast::SizeofExpr( location2, deepCopy( baseType2 ) ),
+				new ast::SizeofExpr( location2, deepCopy( arg2TyPtr->base ) ),
 			} );
 			return ast::mutate_field_index(
@@ -1588,7 +1625,8 @@
 	/// Change the type of generic aggregate members to char[].
 	void mutateMembers( ast::AggregateDecl * aggr );
-	/// Returns the calculated sizeof expression for type, or nullptr for use
-	/// C sizeof().
+	/// Return the calculated sizeof/alignof expression for the type, or
+	/// nullptr if the plain C sizeof/alignof should be used instead.
 	ast::Expr const * genSizeof( CodeLocation const &, ast::Type const * );
+	ast::Expr const * genAlignof( CodeLocation const &, ast::Type const * );
 	/// Enters a new scope for type-variables,
 	/// adding the type variables from the provided type.
@@ -1613,15 +1651,30 @@
 {}
 
+static ast::Type * polyToMonoTypeRec( CodeLocation const & loc,
+		ast::Type const * ty ) {
+	ast::Type * ret;
+	if ( auto aTy = dynamic_cast<ast::ArrayType const *>( ty ) ) {
+		// Recursive case: convert the element type, then rewrap it in an array with this level's dimension and flags.
+		auto monoBase = polyToMonoTypeRec( loc, aTy->base );
+		ret = new ast::ArrayType( monoBase, aTy->dimension,
+			aTy->isVarLen, aTy->isStatic, aTy->qualifiers );
+	} else {
+		// Base case: a non-array type becomes a variable-length char array whose dimension is the type's sizeof name.
+		auto charType = new ast::BasicType( ast::BasicKind::Char );
+		auto size = new ast::NameExpr( loc,
+			sizeofName( Mangle::mangleType( ty ) ) );
+		ret = new ast::ArrayType( charType, size,
+			ast::VariableLen, ast::DynamicDim, ast::CV::Qualifiers() );
+	}
+	return ret;
+}
+
 /// Converts polymorphic type into a suitable monomorphic representation.
-/// Currently: __attribute__(( aligned(8) )) char[size_T];
-ast::Type * polyToMonoType( CodeLocation const & location,
-		ast::Type const * declType ) {
-	auto charType = new ast::BasicType( ast::BasicKind::Char );
-	auto size = new ast::NameExpr( location,
-		sizeofName( Mangle::mangleType( declType ) ) );
-	auto ret = new ast::ArrayType( charType, size,
-		ast::VariableLen, ast::DynamicDim, ast::CV::Qualifiers() );
+/// Simple cases: T -> __attribute__(( aligned(8) )) char[sizeof_T];
+/// Array cases: T[eOut][eIn] ->  __attribute__(( aligned(8) )) char[eOut][eIn][sizeof_T];
+ast::Type * polyToMonoType( CodeLocation const & loc, ast::Type const * ty ) {
+	auto ret = polyToMonoTypeRec( loc, ty );
 	ret->attributes.emplace_back( new ast::Attribute( "aligned",
-		{ ast::ConstantExpr::from_int( location, 8 ) } ) );
+		{ ast::ConstantExpr::from_int( loc, 8 ) } ) );
 	return ret;
 }
@@ -1716,4 +1769,25 @@
 	// Forally, side effects are not safe in this function. But it works.
 	erase_if( mutDecl->attributes, matchAndMove );
+
+	// Change the decl's type.
+	// Upon finishing the box pass, it shall be void*.
+	// At this middle-of-box-pass point, that type is T.
+
+	// example 1
+	// before box:                                  T     t ;
+	// before here:  char _bufxx    [_sizeof_Y1T];  T     t = _bufxx;
+	// after here:   char _bufxx    [_sizeof_Y1T];  T     t = _bufxx;  (no change here - non array case)
+	// after box:    char _bufxx    [_sizeof_Y1T];  void *t = _bufxx;
+
+	// example 2
+	// before box:                                  T     t[42] ;
+	// before here:  char _bufxx[42][_sizeof_Y1T];  T     t[42] = _bufxx;
+	// after here:   char _bufxx[42][_sizeof_Y1T];  T     t     = _bufxx;
+	// after box:    char _bufxx[42][_sizeof_Y1T];  void *t     = _bufxx;
+
+	// Strip all "array of" wrappers
+	while ( auto arrayType = dynamic_cast<ast::ArrayType const *>( mutDecl->type.get() ) ) {
+		mutDecl->type = arrayType->base;
+	}
 
 	mutDecl->init = new ast::SingleInit( decl->location,
@@ -1869,10 +1943,6 @@
 		ast::AlignofExpr const * expr ) {
 	ast::Type const * type = expr->type ? expr->type : expr->expr->result;
-	if ( findGeneric( expr->location, type ) ) {
-		return new ast::NameExpr( expr->location,
-			alignofName( Mangle::mangleType( type ) ) );
-	} else {
-		return expr;
-	}
+	ast::Expr const * gen = genAlignof( expr->location, type );
+	return ( gen ) ? gen : expr;
 }
 
@@ -2095,4 +2165,7 @@
 
 		return true;
+
+	} else if ( auto inst = dynamic_cast<ast::ArrayType const *>( type ) ) {
+		return findGeneric( location, inst->base );
 	}
 	return false;
@@ -2155,6 +2228,20 @@
 		return makeOp( location, "?*?", sizeofBase, dim );
 	} else if ( findGeneric( location, type ) ) {
-		// Generate calculated size for generic type.
+		// Generate reference to _sizeof parameter
 		return new ast::NameExpr( location, sizeofName(
+				Mangle::mangleType( type ) ) );
+	} else {
+		return nullptr;
+	}
+}
+
+ast::Expr const * PolyGenericCalculator::genAlignof(
+		CodeLocation const & location, ast::Type const * type ) {
+	if ( auto * array = dynamic_cast<ast::ArrayType const *>( type ) ) {
+		// alignof array is alignof element
+		return genAlignof( location, array->base );
+	} else if ( findGeneric( location, type ) ) {
+		// Generate reference to _alignof parameter
+		return new ast::NameExpr( location, alignofName(
 				Mangle::mangleType( type ) ) );
 	} else {
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision 3cb693c3b7d8140becb455be4fa6fbed8e3ed220)
+++ tests/Makefile.am	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
@@ -69,5 +69,5 @@
 .PHONY : concurrency list .validate .test_makeflags
 .INTERMEDIATE : .validate .validate.cfa .test_makeflags
-EXTRA_PROGRAMS = avl_test linkonce linking/mangling/anon .dummy_hack # build but do not install
+EXTRA_PROGRAMS = array-collections/boxed avl_test linkonce linking/mangling/anon .dummy_hack # build but do not install
 EXTRA_DIST = test.py \
 	pybin/__init__.py \
@@ -77,4 +77,6 @@
 	pybin/tools.py \
 	long_tests.hfa \
+	array-collections/boxed.hfa \
+	array-collections/boxed.cases.hfa \
 	avltree/avl-private.h \
 	avltree/avl.h \
@@ -104,4 +106,5 @@
 	done
 
+array_collections_boxed_SOURCES = array-collections/boxed.main.cfa array-collections/boxed.bookend.cfa
 avl_test_SOURCES = avltree/avl_test.cfa avltree/avl0.cfa avltree/avl1.cfa avltree/avl2.cfa avltree/avl3.cfa avltree/avl4.cfa avltree/avl-private.cfa
 linkonce_SOURCES = link-once/main.cfa link-once/partner.cfa
Index: tests/array-collections/.expect/boxed.txt
===================================================================
--- tests/array-collections/.expect/boxed.txt	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
+++ tests/array-collections/.expect/boxed.txt	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
@@ -0,0 +1,248 @@
+------- 1a (singleton): T x[1], expecting T=short, got sizeof(T)=2, expecting 2-byte elems
+Delta 0--1 expected 2 bytes, actual 2 bytes
+Delta 1--2 expected 2 bytes, actual 2 bytes
+Delta 0--2 expected 4 bytes, actual 4 bytes
+Delta 0--n expected 2 bytes, actual 2 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 1b (singleton): T x[1], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 40 bytes, actual 40 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2a (general): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2b (general): T x[42], expecting T=float, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2c (general): T x[42], expecting T=long long, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 2d (general): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3a (user VLA): T x[n], got n=1, expecting T=int, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 4 bytes, actual 4 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3b (user VLA): T x[n], got n=42, expecting T=int, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3c (user VLA): T x[n], got n=1, expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 40 bytes, actual 40 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 3d (user VLA): T x[n], got n=42, expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 4a (2-dimensional): T x[42][42], expecting T=char, got sizeof(T)=1, expecting 1-byte atoms
+Delta 0,0--0,1 expected 1 bytes, actual 1 bytes
+Delta 0,1--0,2 expected 1 bytes, actual 1 bytes
+Delta 0,0--0,2 expected 2 bytes, actual 2 bytes
+Delta 0,0--0,n expected 42 bytes, actual 42 bytes
+Delta 0,0--1,0 expected 42 bytes, actual 42 bytes
+Delta 1,0--2,0 expected 42 bytes, actual 42 bytes
+Delta 0,0--2,0 expected 84 bytes, actual 84 bytes
+Delta 0,0--n,0 expected 1764 bytes, actual 1764 bytes
+Delta 0,0--n,n expected 1806 bytes, actual 1806 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 4b (2-dimensional): T x[42][42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte atoms
+Delta 0,0--0,1 expected 40 bytes, actual 40 bytes
+Delta 0,1--0,2 expected 40 bytes, actual 40 bytes
+Delta 0,0--0,2 expected 80 bytes, actual 80 bytes
+Delta 0,0--0,n expected 1680 bytes, actual 1680 bytes
+Delta 0,0--1,0 expected 1680 bytes, actual 1680 bytes
+Delta 1,0--2,0 expected 1680 bytes, actual 1680 bytes
+Delta 0,0--2,0 expected 3360 bytes, actual 3360 bytes
+Delta 0,0--n,0 expected 70560 bytes, actual 70560 bytes
+Delta 0,0--n,n expected 72240 bytes, actual 72240 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 5a (pair): pair(T,T) x[42], expecting T=double, got sizeof(T)=8, expecting 16-byte atoms
+Delta 0--1 expected 16 bytes, actual 16 bytes
+Delta 1--2 expected 16 bytes, actual 16 bytes
+Delta 0--2 expected 32 bytes, actual 32 bytes
+Delta 0--n expected 672 bytes, actual 672 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 5b (pair): pair(T,T) x[42], expecting T=bigun, got sizeof(T)=40, expecting 80-byte atoms
+Delta 0--1 expected 80 bytes, actual 80 bytes
+Delta 1--2 expected 80 bytes, actual 80 bytes
+Delta 0--2 expected 160 bytes, actual 160 bytes
+Delta 0--n expected 3360 bytes, actual 3360 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 6a (raii): T x[42], expecting T=my_mgd_t, got sizeof(T)=4, expecting 4-byte elems
+ctor call 0 targets first + 0 bytes
+ctor call 1 targets first + 4 bytes
+ctor call 40 targets first + 160 bytes
+ctor call 41 targets first + 164 bytes
+dtor call 0 targets first - 0 bytes
+dtor call 1 targets first - 4 bytes
+dtor call 40 targets first - 160 bytes
+dtor call 41 targets first - 164 bytes
+dtor lo off by 0 bytes, hi off by 0 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7a (communication, poly-poly direct, by param T[]): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7b (communication, poly-poly direct, by param T[]): T x[42], expecting T=float, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7c (communication, poly-poly direct, by param T[]): T x[42], expecting T=long long, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 7d (communication, poly-poly direct, by param T[]): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 8a (communication, poly-poly direct, by param T(*)[*]): T x[42], expecting T=double, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9a (communication, poly-poly assertion, by param T[]): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9b (communication, poly-poly assertion, by param T[]): T x[42], expecting T=float, got sizeof(T)=4, expecting 4-byte elems
+Delta 0--1 expected 4 bytes, actual 4 bytes
+Delta 1--2 expected 4 bytes, actual 4 bytes
+Delta 0--2 expected 8 bytes, actual 8 bytes
+Delta 0--n expected 168 bytes, actual 168 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9c (communication, poly-poly assertion, by param T[]): T x[42], expecting T=long long, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 9d (communication, poly-poly assertion, by param T[]): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 10a (communication, poly-poly assertion, by param T(*)[*]): T x[42], expecting T=double, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 11a (communication, poly-mono assertion, by param T[]): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 11b (communication, poly-mono assertion, by param T[]): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 12a (communication, poly-mono assertion, by param T(*)[*]): T x[42], expecting T=double, got sizeof(T)=8, expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 13a (communication, mono-poly direct, by param T[]): char x[42], expecting 1-byte elems
+Delta 0--1 expected 1 bytes, actual 1 bytes
+Delta 1--2 expected 1 bytes, actual 1 bytes
+Delta 0--2 expected 2 bytes, actual 2 bytes
+Delta 0--n expected 42 bytes, actual 42 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 13b (communication, mono-poly direct, by param T[]): bigun x[42], expecting 40-byte elems
+Delta 0--1 expected 40 bytes, actual 40 bytes
+Delta 1--2 expected 40 bytes, actual 40 bytes
+Delta 0--2 expected 80 bytes, actual 80 bytes
+Delta 0--n expected 1680 bytes, actual 1680 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 13a (communication, mono-poly direct, by param T(*)[*]): double x[42], expecting 8-byte elems
+Delta 0--1 expected 8 bytes, actual 8 bytes
+Delta 1--2 expected 8 bytes, actual 8 bytes
+Delta 0--2 expected 16 bytes, actual 16 bytes
+Delta 0--n expected 336 bytes, actual 336 bytes
+array starts after lo bookend: yes
+array ends before hi bookend: yes
+------- 15a (operators): T x[42], expecting T=char, got sizeof(T)=1, expecting 1-byte elems
+?[?] rev off by 0
+?+? off by 0
+?+? rev off by 0
+?+=? off by 0
+?-=? off by 0
+?-? +ve off by 0
+bookends were not set
+------- 15b (operators): T x[42], expecting T=bigun, got sizeof(T)=40, expecting 40-byte elems
+?[?] rev off by 0
+?+? off by 0
+?+? rev off by 0
+?+=? off by 0
+?-=? off by 0
+?-? +ve off by 0
+bookends were not set
Index: tests/array-collections/boxed.bookend.cfa
===================================================================
--- tests/array-collections/boxed.bookend.cfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
+++ tests/array-collections/boxed.bookend.cfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
@@ -0,0 +1,80 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.bookend.cfa -- stack address recording and acceptance for the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See general test documentation in boxed.main.cfa.
+// See abbreviation definitions in boxed.cases.hfa.
+
+
+
+
+#include "boxed.hfa"
+
+char * ar_lo = (char *) -1;
+char * ar_hi = 0p;
+static char * bookend_lo = (char *) -1;
+static char * bookend_hi = 0p;
+
+void bookendInner( void ) { // records the address of a local of this call as the lo bookend
+    char var = 'x';
+    (void) var; // explicit use of var
+    bookend_lo = & var; // only the address is kept, for later comparisons
+}
+
+#define TC(...)
+#define TR( TRID, SZS, SZV, ETG, ACCS, SPS, OVLD ) \
+    F_SIG( bookendOuter, TRID, SZS, SZV, ACCS, SPS, OVLD ) {                                  \
+        char var = 'x';                                                              \
+        (void) var;                                                                  \
+        bookend_hi = & var;                                                          \
+        return CALL( allocAndAccess, TRID, SZS, n, expectedElmSz, tcid, vart );     \
+    }
+#include "boxed.cases.hfa"
+#undef TC
+#undef TR
+
+void resetBookends( void ) { // puts all four markers back to their initial values
+    bookend_lo = (char *) -1; // lo markers restart at the highest address
+    bookend_hi = 0p; // hi markers restart at null
+    ar_lo = (char *) -1;
+    ar_hi = 0p;
+}
+
+void reportBookends( void ) { // prints whether the recorded array ends lie strictly between the two bookends
+    ptrdiff_t ar_lo_fwd_offs = ar_lo - bookend_lo; // distance from the lo bookend up to the array's lo end
+    ptrdiff_t ar_hi_rev_offs = bookend_hi - ar_hi; // distance from the array's hi end up to the hi bookend
+
+    VPRT( "Bookends are %p and %p\n", bookend_lo, bookend_hi );
+    VPRT( "Array ends are %p and %p\n", ar_lo, ar_hi );
+    VPRT( "Bookend lo fwd offset %zd\n", bookend_lo - bookend_lo );
+    VPRT( "Array lo fwd offset %zd\n", ar_lo_fwd_offs );
+    VPRT( "Array hi fwd offset %zd\n", ar_hi - bookend_lo );
+    VPRT( "Bookend hi fwd offset %zd\n", bookend_hi - bookend_lo );
+    VPRT( "Bookend lo rev offset %zd\n", bookend_hi - bookend_lo );
+    VPRT( "Array lo rev offset %zd\n", bookend_hi - ar_lo );
+    VPRT( "Array hi rev offset %zd\n", ar_hi_rev_offs );
+    VPRT( "Bookend hi rev offset %zd\n", bookend_hi - bookend_hi );
+
+    if (bookend_lo >= bookend_hi) { // holds, e.g., while both still have the values assigned by resetBookends
+        printf("bookends were not set\n");
+        return;
+    }
+    if (ar_lo >= ar_hi) { // likewise for the array-end markers
+        printf("array bounds were not set\n");
+        return;
+    }
+
+    printf("array starts after lo bookend: %s\n", ar_lo_fwd_offs > 0 ? "yes" : "no" );
+    printf("array ends before hi bookend: %s\n", ar_hi_rev_offs > 0 ? "yes" : "no" );
+}
Index: tests/array-collections/boxed.cases.hfa
===================================================================
--- tests/array-collections/boxed.cases.hfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
+++ tests/array-collections/boxed.cases.hfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
@@ -0,0 +1,116 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.cases.hfa -- tables of test cases for the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See general test documentation in boxed.main.cfa.
+
+/*
+This pair of tables summarizes the handwritten functions of .main.cfa, for automatically wrapping and calling them.
+
+TR        test rig                    one handwritten function and its stack of generated wrappers
+- TRID    test rig identifier         (primary key)
+- SZS     sizing style                how the array's length (in number of elements) is given
+  - NSTAT static number of elements   generated code hardcodes the array length; the outputted C VLA accommodates only varying-sized T
+  - NDYN  dynamic number of elements  generated code uses a parameter for the length; represents a VLA apparent to the CFA programmer
+- SZV     sizing value                concrete size of the test case, except for (TR, NDYN), which has the parameter name
+- ETG     element type generator      how the array's element type relates to T
+  - ID    identity                    array is of T
+  - PAIR  pair                        array is of pair(T,T)
+- ACCS    access style                how the access-side code sees the elements
+  - BUF   buffer                      accessor is working directly with the declared array (buffer) variable
+  - RAII  RAII                        accessor is a constructor/destructor pair
+  - PFST  pointer to first element    accessor is in a support function, who receives the array as parameter T x[]
+  - PARR  pointer to array            accessor is in a support function, who receives the array as parameter T (*x)[length]
+- SPS     support polymorphism style  when passing the array to a support (helper) function, how the call uses type variables and assertions
+  - NA    not applicable              the rig does not use a support function
+  - PPD   poly-poly direct            polymorphic calls polymorphic, directly (C-style name lookup)
+  - PPA   poly-poly assertion         polymorphic calls polymorphic, via assertion
+  - PMA   poly-mono assertion         polymorphic calls monomorphic, via assertion
+  - MPD   mono-poly direct            monomorphic calls polymorphic, directly (C-style name lookup)
+- OVLD    overloading type            type of pointer returned from the function (wrapper), lets caller select an overload by return type (see TR 13)
+TC        test case                   one call to (the stack of wrappers of) a handwritten function
+- TRID    test rig identifier         (primary key, pseudo foreign key)
+- TCID    test case identifier        (primary key)
+- SZS     sizing style                (duplicate, join result)
+- SZV     sizing value                (duplicate, join result), except for TC under TR NDYN, which has concrete size of the test case
+- ETG     element type generator      (duplicate, join result)
+- VART    varying type                type to use for T in this call
+*/
+
+// #define TR( TRID,       SZS,   SZV, ETG,   ACCS, SPS, OVLD              )
+// #define TC( TRID, TCID, SZS,   SZV, ETG,                      VART      )
+
+           TR( 1,          NSTAT, 1,   ID,    BUF,  NA,  T                 )
+           TC( 1,    a,    NSTAT, 1,   ID,                       short     )
+           TC( 1,    b,    NSTAT, 1,   ID,                       bigun     )
+
+           TR( 2,          NSTAT, 42,  ID,    BUF,  NA,  T                 )
+           TC( 2,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 2,    b,    NSTAT, 42,  ID,                       float     )
+           TC( 2,    c,    NSTAT, 42,  ID,                       long long )
+           TC( 2,    d,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 3,          NDYN,  n,   ID,    BUF,  NA,  T                 )
+           TC( 3,    a,    NDYN,  1,   ID,                       int       )
+           TC( 3,    b,    NDYN,  42,  ID,                       int       )
+           TC( 3,    c,    NDYN,  1,   ID,                       bigun     )
+           TC( 3,    d,    NDYN,  42,  ID,                       bigun     )
+
+           TR( 4,          NSTAT, 42,  ID,    BUF,  NA,  T                 )
+           TC( 4,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 4,    b,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 5,          NSTAT, 42,  PAIR,  BUF,  NA,  T                 )
+           TC( 5,    a,    NSTAT, 42,  PAIR,                     double    )
+           TC( 5,    b,    NSTAT, 42,  PAIR,                     bigun     )
+
+           TR( 6,          NSTAT, 42,  ID,    RAII, NA,  T                 )
+           TC( 6,    a,    NSTAT, 42,  ID,                       my_mgd_t  )
+
+           TR( 7,          NSTAT, 42,  ID,    PFST, PPD, T                 )
+           TC( 7,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 7,    b,    NSTAT, 42,  ID,                       float     )
+           TC( 7,    c,    NSTAT, 42,  ID,                       long long )
+           TC( 7,    d,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 8,          NSTAT, 42,  ID,    PARR, PPD, T                 )
+           TC( 8,    a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 9,          NSTAT, 42,  ID,    PFST, PPA, T                 )
+           TC( 9,    a,    NSTAT, 42,  ID,                       char      )
+           TC( 9,    b,    NSTAT, 42,  ID,                       float     )
+           TC( 9,    c,    NSTAT, 42,  ID,                       long long )
+           TC( 9,    d,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 10,         NSTAT, 42,  ID,    PARR, PPA, T                 )
+           TC( 10,   a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 11,         NSTAT, 42,  ID,    PFST, PMA, T                 )
+           TC( 11,   a,    NSTAT, 42,  ID,                       char      )
+           TC( 11,   b,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 12,         NSTAT, 42,  ID,    PARR, PMA, T                 )
+           TC( 12,   a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 13,         NSTAT, 42,  ID,    PFST, MPD, char              ) // overload 1
+           TR( 13,         NSTAT, 42,  ID,    PFST, MPD, bigun             ) // overload 2
+           TC( 13,   a,    NSTAT, 42,  ID,                       char      )
+           TC( 13,   b,    NSTAT, 42,  ID,                       bigun     )
+
+           TR( 14,         NSTAT, 42,  ID,    PARR, MPD, double            )
+           TC( 14,   a,    NSTAT, 42,  ID,                       double    )
+
+           TR( 15,         NSTAT, 42,  ID,    PFST, PPD, T                 )
+           TC( 15,   a,    NSTAT, 42,  ID,                       char      )
+           TC( 15,   b,    NSTAT, 42,  ID,                       bigun     )
Index: tests/array-collections/boxed.hfa
===================================================================
--- tests/array-collections/boxed.hfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
+++ tests/array-collections/boxed.hfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
@@ -0,0 +1,113 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.hfa -- inter-compile unit dependencies and common macros for the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See general test documentation in boxed.main.cfa.
+// See abbreviation definitions in boxed.cases.hfa.
+
+
+#ifdef SUPPRESS_INIT
+#define DECTYVAR(T) T*
+#define INITARR @= {}
+#else
+#define DECTYVAR(T) T
+#define INITARR
+#endif
+
+// ETG definitions
+#define ID(TY) TY
+#define PAIR(TY) pair(TY, TY)
+
+#define DECL(            F_SLUG, TRID,   SZS, SZV, ACCS, SPS, OVLD ) F_SIG( F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD );
+
+#define CALL(            F_SLUG, TRID, SZS, SZV, ... ) CALL__SZS_ ## SZS( F_SLUG, TRID, SZV, __VA_ARGS__ )
+#define CALL__SZS_NSTAT( F_SLUG, TRID,      SZV, ... ) F_NAME_NSTAT( F_SLUG, TRID ) ( __VA_ARGS__      )
+#define CALL__SZS_NDYN(  F_SLUG, TRID,      SZV, ... ) F_NAME_NDYN(  F_SLUG, TRID ) ( __VA_ARGS__, SZV )
+
+#define F_SIG(                      F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__SPS_ ## SPS( F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_NA(              F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_SMPL(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_PPD(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_SMPL(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_PPA(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_ASSN(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_PMA(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_ASSN(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__SPS_MPD(             F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_NONE(   F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_SMPL(           F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_SMPL__SZS_ ## SZS( F_SLUG, TRID, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_SMPL__SZS_NSTAT(F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) forall( DECTYVAR(T) ) OVLD * F_NAME_NSTAT(F_SLUG, TRID ) ( size_t expectedElmSz, const char * tcid, const char * vart           )
+#define F_SIG__POLY_SMPL__SZS_NDYN( F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) forall( DECTYVAR(T) ) OVLD * F_NAME_NDYN( F_SLUG, TRID ) ( size_t expectedElmSz, const char * tcid, const char * vart, size_t n )
+#define F_SIG__POLY_ASSN(           F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_ASSN__SZS_ ## SZS( F_SLUG, TRID, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_ASSN__SZS_NSTAT(F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) forall( DECTYVAR(T) | { DECL_ACCESS( F_SLUG, TRID, ACCS, SPS, SZV ) } ) OVLD * F_NAME_NSTAT(F_SLUG, TRID) ( size_t expectedElmSz, const char * tcid, const char * vart           )
+#define F_SIG__POLY_NONE(           F_SLUG, TRID, SZS, SZV, ACCS, SPS, OVLD ) F_SIG__POLY_NONE__SZS_ ## SZS( F_SLUG, TRID, SZV, ACCS, SPS, OVLD )
+#define F_SIG__POLY_NONE__SZS_NSTAT(F_SLUG, TRID,      SZV, ACCS, SPS, OVLD ) OVLD * F_NAME_NSTAT(F_SLUG, TRID) ( size_t expectedElmSz, const char * tcid, const char * vart )
+
+#define F_NAME_NSTAT( F_SLUG, TRID ) F_SLUG ## _ ## TRID
+#define F_NAME_NDYN(  F_SLUG, TRID ) F_SLUG ## _ ## TRID
+
+#define DECL_ACCESS( F_SLUG, TRID, ACCS, SPS, SZ ) void F_NAME_NSTAT( access, TRID ) ( size_t, ACCESS_PARM_TY(ACCS, SZ) );
+#define ACCESS_PARM_TY(ACCS, SZ) ACCESS_PARM_TY__ACCS_ ## ACCS( SZ )
+#define ACCESS_PARM_TY__ACCS_PFST(SZ) T *
+#define ACCESS_PARM_TY__ACCS_PARR(SZ) T (*)[SZ]
+
+// Used as the "polymorphic, but not T" element type.
+forall(U, V)
+struct pair {
+    U fst;
+    V snd;
+};
+
+// Used as the "larger than a pointer" element type.
+// Size chosen empirically to give buffers larger than the whole stack frame
+// for a pointer-sized element.
+struct bigun {
+    long long int a;
+    long long int b;
+    long long int c;
+    long long int d;
+    long long int e;
+};
+
+// Verbose output is unstable from one compiler-target-optimization to another.
+// So it can't run in the overnight test.  But it helps see what went wrong.
+#ifdef VERBOSE
+#define VPRT(...) printf(__VA_ARGS__)
+#else
+#define VPRT(...)
+#endif
+
+
+
+
+// defined in bookend.cfa
+
+void bookendInner( void );
+
+#define TC(...)
+#define TR( TRID, SZS, SZV, ETG, ACCS, SPS, OVLD ) DECL( bookendOuter, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#include "boxed.cases.hfa"
+#undef TC
+#undef TR
+
+void resetBookends( void );
+void reportBookends( void );
+
+extern char * ar_hi;
+extern char * ar_lo;
+
+
+
+// defined in main.cfa
+
+#define TC(...)
+#define TR( TRID, SZS, SZV, ETG, ACCS, SPS, OVLD ) DECL( allocAndAccess, TRID, SZS, SZV, ACCS, SPS, OVLD )
+#include "boxed.cases.hfa"
+#undef TC
+#undef TR
Index: tests/array-collections/boxed.main.cfa
===================================================================
--- tests/array-collections/boxed.main.cfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
+++ tests/array-collections/boxed.main.cfa	(revision fd4df379a19cd8ddb365ec1ad548df3a82d4efc1)
@@ -0,0 +1,457 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2023 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// boxed.main.cfa -- core logic of the "array boxed" test
+//
+// Author           : Mike Brooks
+// Created On       : Thu Jul 25 17:00:00 2024
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+// See abbreviation definitions in boxed.cases.hfa.
+
+/*
+The "array boxed" test deals with an array of T's, when T is dynamically sized.
+
+All cases generate a VLA, because even a single (dynamically sized) T would be
+backed by a VLA.  All cases generate pointer arithmetic on, and casts from,
+void*, because (dynamically sized) T has no corresponding type in generated C.
+These facts are true about boxing in general.  The test ensures that the VLA
+is big enough and that accessed elements are spaced by the correct amounts,
+specifically for cases where the user declares an array of T's, i.e. demands
+several adjacent char-buffer-implemented T's.
+
+The core test logic occurs in the functions named allocAndAccess, below.  It
+allocates an array of T's, then accesses them.  In some cases, the access is
+within the allocAndAccess function, in others, it's within a called helper
+function.  The access logic prints information about the spacing of the
+elements (as it sees them) and it stores the array-edge addresses for
+subsequent validation.
+
+The access output uses n, rather than (n-1), as its "end" address, just to
+keep expectation arithmetic simple.  So the output does discuss addresses of
+elements that do not exist.
+
+The access output uses an expectedElmSz parameter, and calculations from it.
+Care is taken to ensure that we are not merely comparing two executions of the
+same, possibly flawed, math.  First, the value of expectedElmSz is always
+calculated using concrete types, e.g. sizeof(float), while the SUT-produced
+value is from (implied use of) literally sizeof(T), just in a case where we
+have T=float.  Second, the details within the calculation are not the main
+feature of interest; rather, it's _whether_ this calculation is being applied
+in the cases where it should be, instead of, for example, seeming to assume
+sizeof(T)==1 or sizeof(T)==sizeof(size_t), both being bugs that actually
+occurred.
+
+An allocAndAccess function runs in an instrumentation context that observes
+the stack frame that allocAndAccess gets.  This instrumentation verifies that
+the recorded array-edge addresses are within the stack frame.  A SUT bug due
+to a mistake in the box-pass's generated buffer declaration can cause a
+function (like allocAndAccess) that declares an array of T's to get an
+incorrectly sized stack frame.  This test was created along with a fix of such
+a bug.
+
+Including the instrumentation context, the call graph is:
+    main
+        run_X
+            bookendOuter_X
+                allocAndAccess_X
+                    bookendInner
+            reportBookends
+The outer and inner "bookend" functions record the addresses of a local
+variable within their respective stack frames, thus giving a lenient
+approximation of the extent of the allocAndAccess stack frame, and
+thereby, of its VLA.  Requiring a sufficiently large VLA, and seeing the
+resulting access stay in bounds (with constant overhead shown under verbose
+output) gives confidence in the actual VLA being of the right size.
+
+For this instrumentation to work, separate compilation (optimization) units
+are required: outer and inner "bookend" functions in one, allocAndAccess in the
+other.  Otherwise, the optimizer sees the full call chain and compresses its
+use of frame pointers / VLA zones, into one ABI frame.  Then, the outer and
+inner reference local variables no longer span the VLA.  So, the "bookend"
+routines are in boxed.bookend.cfa, while everything else is here.
+
+These code elements are boilerplate, and are realized with macros driven by the
+tables in boxed.cases.hfa:
+    boxed.main.cfa      main calls run_X
+    boxed.main.cfa      declaration and definition of run_X, including
+                            calling bookendOuter_X
+                            calling reportBookends
+    boxed.hfa           declaration of bookendOuter_X
+    boxed.bookend.cfa   definition of bookendOuter_X, including
+                            calling allocAndAccess_X
+    boxed.hfa           declaration of allocAndAccess_X
+The definition of allocAndAccess_X is kept bespoke, to keep the actual test
+details readable.  As a result, the list of allocAndAccess_X definitions in
+boxed.main.cfa must be kept aligned with the tables in boxed.cases.hfa.
+A common definition of bookendInner is used across all test cases, so its
+declaration and definition are not table driven.
+
+*/
+
+#include "boxed.hfa"
+
+#define SHOW_ACCESS_1D( N_ELEMS ) /* needs x and expectedElmSz at the expansion site; prints element spacing, records ar_lo/ar_hi */ \
+    char * e0 = (char *) & x[0];                                                                \
+    char * e1 = (char *) & x[1];                                                                \
+    char * e2 = (char *) & x[2];                                                                \
+    char * en = (char *) & x[N_ELEMS];                                                          \
+                                                                                                \
+    ptrdiff_t d01 = e1 - e0;                                                                    \
+    ptrdiff_t d12 = e2 - e1;                                                                    \
+    ptrdiff_t d02 = e2 - e0;                                                                    \
+    ptrdiff_t d0n = en - e0;                                                                    \
+                                                                                                \
+    printf("Delta 0--1 expected %zd bytes, actual %zd bytes\n", 1 * expectedElmSz, d01);        \
+    printf("Delta 1--2 expected %zd bytes, actual %zd bytes\n", 1 * expectedElmSz, d12);        \
+    printf("Delta 0--2 expected %zd bytes, actual %zd bytes\n", 2 * expectedElmSz, d02);        \
+    printf("Delta 0--n expected %zd bytes, actual %zd bytes\n", N_ELEMS * expectedElmSz, d0n);  \
+                                                                                                \
+    VPRT( "Array start %p end %p\n", e0, en );                                                  \
+                                                                                                \
+    ar_lo = e0;                                                                                 \
+    ar_hi = en; /* address of x[N_ELEMS], i.e. one element past the last */
+
+
+#define SHOW_ACCESS_2D( N_ELEMS ) /* needs 2-D x and expectedElmSz at the expansion site; prints row/column spacing, records ar_lo/ar_hi */ \
+    char * e00 = (char *) & x[0][0];                                                                \
+    char * e01 = (char *) & x[0][1];                                                                \
+    char * e02 = (char *) & x[0][2];                                                                \
+    char * e0n = (char *) & x[0][N_ELEMS];                                                          \
+                                                                                                \
+    char * e10 = (char *) & x[1][0];                                                                \
+    char * e20 = (char *) & x[2][0];                                                                \
+    char * en0 = (char *) & x[N_ELEMS][0];                                                          \
+                                                                                                \
+    char * enn = (char *) & x[N_ELEMS][N_ELEMS];                                                          \
+                                                                                                \
+    ptrdiff_t d_00_01 = e01 - e00;                                                                    \
+    ptrdiff_t d_01_02 = e02 - e01;                                                                    \
+    ptrdiff_t d_00_02 = e02 - e00;                                                                    \
+    ptrdiff_t d_00_0n = e0n - e00;                                                                    \
+                                                                                                \
+    ptrdiff_t d_00_10 = e10 - e00;                                                                    \
+    ptrdiff_t d_10_20 = e20 - e10;                                                                    \
+    ptrdiff_t d_00_20 = e20 - e00;                                                                    \
+    ptrdiff_t d_00_n0 = en0 - e00;                                                                    \
+                                                                                                \
+    ptrdiff_t d_00_nn = enn - e00;                                                                    \
+                                                                                                \
+    printf("Delta 0,0--0,1 expected %zd bytes, actual %zd bytes\n", 1 * 1 * expectedElmSz, d_00_01);        \
+    printf("Delta 0,1--0,2 expected %zd bytes, actual %zd bytes\n", 1 * 1 * expectedElmSz, d_01_02);        \
+    printf("Delta 0,0--0,2 expected %zd bytes, actual %zd bytes\n", 1 * 2 * expectedElmSz, d_00_02);        \
+    printf("Delta 0,0--0,n expected %zd bytes, actual %zd bytes\n", 1 * N_ELEMS * expectedElmSz, d_00_0n);  \
+                                                                                                \
+    printf("Delta 0,0--1,0 expected %zd bytes, actual %zd bytes\n", N_ELEMS * 1 * expectedElmSz, d_00_10);        \
+    printf("Delta 1,0--2,0 expected %zd bytes, actual %zd bytes\n", N_ELEMS * 1 * expectedElmSz, d_10_20);        \
+    printf("Delta 0,0--2,0 expected %zd bytes, actual %zd bytes\n", N_ELEMS * 2 * expectedElmSz, d_00_20);        \
+    printf("Delta 0,0--n,0 expected %zd bytes, actual %zd bytes\n", N_ELEMS * N_ELEMS * expectedElmSz, d_00_n0);  \
+                                                                                                \
+    printf("Delta 0,0--n,n expected %zd bytes, actual %zd bytes\n", N_ELEMS * N_ELEMS * expectedElmSz + \
+                                                                    1       * N_ELEMS * expectedElmSz, d_00_nn);        \
+                                                                                                \
+    VPRT( "Array start %p end %p\n", e00, enn );                                                  \
+                                                                                                \
+    ar_lo = e00;                                                                                 \
+    ar_hi = en0; /* one past the end is the start of row N_ELEMS (which does not exist); enn lies a whole row further on */
+
+
+
+
+
+// ---------- 1, singleton
+
+forall( T ) T * allocAndAccess_1 ( size_t expectedElmSz, const char * tcid, const char * vart ) { // rig 1: one-element array of T, accessed within this function
+    printf("------- 1%s (singleton): T x[1], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 1 ] INITARR;      // the array under test
+    bookendInner();        // inner bookend of the instrumentation (see file header)
+    SHOW_ACCESS_1D( 1 )    // note: also takes the addresses of x[1] and x[2], which lie past this array
+    return 0p;             // always null
+}
+
+// ---------- 2, general
+
+forall( T ) T * allocAndAccess_2 ( size_t expectedElmSz, const char * tcid, const char * vart ) { // rig 2: 42-element array of T, accessed within this function
+    printf("------- 2%s (general): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;     // the array under test
+    bookendInner();        // inner bookend of the instrumentation (see file header)
+    SHOW_ACCESS_1D( 42 )   // print element spacing and record ar_lo/ar_hi
+    return 0p;             // always null
+}
+
+// ---------- 3, user VLA
+
+forall( T ) T * allocAndAccess_3 ( size_t expectedElmSz, const char * tcid, const char * vart, size_t n ) { // rig 3: array length n is a run-time parameter
+    printf("------- 3%s (user VLA): T x[n], got n=%zd, expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, n, vart, sizeof(T), expectedElmSz);
+    T x[ n ] INITARR;      // the array under test, n elements
+    bookendInner();        // inner bookend of the instrumentation (see file header)
+    SHOW_ACCESS_1D( n )    // print element spacing and record ar_lo/ar_hi
+    return 0p;             // always null
+}
+
+// ---------- 4, 2-dimensional
+
+forall( T ) T * allocAndAccess_4 ( size_t expectedElmSz, const char * tcid, const char * vart ) { // rig 4: 42-by-42 array of T, accessed within this function
+    printf("------- 4%s (2-dimensional): T x[42][42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte atoms\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ][ 42 ] INITARR;   // the array under test
+    bookendInner();            // inner bookend of the instrumentation (see file header)
+    SHOW_ACCESS_2D( 42 )       // print row and column spacing and record ar_lo/ar_hi
+    return 0p;                 // always null
+}
+
+// ---------- 5, pair
+
+forall( T ) T * allocAndAccess_5 ( size_t expectedElmSz, const char * tcid, const char * vart ) { // rig 5: elements are pair(T,T) rather than T itself
+    printf("------- 5%s (pair): pair(T,T) x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte atoms\n", tcid, vart, sizeof(T), expectedElmSz);
+    pair(T,T) x[ 42 ] INITARR;   // the array under test
+    bookendInner();              // inner bookend of the instrumentation (see file header)
+    SHOW_ACCESS_1D( 42 )         // print element spacing and record ar_lo/ar_hi
+    return 0p;                   // always null
+}
+
+// ---------- 6, raii
+
+struct my_mgd_t {    // element type of the RAII rig (VART of TC 6a); its ctor and dtor below do the reporting
+    float x;
+};
+
+// Auxiliary state used in the RAII rig only.  Only to format/excerpt output.  Reset per TC.
+static struct {
+    size_t total_elems;     // size of array being managed
+    size_t ctor_calls;      // number of ctor calls seen so far
+    size_t dtor_calls;      // number of dtor calls seen so far
+    char * ctor_first;      // argument of first ctor call
+    char * dtor_first;      // argument of first dtor call
+    char * dtor_lo;         // lowest dtor argument seen yet
+    char * dtor_hi;         // highest dtor argument seen yet
+} raii;
+
+void ?{}( my_mgd_t & this ) {    // ctor: reports and records the address of each constructed element
+    if (raii.ctor_first == 0p) raii.ctor_first = (char *) & this;    // first call's target becomes the reference point
+    VPRT( "ctor call %zd targets %p\n", raii.ctor_calls, &this );
+    if (raii.ctor_calls < 2 || raii.total_elems - raii.ctor_calls <= 2)    // print only the first two and the last two calls
+        printf( "ctor call %zd targets first + %zd bytes\n", raii.ctor_calls, ((char*)&this - raii.ctor_first) );
+    // ctor call locations fill the conformed ar_lo/hi
+    if ( (char *) & this < ar_lo ) ar_lo = (char *) & this;    // running minimum of element addresses
+    if ( (char *) & this > ar_hi ) ar_hi = (char *) & this;    // running maximum: ends at the highest element's address, not one past it
+    raii.ctor_calls += 1;
+}
+
+void ^?{}( my_mgd_t & this ) {    // dtor: reports each destroyed element's address and tracks the range seen
+    // dtor calls count backward
+    if (raii.dtor_first == 0p) raii.dtor_first = (char *) & this;    // first call's target becomes the reference point
+    VPRT( "dtor call %zd targets %p\n", raii.dtor_calls, &this );
+    if (raii.dtor_calls < 2 || raii.total_elems - raii.dtor_calls <= 2)    // print only the first two and the last two calls
+        printf( "dtor call %zd targets first - %zd bytes\n", raii.dtor_calls, (raii.dtor_first - (char*)&this) );
+    // dtor call locations fill auxiliary state; reconciled with the conformed ones on last call
+    if ( (char *) & this < raii.dtor_lo ) raii.dtor_lo = (char *) & this;    // running minimum of dtor targets
+    if ( (char *) & this > raii.dtor_hi ) raii.dtor_hi = (char *) & this;    // running maximum of dtor targets
+    raii.dtor_calls += 1;
+    if (raii.dtor_calls >= raii.total_elems)    // after the final dtor call: compare ctor-recorded bounds with dtor-recorded bounds
+        printf( "dtor lo off by %zd bytes, hi off by %zd bytes\n", (ar_lo - raii.dtor_lo), (ar_hi - raii.dtor_hi) );
+}
+
+forall( T ) T * allocAndAccess_6 ( size_t expectedElmSz, const char * tcid, const char * vart ) {    // rig 6: the element ctor/dtor calls do the reporting
+    raii.total_elems = 42;        // must match the array length declared below
+    raii.ctor_calls = 0;          // reset the per-test-case RAII bookkeeping
+    raii.ctor_first = 0p;
+    raii.dtor_first = 0p;
+    raii.dtor_lo = (char*)-1;     // inverted lo/hi starting values, filled in by the dtor
+    raii.dtor_hi = 0p;
+    printf("------- 6%s (raii): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;            // the array under test
+    bookendInner();               // inner bookend of the instrumentation (see file header)
+    // no SHOW_ACCESS: it happens in the cdtors
+    return 0p;                    // always null
+}
+
+// ---------- 7, comm, PPD, PFST
+
+forall( T* ) void access_7 ( size_t expectedElmSz, T x[] ) { // rig 7 helper: reports spacing of the caller's array as seen through parameter x
+    SHOW_ACCESS_1D(42)    // length 42 is hard-coded to match the caller's array
+}
+forall( T ) T * allocAndAccess_7 ( size_t expectedElmSz, const char * tcid, const char * vart ) { // rig 7: hands the array to polymorphic access_7 by direct call
+    printf("------- 7%s (communication, poly-poly direct, by param T[]): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // the array under test
+    bookendInner();                 // inner bookend of the instrumentation (see file header)
+    access_7( expectedElmSz, x );   // array passed as pointer to first element
+    return 0p;                      // always null
+}
+
+// ---------- 8, comm, PPD, PARR
+
+forall( T* ) void access_8 ( size_t expectedElmSz, T (*temp)[42] ) { // rig 8 helper: receives the array as pointer-to-array
+    T * x = *temp;        // bind the name x, which SHOW_ACCESS_1D indexes
+    SHOW_ACCESS_1D(42)
+}
+forall( T ) T * allocAndAccess_8 ( size_t expectedElmSz, const char * tcid, const char * vart ) { // rig 8: hands the array to polymorphic access_8 by direct call
+    printf("------- 8%s (communication, poly-poly direct, by param T(*)[*]): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;               // the array under test
+    bookendInner();                  // inner bookend of the instrumentation (see file header)
+    access_8( expectedElmSz, &x );   // array passed as pointer to the whole array
+    return 0p;                       // always null
+}
+
+// ---------- 9, comm, PPA, PFST
+
+forall( T | { void access_9 ( size_t, T x[] ); } )    // rig 9: access_9 is named as an assertion on T
+T * allocAndAccess_9 ( size_t expectedElmSz, const char * tcid, const char * vart ) { 
+    printf("------- 9%s (communication, poly-poly assertion, by param T[]): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;              // the array under test
+    bookendInner();                 // inner bookend of the instrumentation (see file header)
+    access_9( expectedElmSz, x );   // array passed as pointer to first element
+    return 0p;                      // always null
+}
+forall( T* ) void access_9 ( size_t expectedElmSz, T x[] ) { // rig 9 helper: polymorphic function matching the access_9 assertion's signature
+    SHOW_ACCESS_1D(42)    // length 42 is hard-coded to match the caller's array
+}
+
+// ---------- 10, comm, PPA, PARR
+
+forall( T | { void access_10 ( size_t, T (*)[42] ); } )    // rig 10: access_10 is named as an assertion on T
+T * allocAndAccess_10( size_t expectedElmSz, const char * tcid, const char * vart ) { 
+    printf("------- 10%s (communication, poly-poly assertion, by param T(*)[*]): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;                // the array under test
+    bookendInner();                   // inner bookend of the instrumentation (see file header)
+    access_10( expectedElmSz, &x );   // array passed as pointer to the whole array
+    return 0p;                        // always null
+}
+forall( T* ) void access_10( size_t expectedElmSz, T (*temp)[42] ) {    // rig 10 helper: receives the array as pointer-to-array
+    T * x = *temp;        // bind the name x, which SHOW_ACCESS_1D indexes
+    SHOW_ACCESS_1D(42)
+}
+
+// ---------- 11, comm, PMA, PFST
+
+forall( T | { void access_11( size_t, T * ); } )    // rig 11: access_11 is named as an assertion on T
+T * allocAndAccess_11 ( size_t expectedElmSz, const char * tcid, const char * vart ) { 
+    printf("------- 11%s (communication, poly-mono assertion, by param T[]): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;               // the array under test
+    bookendInner();                  // inner bookend of the instrumentation (see file header)
+    access_11( expectedElmSz, x );   // array passed as pointer to first element
+    return 0p;                       // always null
+}
+void access_11 ( size_t expectedElmSz, char x[] ) {    // rig 11 helper, monomorphic overload for char elements
+    SHOW_ACCESS_1D(42)    // length 42 is hard-coded to match the caller's array
+}
+void access_11 ( size_t expectedElmSz, bigun x[] ) { // rig 11 helper, monomorphic overload for bigun elements
+    SHOW_ACCESS_1D(42)    // length 42 is hard-coded to match the caller's array
+}
+
+// ---------- 12, comm, PMA, PARR
+
+forall( T | { void access_12 ( size_t, T (*)[42] ); } )    // rig 12: access_12 is named as an assertion on T
+T * allocAndAccess_12 ( size_t expectedElmSz, const char * tcid, const char * vart ) { 
+    printf("------- 12%s (communication, poly-mono assertion, by param T(*)[*]): T x[42], expecting T=%s, got sizeof(T)=%zd, expecting %zd-byte elems\n", tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;                // the array under test
+    bookendInner();                   // inner bookend of the instrumentation (see file header)
+    access_12( expectedElmSz, &x );   // array passed as pointer to the whole array
+    return 0p;                        // always null
+}
+// Test 12 callee, monomorphic: receives a pointer to a 42-element array of
+// double. Dereferencing `temp` yields the array, which is bound to the element
+// pointer `x`; the name `x` is presumably required by SHOW_ACCESS_1D (defined
+// outside this chunk).
+void access_12( size_t expectedElmSz, double (*temp)[42] )
+{
+    double * x = *temp;
+    SHOW_ACCESS_1D(42)
+}
+
+// ---------- 13, comm, MPD, PFST
+
+// Test 13 callee: polymorphic in T, receives the array as a `T x[]` parameter
+// and is called directly from the monomorphic allocAndAccess_13 overloads.
+// The body is the SHOW_ACCESS_1D macro (defined outside this chunk), which
+// presumably reads `x` and `expectedElmSz` by name -- TODO confirm.
+forall( T* )
+void access_13( size_t expectedElmSz, T x[] ) {
+    SHOW_ACCESS_1D(42)
+}
+// Test 13 caller, `char` overload (communication, mono-poly direct, by T[]).
+// Prints the test banner, declares a local `char x[42]`, calls bookendInner(),
+// and passes `x` directly to the polymorphic access_13.
+//   expectedElmSz - element size the test expects; forwarded to access_13
+//   tcid          - test-case id, used only in the banner
+//   vart          - not referenced here (the type name is fixed in the banner)
+// Always returns the null pointer `0p`.
+// INITARR and bookendInner are defined outside this chunk.
+char * allocAndAccess_13 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    // expectedElmSz is size_t, hence %zu (not the signed %zd).
+    printf("------- 13%s (communication, mono-poly direct, by param T[]): char x[42], expecting %zu-byte elems\n",
+        tcid, expectedElmSz);
+    char x[ 42 ] INITARR;
+    bookendInner();
+    access_13( expectedElmSz, x );
+    return 0p;
+}
+// Test 13 caller, `bigun` overload (communication, mono-poly direct, by T[]).
+// Prints the test banner, declares a local `bigun x[42]`, calls bookendInner(),
+// and passes `x` directly to the polymorphic access_13.
+//   expectedElmSz - element size the test expects; forwarded to access_13
+//   tcid          - test-case id, used only in the banner
+//   vart          - not referenced here (the type name is fixed in the banner)
+// Always returns the null pointer `0p`.
+// bigun, INITARR and bookendInner are defined outside this chunk.
+bigun * allocAndAccess_13( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    // expectedElmSz is size_t, hence %zu (not the signed %zd).
+    printf("------- 13%s (communication, mono-poly direct, by param T[]): bigun x[42], expecting %zu-byte elems\n",
+        tcid, expectedElmSz);
+    bigun x[ 42 ] INITARR;
+    bookendInner();
+    access_13( expectedElmSz, x );
+    return 0p;
+}
+
+// ---------- 14, comm, MPD, PARR
+
+// Test 14 callee: polymorphic in T, receives a pointer to a 42-element array
+// and is called directly from the monomorphic allocAndAccess_14.
+// Dereferencing `temp` yields the array, which is bound to the element pointer
+// `x`. The dereference is kept in exactly this form on purpose. The name `x`
+// is presumably required by SHOW_ACCESS_1D (defined outside this chunk).
+forall( T* )
+void access_14( size_t expectedElmSz, T (*temp)[42] ) {
+    T * x = *temp;
+    SHOW_ACCESS_1D(42)
+}
+// Test 14 caller (communication, mono-poly direct, array passed as T(*)[42]).
+// Prints the test banner, declares a local `double x[42]`, calls
+// bookendInner(), and passes `&x` directly to the polymorphic access_14.
+//   expectedElmSz - element size the test expects; forwarded to access_14
+//   tcid          - test-case id, used only in the banner
+//   vart          - not referenced here (the type name is fixed in the banner)
+// Always returns the null pointer `0p`.
+// INITARR and bookendInner are defined outside this chunk.
+double * allocAndAccess_14 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    // The banner carries this test's own number, 14.
+    // NOTE(review): any recorded expected output must show the same label.
+    // expectedElmSz is size_t, hence %zu (not the signed %zd).
+    printf("------- 14%s (communication, mono-poly direct, by param T(*)[*]): double x[42], expecting %zu-byte elems\n",
+        tcid, expectedElmSz);
+    double x[ 42 ] INITARR;
+    bookendInner();
+    access_14( expectedElmSz, &x );
+    return 0p;
+}
+
+// ---------- 15, operators
+
+// Test 15 callee: exercises pointer-arithmetic operators on a polymorphic T*.
+// `& x[5]` is taken as the reference address; each enabled case recomputes an
+// address with a different operator and prints how far it is from the expected
+// one (0 means the operator agreed). Cases that are commented out are left
+// disabled exactly as found.
+//   expectedElmSz - not referenced in this body
+//   x             - array under test
+forall( T* ) void access_15 ( size_t expectedElmSz, T x[] ) {
+    // correctness of x and ?[?] established by earlier tests
+    T * x5 = & x[5];
+
+    // Prints "<OP> off by <delta>": the difference of the two addresses, converted
+    // to ptrdiff_t so that the argument type matches the signed %td conversion.
+    #define SHOW( OP, ACT, EXP ) printf( #OP " off by %td\n", (ptrdiff_t)( ((size_t)(EXP)) - ((size_t)(ACT)) ) )
+    { T * xx = & 5[x];            SHOW( ?[?] rev,  xx, x5 ); }
+    { T * xx = x + 5;             SHOW( ?+?,       xx, x5 ); }
+    { T * xx = 5 + x;             SHOW( ?+? rev,   xx, x5 ); }
+    { T * xx = x;   xx += 5;      SHOW( ?+=?,      xx, x5 ); }
+//  { T * xx = x;   for(5) xx++;  SHOW( ?++,       xx, x5 ); }
+//  { T * xx = x;   for(5) ++xx;  SHOW( ++?,       xx, x5 ); }
+    { T * xx = x5;  xx -= 5;      SHOW( ?-=?,      xx, x  ); }
+//  { T * xx = x5;  for(5) xx--;  SHOW( ?--,       xx, x  ); }
+//  { T * xx = x5;  for(5) --xx;  SHOW( --?,       xx, x  ); }
+    #undef SHOW
+
+    ptrdiff_t expPos5 = x5 - x;
+    // Only read by the disabled check below; the subtraction itself still runs.
+    ptrdiff_t expNeg5 = x - x5;
+
+    // ptrdiff_t operands, hence %td.
+    printf( "?-? +ve off by %td\n", ((ptrdiff_t) 5) - expPos5 );
+//  printf( "?-? -ve off by %td\n", ((ptrdiff_t)-5) - expNeg5 );
+}
+
+// Test 15 caller (operators).
+// Prints the test banner, declares a local `T x[42]`, and passes `x` to
+// access_15. Unlike the earlier callers it does not call bookendInner().
+//   expectedElmSz - element size the test expects; forwarded to access_15
+//   tcid, vart    - test-case id and type name, used only in the banner
+// Always returns the null pointer `0p`; T occurs only in the return type, so the
+// return type is presumably what lets call sites pick T -- TODO confirm.
+// INITARR is defined outside this chunk.
+forall( T ) T * allocAndAccess_15 ( size_t expectedElmSz, const char * tcid, const char * vart ) {
+    // sizeof(T) and expectedElmSz are size_t, hence %zu (not the signed %zd).
+    printf("------- 15%s (operators): T x[42], expecting T=%s, got sizeof(T)=%zu, expecting %zu-byte elems\n",
+        tcid, vart, sizeof(T), expectedElmSz);
+    T x[ 42 ] INITARR;
+    // bookends unused
+    access_15( expectedElmSz, x );
+    return 0p;
+}
+
+
+
+
+
+// Generates one `run` function per TR entry of boxed.cases.hfa; TC entries
+// expand to nothing in this pass. Each generated function calls
+// resetBookends(), invokes the matching bookendOuter through CALL, calls
+// reportBookends(), and returns the pointer that bookendOuter produced.
+// F_SIG and CALL are defined outside this chunk; `expectedElmSz`, `tcid` and
+// `vart` are presumably the parameter names F_SIG declares -- TODO confirm.
+// The ETG parameter is accepted but not used by this expansion.
+// (No comments inside the macro body: it is a single continued line.)
+#define TC(...)
+#define TR( TRID, SZS, SZV, ETG, ACCS, SPS, OVLD )                                          \
+    F_SIG( run, TRID, SZS, SZV, ACCS, SPS, OVLD ) {                                         \
+        resetBookends();                                                                    \
+        OVLD * retval = CALL( bookendOuter, TRID, SZS, SZV, expectedElmSz, tcid, vart );    \
+        reportBookends();                                                                   \
+        return retval;                                                                      \
+    }
+#include "boxed.cases.hfa"
+#undef TC
+#undef TR
+
+
+// Two-level stringification: Q(tok) macro-expands its argument first, then Q_
+// turns the expanded text into a string literal.
+#define Q_(tok) #tok
+#define Q(tok) Q_(tok)
+
+// Test driver: expands every TC entry of boxed.cases.hfa into a braced block
+// that calls the matching `run` function with sizeof(ETG(VART)) and the
+// stringified test-case id and type name. TR entries expand to nothing here.
+// The returned pointer is stored in a VART* and then explicitly discarded; the
+// declared pointer type is presumably what selects the overload -- TODO confirm.
+int main() {
+    #define TR(...)
+    #define TC( TRID, TCID, SZS, SZV, ETG, VART )                                       \
+        {                                                                               \
+            VART * ignore =                                                             \
+                CALL( run, TRID, SZS, SZV, sizeof(ETG(VART)), Q(TCID), Q(VART) );       \
+            (void) ignore;                                                              \
+        }
+    #include "boxed.cases.hfa"
+    #undef TR
+    #undef TC
+}
