Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ libcfa/src/Makefile.am	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -111,4 +111,5 @@
 	concurrency/invoke.h \
 	concurrency/future.hfa \
+	concurrency/once.hfa \
 	concurrency/kernel/fwd.hfa \
 	concurrency/mutex_stmt.hfa
Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ libcfa/src/bits/defs.hfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -31,7 +31,9 @@
 #define __cfa_anonymous_object(x) inline struct x
 #define __cfa_dlink1(x) dlink(x)
+#define __cfa_dlink2(x, name) inline struct name { inline dlink(x); }
 #else
 #define __cfa_anonymous_object(x) struct x __cfa_anonymous_object
 #define __cfa_dlink1(x) struct { struct x * next; struct x * back; }
+#define __cfa_dlink2(x, name) struct { struct x * next; struct x * back; } __dlink ## name
 #endif
 
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ libcfa/src/concurrency/invoke.h	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -195,11 +195,11 @@
 		struct __monitor_group_t monitors;
 
-		// used to put threads on dlist data structure
+		// intrusive link fields, used for locks, monitors and any user defined data structure
+		// default link fields for dlist
 		__cfa_dlink1(thread$) user_link;
 
-		struct {
-			struct thread$ * next;
-			struct thread$ * prev;
-		} node;
+		// secondary intrusive link fields, used for global cluster list
+		// default link fields for dlist
+		__cfa_dlink2(thread$, cltr_link);
 
 		// used to store state between clh lock/unlock
@@ -230,11 +230,10 @@
 	#ifdef __cforall
 	extern "Cforall" {
+		static inline thread$ * volatile & ?`next ( thread$ * this ) {
+			return this->user_link.next;
+		}
 
 		static inline thread$ *& get_next( thread$ & this ) __attribute__((const)) {
 			return this.user_link.next;
-		}
-
-		static inline [thread$ *&, thread$ *& ] __get( thread$ & this ) __attribute__((const)) {
-			return this.node.[next, prev];
 		}
 
@@ -244,4 +243,7 @@
 			return result;
 		}
+
+		P9_EMBEDDED(thread$, thread$.cltr_link)
+		P9_EMBEDDED(thread$.cltr_link, dlink(thread$))
 
 		static inline void ?{}(__monitor_group_t & this) {
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ libcfa/src/concurrency/kernel.hfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -256,5 +256,5 @@
 	// List of threads
 	__spinlock_t thread_list_lock;
-	__dllist_t(struct thread$) threads;
+	dlist(struct thread$, thread$.cltr_link) threads;
 	unsigned int nthreads;
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -535,6 +535,4 @@
 	#endif
 
-	node.next = 0p;
-	node.prev = 0p;
 	doregister(curr_cluster, this);
 
@@ -659,5 +657,5 @@
 	#endif
 
-	threads{ __get };
+	threads{};
 
 	io.arbiter = create();
@@ -739,5 +737,5 @@
 	lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
 	cltr->nthreads += 1;
-	push_front(cltr->threads, thrd);
+	insert_first(cltr->threads, thrd);
 	unlock    (cltr->thread_list_lock);
 }
@@ -745,6 +743,10 @@
 void unregister( cluster * cltr, thread$ & thrd ) {
 	lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
-	remove(cltr->threads, thrd );
-	cltr->nthreads -= 1;
+	{
+		tytagref( dlink(thread$), dlink(thread$) ) ?`inner( thread$ & this ) = void;
+		with( DLINK_VIA( thread$, thread$.cltr_link ) )
+			remove( thrd );
+		cltr->nthreads -= 1;
+	}
 	unlock(cltr->thread_list_lock);
 }
Index: libcfa/src/concurrency/once.hfa
===================================================================
--- libcfa/src/concurrency/once.hfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
+++ libcfa/src/concurrency/once.hfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -0,0 +1,106 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// once.hfa -- Algorithms to prevent concurrent calls from causing duplicate calls
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Oct 11:40:47 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#pragma once
+
+#include "containers/lockfree.hfa"
+#include "kernel/fwd.hfa"
+
+enum once_state {
+	ARMED = 0,    // state set by the once_flag constructor; call_once tries to CAS away from this value
+	IN_PROGRESS,  // installed by the thread that wins the CAS in call_once, before it runs the function
+	READY         // tested by call_once's fast path, which then returns false without calling anything
+};
+
+struct once_flag {
+	volatile int state;              // holds a once_state value; outside the constructor it is accessed via __atomic builtins
+	poison_list( thread$ ) waiters;  // threads enqueued by once_wait$; poisoned and drained by once_call$
+};
+
+static inline {
+	void ?{}(once_flag & this) { this.state = ARMED; } // constructor: a freshly constructed flag starts in the ARMED state
+
+	void once_wait$(once_flag & this) {
+		// Waiter path of call_once: try to enqueue the calling thread on the flag's waiter list.
+		if(push( this.waiters, active_thread() )) {
+			// push returned true: the list wasn't poisoned and this thread is queued, so park until unparked.
+			park();
+		} // push returned false: the list was already poisoned, so return without parking
+	}
+
+	void once_call$( once_flag & this, void (*func)(void) ) { // winner path: run func, publish READY, release waiters
+		/* paranoid */ verify( once_state.IN_PROGRESS == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+		/* paranoid */ verify( ! is_poisoned(this.waiters) );
+
+		// call the thing we are here for!
+		func();
+
+		/* paranoid */ verify( ! is_poisoned(this.waiters) );
+		/* paranoid */ verify( once_state.IN_PROGRESS == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+		// Mark the call as being done: READY is what call_once's fast path and post-conditions test for.
+		__atomic_store_n( &this.state, (int)once_state.READY, __ATOMIC_SEQ_CST );
+
+		// wake up the sleepers and make sure no new sleeper arrives (push fails once the list is poisoned)
+		thread$ * sleeper = poison( this.waiters );
+
+		/* paranoid */ verify( is_poisoned(this.waiters) );
+		/* paranoid */ verify( once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+		while(sleeper != 0p) {
+			// find the next thread now because unpark invalidates the pointer
+			thread$ * next = advance(sleeper);
+
+			// wake-up the thread, invalidates pointer
+			unpark( sleeper );
+
+			// update the current
+			sleeper = next;
+		}
+	}
+
+	bool call_once( once_flag & this, void (*func)(void) ) {
+		// Returns true only in the thread that won the CAS below and ran func. Fast path: is the call already done?
+		if(likely(once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED))) {
+			/* paranoid */ verify( is_poisoned(this.waiters) );
+			return false;
+		}
+
+		// Try to CAS ourselves in (ARMED -> IN_PROGRESS) as the thread that will actually call the function
+		int expected = ARMED;
+		if( __atomic_compare_exchange_n( &this.state, &expected, (int)once_state.IN_PROGRESS, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) {
+
+			// we won the race, call the function
+			once_call$( this, func );
+
+			/* paranoid */ verify( is_poisoned(this.waiters) );
+			/* paranoid */ verify( once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+			// in case someone cares, this call did do the underlying call
+			return true;
+		}
+		else {
+
+			// someone else is doing (or has done) the call; wait for it unless the waiter list is already poisoned
+			once_wait$( this );
+
+			/* paranoid */ verify( is_poisoned(this.waiters) );
+			/* paranoid */ verify( once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+			// in case someone cares, someone else did the call
+			return false;
+		}
+	}
+}
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ libcfa/src/concurrency/thread.cfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -54,7 +54,4 @@
 	#endif
 
-	node.next = 0p;
-	node.prev = 0p;
-
 	clh_node = malloc( );
 	*clh_node = false;
Index: libcfa/src/containers/lockfree.hfa
===================================================================
--- libcfa/src/containers/lockfree.hfa	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ libcfa/src/containers/lockfree.hfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -142,4 +142,5 @@
 
 	static inline void ?{}(poison_list(T) & this) { this.head = 0p; }
+	static inline bool is_poisoned( const poison_list(T) & this ) { return 1p == this.head; }
 
  	static inline forall(| { T * volatile & ?`next ( T * ); })
@@ -147,6 +148,6 @@
 		// Adds an element to the list
 		// Multi-Thread Safe, Lock-Free
-		T * push(poison_list(T) & this, T * elem) __attribute__((artificial));
-		T * push(poison_list(T) & this, T * elem) {
+		bool push(poison_list(T) & this, T * elem) __attribute__((artificial));
+		bool push(poison_list(T) & this, T * elem) {
 			/* paranoid */ verify(0p == (elem`next));
 			__atomic_store_n( &elem`next, (T*)1p, __ATOMIC_RELAXED );
@@ -156,5 +157,5 @@
 			for() {
 				// check if it's poisoned
-				if(expected == 1p) return 0p;
+				if(expected == 1p) return false;
 
 				// try to CAS the elem in
@@ -162,11 +163,12 @@
 					// We managed to exchange in, we are done
 
-					// We should never succeed the CAS if it's poisonned.
-					/* paranoid */ verify( expected != 1p );
+					// We should never succeed the CAS if the list is poisoned, and elem`next should still be 1p.
+					/* paranoid */ verify( expected  != 1p );
+					/* paranoid */ verify( elem`next == 1p );
 
 					// If we aren't the first, we need to tell the person before us
 					// No need to
 					elem`next = expected;
-					return expected;
+					return true;
 				}
 			}
@@ -190,5 +192,5 @@
 		T * poison(poison_list(T) & this) {
 			T * ret = __atomic_exchange_n( &this.head, (T*)1p, __ATOMIC_SEQ_CST );
-			/* paranoid */ verify( ret != (T*)1p );
+			/* paranoid */ verifyf( ret != (T*)1p, "Poison list %p poisoned more than once!", &this );
 			return ret;
 		}
Index: src/AST/Pass.impl.hpp
===================================================================
--- src/AST/Pass.impl.hpp	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ src/AST/Pass.impl.hpp	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -617,4 +617,5 @@
 				maybe_accept( node, &FunctionDecl::returns );
 				maybe_accept( node, &FunctionDecl::type );
+				maybe_accept( node, &FunctionDecl::attributes );
 				// First remember that we are now within a function.
 				ValueGuard< bool > oldInFunction( inFunction );
@@ -625,5 +626,4 @@
 				atFunctionTop = true;
 				maybe_accept( node, &FunctionDecl::stmts );
-				maybe_accept( node, &FunctionDecl::attributes );
 			}
 		}
Index: src/Common/PassVisitor.impl.h
===================================================================
--- src/Common/PassVisitor.impl.h	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ src/Common/PassVisitor.impl.h	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -607,4 +607,5 @@
 			indexerAddId( &func );
 			maybeMutate_impl( node->type, *this );
+			maybeMutate_impl( node->attributes, *this );
 			// First remember that we are now within a function.
 			ValueGuard< bool > oldInFunction( inFunction );
@@ -615,5 +616,4 @@
 			atFunctionTop = true;
 			maybeMutate_impl( node->statements, *this );
-			maybeMutate_impl( node->attributes, *this );
 		}
 	}
Index: src/GenPoly/Box.cc
===================================================================
--- src/GenPoly/Box.cc	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ src/GenPoly/Box.cc	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -68,8 +68,5 @@
 		/// Adds layout-generation functions to polymorphic types.
 		class LayoutFunctionBuilder final : public WithDeclsToAdd, public WithVisitorRef<LayoutFunctionBuilder>, public WithShortCircuiting {
-			// Current level of nested functions:
-			unsigned int functionNesting = 0;
 		public:
-			void previsit( FunctionDecl *functionDecl );
 			void previsit( StructDecl *structDecl );
 			void previsit( UnionDecl *unionDecl );
@@ -237,12 +234,4 @@
 	////////////////////////////////// LayoutFunctionBuilder ////////////////////////////////////////////
 
-	void LayoutFunctionBuilder::previsit( FunctionDecl *functionDecl ) {
-		visit_children = false;
-		maybeAccept( functionDecl->get_functionType(), *visitor );
-		++functionNesting;
-		maybeAccept( functionDecl->get_statements(), *visitor );
-		--functionNesting;
-	}
-
 	/// Get a list of type declarations that will affect a layout function
 	std::list< TypeDecl* > takeOtypeOnly( std::list< TypeDecl* > &decls ) {
@@ -271,9 +260,9 @@
 
 	/// Builds a layout function declaration
-	FunctionDecl *buildLayoutFunctionDecl( AggregateDecl *typeDecl, unsigned int functionNesting, FunctionType *layoutFnType ) {
+	FunctionDecl *buildLayoutFunctionDecl( AggregateDecl *typeDecl, bool isInFunction, FunctionType *layoutFnType ) {
 		// Routines at global scope marked "static" to prevent multiple definitions is separate translation units
 		// because each unit generates copies of the default routines for each aggregate.
 		FunctionDecl *layoutDecl = new FunctionDecl( layoutofName( typeDecl ),
-													 functionNesting > 0 ? Type::StorageClasses() : Type::StorageClasses( Type::Static ),
+													 isInFunction ? Type::StorageClasses() : Type::StorageClasses( Type::Static ),
 													 LinkageSpec::AutoGen, layoutFnType, new CompoundStmt(),
 													 std::list< Attribute * >(), Type::FuncSpecifiers( Type::Inline ) );
@@ -347,5 +336,5 @@
 
 		// build function decl
-		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( structDecl, functionNesting, layoutFnType );
+		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( structDecl, isInFunction(), layoutFnType );
 
 		// calculate struct layout in function body
@@ -354,14 +343,10 @@
 		addExpr( layoutDecl->get_statements(), makeOp( "?=?", derefVar( sizeParam ), new ConstantExpr( Constant::from_ulong( 0 ) ) ) );
 		addExpr( layoutDecl->get_statements(), makeOp( "?=?", derefVar( alignParam ), new ConstantExpr( Constant::from_ulong( 1 ) ) ) );
-		unsigned long n_members = 0;
-		bool firstMember = true;
-		for ( Declaration* member : structDecl->get_members() ) {
-			DeclarationWithType *dwt = dynamic_cast< DeclarationWithType * >( member );
+		for ( auto index_member : enumerate( structDecl->members ) ) {
+			DeclarationWithType *dwt = dynamic_cast< DeclarationWithType * >( index_member.val );
 			assert( dwt );
 			Type *memberType = dwt->get_type();
 
-			if ( firstMember ) {
-				firstMember = false;
-			} else {
+			if ( 0 < index_member.idx ) {
 				// make sure all members after the first (automatically aligned at 0) are properly padded for alignment
 				addStmt( layoutDecl->get_statements(), makeAlignTo( derefVar( sizeParam ), new AlignofExpr( memberType->clone() ) ) );
@@ -369,7 +354,6 @@
 
 			// place current size in the current offset index
-			addExpr( layoutDecl->get_statements(), makeOp( "?=?", makeOp( "?[?]", new VariableExpr( offsetParam ), new ConstantExpr( Constant::from_ulong( n_members ) ) ),
+			addExpr( layoutDecl->get_statements(), makeOp( "?=?", makeOp( "?[?]", new VariableExpr( offsetParam ), new ConstantExpr( Constant::from_ulong( index_member.idx ) ) ),
 			                                                      derefVar( sizeParam ) ) );
-			++n_members;
 
 			// add member size to current size
@@ -406,5 +390,5 @@
 
 		// build function decl
-		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( unionDecl, functionNesting, layoutFnType );
+		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( unionDecl, isInFunction(), layoutFnType );
 
 		// calculate union layout in function body
@@ -566,13 +550,12 @@
 				if ( tyParam.second.isComplete ) {
 					Type *concrete = env->lookup( tyParam.first );
-					if ( concrete ) {
-						arg = appExpr->get_args().insert( arg, new SizeofExpr( concrete->clone() ) );
-						arg++;
-						arg = appExpr->get_args().insert( arg, new AlignofExpr( concrete->clone() ) );
-						arg++;
-					} else {
-						// xxx - should this be an assertion?
-						SemanticError( appExpr, toString( *env, "\nunbound type variable: ", tyParam.first, " in application " ) );
-					} // if
+					// If there is an unbound type variable, it should have been detected already.
+					assertf( concrete, "Unbound type variable: %s in: %s",
+						toCString( tyParam.first ), toCString( *env ) );
+
+					arg = appExpr->get_args().insert( arg, new SizeofExpr( concrete->clone() ) );
+					arg++;
+					arg = appExpr->get_args().insert( arg, new AlignofExpr( concrete->clone() ) );
+					arg++;
 				} // if
 			} // for
@@ -638,6 +621,6 @@
 
 		void Pass1::replaceParametersWithConcrete( ApplicationExpr *appExpr, std::list< Expression* >& params ) {
-			for ( std::list< Expression* >::iterator param = params.begin(); param != params.end(); ++param ) {
-				TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
+			for ( Expression * const param : params ) {
+				TypeExpr *paramType = dynamic_cast< TypeExpr* >( param );
 				assertf(paramType, "Aggregate parameters should be type expressions");
 				paramType->set_type( replaceWithConcrete( appExpr, paramType->get_type(), false ) );
@@ -692,6 +675,40 @@
 		}
 
+		// find instances of polymorphic type parameters
+		struct PolyFinder {
+			const TyVarMap * tyVars = nullptr;
+			bool found = false;
+
+			void previsit( TypeInstType * t ) {
+				if ( isPolyType( t, *tyVars ) ) {
+					found = true;
+				}
+			}
+		};
+
+		// true if there is an instance of a polymorphic type parameter in t
+		bool hasPolymorphism( Type * t, const TyVarMap &tyVars ) {
+			PassVisitor<PolyFinder> finder;
+			finder.pass.tyVars = &tyVars;
+			maybeAccept( t, finder );
+			return finder.pass.found;
+		}
+
+		/// cast parameters to polymorphic functions so that types are replaced with
+		/// void * if they are type parameters in the formal type.
+		/// this gets rid of warnings from gcc.
+		void addCast( Expression *&actual, Type *formal, const TyVarMap &tyVars ) {
+			// type contains polymorphism, but isn't exactly a polytype, in which case it
+			// has some real actual type (e.g. unsigned int) and casting to void * is wrong
+			if ( hasPolymorphism( formal, tyVars ) && ! isPolyType( formal, tyVars ) ) {
+				Type * newType = formal->clone();
+				newType = ScrubTyVars::scrub( newType, tyVars );
+				actual = new CastExpr( actual, newType );
+			} // if
+		}
+
 		void Pass1::boxParam( Type *param, Expression *&arg, const TyVarMap &exprTyVars ) {
 			assertf( arg->result, "arg does not have result: %s", toString( arg ).c_str() );
+			addCast( arg, param, exprTyVars );
 			if ( ! needsBoxing( param, arg->result, exprTyVars, env ) ) return;
 
@@ -724,42 +741,9 @@
 		}
 
-		// find instances of polymorphic type parameters
-		struct PolyFinder {
-			const TyVarMap * tyVars = nullptr;
-			bool found = false;
-
-			void previsit( TypeInstType * t ) {
-				if ( isPolyType( t, *tyVars ) ) {
-					found = true;
-				}
-			}
-		};
-
-		// true if there is an instance of a polymorphic type parameter in t
-		bool hasPolymorphism( Type * t, const TyVarMap &tyVars ) {
-			PassVisitor<PolyFinder> finder;
-			finder.pass.tyVars = &tyVars;
-			maybeAccept( t, finder );
-			return finder.pass.found;
-		}
-
-		/// cast parameters to polymorphic functions so that types are replaced with
-		/// void * if they are type parameters in the formal type.
-		/// this gets rid of warnings from gcc.
-		void addCast( Expression *&actual, Type *formal, const TyVarMap &tyVars ) {
-			// type contains polymorphism, but isn't exactly a polytype, in which case it
-			// has some real actual type (e.g. unsigned int) and casting to void * is wrong
-			if ( hasPolymorphism( formal, tyVars ) && ! isPolyType( formal, tyVars ) ) {
-				Type * newType = formal->clone();
-				newType = ScrubTyVars::scrub( newType, tyVars );
-				actual = new CastExpr( actual, newType );
-			} // if
-		}
-
 		void Pass1::boxParams( ApplicationExpr *appExpr, FunctionType *function, std::list< Expression *>::iterator &arg, const TyVarMap &exprTyVars ) {
-			for ( std::list< DeclarationWithType *>::const_iterator param = function->get_parameters().begin(); param != function->parameters.end(); ++param, ++arg ) {
-				assertf( arg != appExpr->args.end(), "boxParams: missing argument for param %s to %s in %s", toString( *param ).c_str(), toString( function ).c_str(), toString( appExpr ).c_str() );
-				addCast( *arg, (*param)->get_type(), exprTyVars );
-				boxParam( (*param)->get_type(), *arg, exprTyVars );
+			for ( DeclarationWithType * param : function->parameters ) {
+				assertf( arg != appExpr->args.end(), "boxParams: missing argument for param %s to %s in %s", toString( param ).c_str(), toString( function ).c_str(), toString( appExpr ).c_str() );
+				boxParam( param->get_type(), *arg, exprTyVars );
+				++arg;
 			} // for
 		}
@@ -767,11 +751,10 @@
 		void Pass1::addInferredParams( ApplicationExpr *appExpr, FunctionType *functionType, std::list< Expression *>::iterator &arg, const TyVarMap &tyVars ) {
 			std::list< Expression *>::iterator cur = arg;
-			for ( Type::ForallList::iterator tyVar = functionType->get_forall().begin(); tyVar != functionType->get_forall().end(); ++tyVar ) {
-				for ( std::list< DeclarationWithType *>::iterator assert = (*tyVar)->assertions.begin(); assert != (*tyVar)->assertions.end(); ++assert ) {
-					InferredParams::const_iterator inferParam = appExpr->inferParams.find( (*assert)->get_uniqueId() );
-					assertf( inferParam != appExpr->inferParams.end(), "addInferredParams missing inferred parameter: %s in: %s", toString( *assert ).c_str(), toString( appExpr ).c_str() );
+			for ( TypeDecl * const tyVar : functionType->forall ) {
+				for ( DeclarationWithType * const assert : tyVar->assertions ) {
+					InferredParams::const_iterator inferParam = appExpr->inferParams.find( assert->get_uniqueId() );
+					assertf( inferParam != appExpr->inferParams.end(), "addInferredParams missing inferred parameter: %s in: %s", toString( assert ).c_str(), toString( appExpr ).c_str() );
 					Expression *newExpr = inferParam->second.expr->clone();
-					addCast( newExpr, (*assert)->get_type(), tyVars );
-					boxParam( (*assert)->get_type(), newExpr, tyVars );
+					boxParam( assert->get_type(), newExpr, tyVars );
 					appExpr->get_args().insert( cur, newExpr );
 				} // for
@@ -803,11 +786,10 @@
 			assert( param );
 			assert( arg );
-			if ( isPolyType( realParam->get_type(), tyVars ) ) {
-				if ( ! isPolyType( arg->get_type() ) ) {
-					UntypedExpr *deref = new UntypedExpr( new NameExpr( "*?" ) );
-					deref->args.push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
-					deref->result = arg->get_type()->clone();
-					return deref;
-				} // if
+			if ( isPolyType( realParam->get_type(), tyVars )
+					&& ! isPolyType( arg->get_type() ) ) {
+				UntypedExpr *deref = new UntypedExpr( new NameExpr( "*?" ) );
+				deref->args.push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
+				deref->result = arg->get_type()->clone();
+				return deref;
 			} // if
 			return new VariableExpr( param );
@@ -1145,15 +1127,22 @@
 		}
 
-		Expression * Pass1::postmutate( UntypedExpr *expr ) {
+		bool isPolyDeref( UntypedExpr * expr, TyVarMap const & scopeTyVars, TypeSubstitution const * env ) {
 			if ( expr->result && isPolyType( expr->result, scopeTyVars, env ) ) {
 				if ( NameExpr *name = dynamic_cast< NameExpr *>( expr->function ) ) {
 					if ( name->name == "*?" ) {
-						Expression *ret = expr->args.front();
-						expr->args.clear();
-						delete expr;
-						return ret;
+						return true;
 					} // if
 				} // if
 			} // if
+			return false;
+		}
+
+		Expression * Pass1::postmutate( UntypedExpr *expr ) {
+			if ( isPolyDeref( expr, scopeTyVars, env ) ) {
+				Expression *ret = expr->args.front();
+				expr->args.clear();
+				delete expr;
+				return ret;
+			}
 			return expr;
 		}
@@ -1165,14 +1154,10 @@
 			bool needs = false;
 			if ( UntypedExpr *expr = dynamic_cast< UntypedExpr *>( addrExpr->arg ) ) {
-				if ( expr->result && isPolyType( expr->result, scopeTyVars, env ) ) {
-					if ( NameExpr *name = dynamic_cast< NameExpr *>( expr->function ) ) {
-						if ( name->name == "*?" ) {
-							if ( ApplicationExpr * appExpr = dynamic_cast< ApplicationExpr * >( expr->args.front() ) ) {
-								assert( appExpr->function->result );
-								FunctionType *function = getFunctionType( appExpr->function->result );
-								assert( function );
-								needs = needsAdapter( function, scopeTyVars );
-							} // if
-						} // if
+				if ( isPolyDeref( expr, scopeTyVars, env ) ) {
+					if ( ApplicationExpr * appExpr = dynamic_cast< ApplicationExpr * >( expr->args.front() ) ) {
+						assert( appExpr->function->result );
+						FunctionType *function = getFunctionType( appExpr->function->result );
+						assert( function );
+						needs = needsAdapter( function, scopeTyVars );
 					} // if
 				} // if
@@ -1226,5 +1211,5 @@
 			std::list< DeclarationWithType *> &paramList = functionType->parameters;
 			std::list< FunctionType *> functions;
-			for (  DeclarationWithType * const arg : functionType->parameters ) {
+			for ( DeclarationWithType * const arg : functionType->parameters ) {
 				Type *orig = arg->get_type();
 				findAndReplaceFunction( orig, functions, scopeTyVars, needsAdapter );
@@ -1447,12 +1432,7 @@
 
 			if(!expect_func_type) {
-				GuardAction( [this]() {
-					knownLayouts.endScope();
-					knownOffsets.endScope();
-				});
 				// If this is the first function type we see
 				// Then it's the type of the declaration and we care about it
-				knownLayouts.beginScope();
-				knownOffsets.beginScope();
+				GuardScope( *this );
 			}
 
Index: src/GenPoly/GenPoly.cc
===================================================================
--- src/GenPoly/GenPoly.cc	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ src/GenPoly/GenPoly.cc	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -120,5 +120,5 @@
 	const ast::Type * replaceTypeInst(const ast::Type * type, const ast::TypeSubstitution * env) {
 		if (!env) return type;
-		if (auto typeInst = dynamic_cast<const ast::TypeInstType*> (type)) {
+		if ( auto typeInst = dynamic_cast<const ast::TypeInstType*>(type) ) {
 			auto newType = env->lookup(typeInst);
 			if (newType) return newType;
@@ -229,5 +229,5 @@
 		auto var = typeVars.find( *inst );
 		if ( var != typeVars.end() && var->second.isComplete ) {
-
+			return inst;
 		}
 	} else if ( auto inst = dynamic_cast<ast::StructInstType const *>( type ) ) {
Index: src/GenPoly/InstantiateGenericNew.cpp
===================================================================
--- src/GenPoly/InstantiateGenericNew.cpp	(revision 0bdfcc3388f9d38f0193e11bf3fda5e335326dff)
+++ src/GenPoly/InstantiateGenericNew.cpp	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -10,6 +10,6 @@
 // Created On       : Tue Aug 16 10:51:00 2022
 // Last Modified By : Andrew Beach
-// Last Modified On : Tue Sep 13 16:03:00 2022
-// Update Count     : 0
+// Last Modified On : Mon Oct 31 16:48:00 2022
+// Update Count     : 1
 //
 
@@ -378,23 +378,35 @@
 		//   Ptr(int) p;
 		//   int i;
+		// The original expression:
 		//   p.x = &i;
-		// becomes
-		//   int *& _dtype_static_member_0 = (int **)&p.x;
-		//   _dtype_static_member_0 = &i;
+		// Becomes the expression/declaration:
+		//   int *& _dtype_static_member_0;
+		//   (_dtype_static_member_0 = (int**)&p.x,
+		//    _dtype_static_member_0) = &i;
+
+		// The declaration is simple:
 		static UniqueName tmpNamer( "_dtype_static_member_" );
-		ast::Expr * init = new ast::CastExpr( location,
-			new ast::AddressExpr( location, memberExpr ),
-			new ast::PointerType( ast::deepCopy( concType ) ),
-			ast::ExplicitCast
-		);
 		ast::ObjectDecl * tmp = new ast::ObjectDecl( location,
 			tmpNamer.newName(),
 			new ast::ReferenceType( concType ),
-			new ast::SingleInit( location, init ),
+			nullptr,
 			ast::Storage::Classes(),
 			ast::Linkage::C
 		);
 		stmtsToAddBefore.push_back( new ast::DeclStmt( location, tmp ) );
-		return new ast::VariableExpr( location, tmp );
+
+		// The expression is more complex, uses references and reference /
+		// pointer parity. But breaking it up risks reordering.
+		return new ast::CommaExpr( location,
+			ast::UntypedExpr::createAssign( location,
+				new ast::VariableExpr( location, tmp ),
+				new ast::CastExpr( location,
+					new ast::AddressExpr( location, memberExpr ),
+					new ast::PointerType( ast::deepCopy( concType ) ),
+					ast::ExplicitCast
+				)
+			),
+			new ast::VariableExpr( location, tmp )
+		);
 	} else {
 		// Here, it can simply add a cast to actual types.
Index: tests/.expect/loop-inc.txt
===================================================================
--- tests/.expect/loop-inc.txt	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
+++ tests/.expect/loop-inc.txt	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -0,0 +1,3 @@
+loop
+loop
+done
Index: tests/concurrent/.expect/once.txt
===================================================================
--- tests/concurrent/.expect/once.txt	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
+++ tests/concurrent/.expect/once.txt	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -0,0 +1,2 @@
+starting
+done
Index: tests/concurrent/once.cfa
===================================================================
--- tests/concurrent/once.cfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
+++ tests/concurrent/once.cfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -0,0 +1,44 @@
+#include <barrier.hfa>
+#include <fstream.hfa>
+#include <kernel.hfa>
+#include <once.hfa>
+#include <thread.hfa>
+
+once_flag global;
+
+volatile int check;
+
+void reset() { // passed to block() by the tester threads
+	(global){};  // run the once_flag constructor on the global flag again, putting it back in the ARMED state
+	check = 0;   // clear the counter that must_once increments
+}
+
+void must_once(void) { // aborts the test if it observes a previous call since check was last zeroed
+	int prev = __atomic_fetch_add( &check, 1, __ATOMIC_SEQ_CST ); // prev is the value of check before this call
+	if(prev != 0) {
+		abort | "'must_once' appears to have been called more than once, check was" | prev;
+	}
+}
+
+barrier barr = { 11 };
+
+thread Tester {};
+
+void main( Tester & this ) { // each tester repeatedly meets the others at the barrier, then races on the flag
+	for(500) {
+		block( barr, reset );
+		// sometimes yield so arrival order varies, then race to run must_once through the once flag
+		yield(prng(this, 3));
+		call_once( global, must_once );
+	}
+}
+
+int main() { // test driver: prints "starting", runs the testers, prints "done"
+	processor p[2];
+
+	sout | "starting";
+	{
+		Tester testers[11]; // same count as the 11 given to barr
+	}
+	sout | "done";
+}
Index: tests/loop-inc.cfa
===================================================================
--- tests/loop-inc.cfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
+++ tests/loop-inc.cfa	(revision e50d9cb8bcb9d4dee3eb25c5c5ffe27efe455a18)
@@ -0,0 +1,20 @@
+forall(T &)
+struct A {
+    T * next;
+};
+
+struct B {
+    A(B) link;
+};
+
+int main(void) { // walks a three-node chain of B; the expect file wants "loop" twice, then "done"
+	B end = { { 0p } };    // tail node: link.next is null
+	B two = { { &end } };
+	B one = { { &two } };
+	B * head = &one;       // chain is one -> two -> end
+
+	for (B ** it = &head ; (*it)->link.next ; it = &(*it)->link.next) { // increment takes the address of a member of the generic A(B)
+		printf("loop\n");
+	}
+	printf("done\n");
+}
