Index: src/Concurrency/Keywords.cc
===================================================================
--- src/Concurrency/Keywords.cc	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/Concurrency/Keywords.cc	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -553,5 +553,5 @@
 			),
 			new ListInit(
-				map_range < std::list<Initializer*> > ( args, [this](DeclarationWithType * var ){
+				map_range < std::list<Initializer*> > ( args, [](DeclarationWithType * var ){
 					Type * type = var->get_type()->clone();
 					type->set_mutex( false );
Index: src/InitTweak/GenInit.cc
===================================================================
--- src/InitTweak/GenInit.cc	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/InitTweak/GenInit.cc	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -214,5 +214,5 @@
 		}
 		// a type is managed if it appears in the map of known managed types, or if it contains any polymorphism (is a type variable or generic type containing a type variable)
-		return managedTypes.find( SymTab::Mangler::mangle( type ) ) != managedTypes.end() || GenPoly::isPolyType( type );
+		return managedTypes.find( SymTab::Mangler::mangleConcrete( type ) ) != managedTypes.end() || GenPoly::isPolyType( type );
 	}
 
@@ -232,5 +232,5 @@
 			Type * type = InitTweak::getPointerBase( params.front()->get_type() );
 			assert( type );
-			managedTypes.insert( SymTab::Mangler::mangle( type ) );
+			managedTypes.insert( SymTab::Mangler::mangleConcrete( type ) );
 		}
 	}
@@ -242,6 +242,8 @@
 			if ( ObjectDecl * field = dynamic_cast< ObjectDecl * >( member ) ) {
 				if ( isManaged( field ) ) {
+					// generic parameters should not play a role in determining whether a generic type is constructed - construct all generic types, so that
+					// polymorphic constructors make generic types managed types
 					StructInstType inst( Type::Qualifiers(), aggregateDecl );
-					managedTypes.insert( SymTab::Mangler::mangle( &inst ) );
+					managedTypes.insert( SymTab::Mangler::mangleConcrete( &inst ) );
 					break;
 				}
Index: src/InitTweak/InitTweak.cc
===================================================================
--- src/InitTweak/InitTweak.cc	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/InitTweak/InitTweak.cc	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -98,4 +98,5 @@
 	class InitExpander::ExpanderImpl {
 	public:
+		virtual ~ExpanderImpl() = default;
 		virtual std::list< Expression * > next( std::list< Expression * > & indices ) = 0;
 		virtual Statement * buildListInit( UntypedExpr * callExpr, std::list< Expression * > & indices ) = 0;
@@ -105,4 +106,5 @@
 	public:
 		InitImpl( Initializer * init ) : init( init ) {}
+		virtual ~InitImpl() = default;
 
 		virtual std::list< Expression * > next( __attribute((unused)) std::list< Expression * > & indices ) {
@@ -121,6 +123,5 @@
 	public:
 		ExprImpl( Expression * expr ) : arg( expr ) {}
-
-		~ExprImpl() { delete arg; }
+		virtual ~ExprImpl() { delete arg; }
 
 		virtual std::list< Expression * > next( std::list< Expression * > & indices ) {
Index: src/ResolvExpr/AlternativeFinder.cc
===================================================================
--- src/ResolvExpr/AlternativeFinder.cc	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/ResolvExpr/AlternativeFinder.cc	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -22,4 +22,5 @@
 #include <memory>                  // for allocator_traits<>::value_type
 #include <utility>                 // for pair
+#include <vector>                  // for vector
 
 #include "Alternative.h"           // for AltList, Alternative
@@ -333,5 +334,4 @@
 		tmpCost.incPoly( -tmpCost.get_polyCost() );
 		if ( tmpCost != Cost::zero ) {
-		// if ( convCost != Cost::zero ) {
 			Type *newType = formalType->clone();
 			env.apply( newType );
@@ -405,120 +405,4 @@
 ///     needAssertions.insert( needAssertions.end(), (*tyvar)->get_assertions().begin(), (*tyvar)->get_assertions().end() );
 		}
-	}
-
-	/// instantiate a single argument by matching actuals from [actualIt, actualEnd) against formalType,
-	/// producing expression(s) in out and their total cost in cost.
-	template< typename AltIterator, typename OutputIterator >
-	bool instantiateArgument( Type * formalType, Initializer * defaultValue, AltIterator & actualIt, AltIterator actualEnd, OpenVarSet & openVars, TypeEnvironment & resultEnv, AssertionSet & resultNeed, AssertionSet & resultHave, const SymTab::Indexer & indexer, Cost & cost, OutputIterator out ) {
-		if ( TupleType * tupleType = dynamic_cast< TupleType * >( formalType ) ) {
-			// formalType is a TupleType - group actuals into a TupleExpr whose type unifies with the TupleType
-			std::list< Expression * > exprs;
-			for ( Type * type : *tupleType ) {
-				if ( ! instantiateArgument( type, defaultValue, actualIt, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( exprs ) ) ) {
-					deleteAll( exprs );
-					return false;
-				}
-			}
-			*out++ = new TupleExpr( exprs );
-		} else if ( TypeInstType * ttype = Tuples::isTtype( formalType ) ) {
-			// xxx - mixing default arguments with variadic??
-			std::list< Expression * > exprs;
-			for ( ; actualIt != actualEnd; ++actualIt ) {
-				exprs.push_back( actualIt->expr->clone() );
-				cost += actualIt->cost;
-			}
-			Expression * arg = nullptr;
-			if ( exprs.size() == 1 && Tuples::isTtype( exprs.front()->get_result() ) ) {
-				// the case where a ttype value is passed directly is special, e.g. for argument forwarding purposes
-				// xxx - what if passing multiple arguments, last of which is ttype?
-				// xxx - what would happen if unify was changed so that unifying tuple types flattened both before unifying lists? then pass in TupleType(ttype) below.
-				arg = exprs.front();
-			} else {
-				arg = new TupleExpr( exprs );
-			}
-			assert( arg && arg->get_result() );
-			if ( ! unify( ttype, arg->get_result(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
-				return false;
-			}
-			*out++ = arg;
-		} else if ( actualIt != actualEnd ) {
-			// both actualType and formalType are atomic (non-tuple) types - if they unify
-			// then accept actual as an argument, otherwise return false (fail to instantiate argument)
-			Expression * actual = actualIt->expr;
-			Type * actualType = actual->get_result();
-
-			PRINT(
-				std::cerr << "formal type is ";
-				formalType->print( std::cerr );
-				std::cerr << std::endl << "actual type is ";
-				actualType->print( std::cerr );
-				std::cerr << std::endl;
-			)
-			if ( ! unify( formalType, actualType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
-				// std::cerr << "unify failed" << std::endl;
-				return false;
-			}
-			// move the expression from the alternative to the output iterator
-			*out++ = actual;
-			actualIt->expr = nullptr;
-			cost += actualIt->cost;
-			++actualIt;
-		} else {
-			// End of actuals - Handle default values
-			if ( SingleInit *si = dynamic_cast<SingleInit *>( defaultValue )) {
-				if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( si->get_value() ) ) {
-					// so far, only constant expressions are accepted as default values
-					if ( ConstantExpr *cnstexpr = dynamic_cast<ConstantExpr *>( castExpr->get_arg() ) ) {
-						if ( Constant *cnst = dynamic_cast<Constant *>( cnstexpr->get_constant() ) ) {
-							if ( unify( formalType, cnst->get_type(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
-								*out++ = cnstexpr->clone();
-								return true;
-							} // if
-						} // if
-					} // if
-				}
-			} // if
-			return false;
-		} // if
-		return true;
-	}
-
-	bool AlternativeFinder::instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out ) {
-		simpleCombineEnvironments( actuals.begin(), actuals.end(), resultEnv );
-		// make sure we don't widen any existing bindings
-		for ( TypeEnvironment::iterator i = resultEnv.begin(); i != resultEnv.end(); ++i ) {
-			i->allowWidening = false;
-		}
-		resultEnv.extractOpenVars( openVars );
-
-		// flatten actuals so that each actual has an atomic (non-tuple) type
-		AltList exploded;
-		Tuples::explode( actuals, indexer, back_inserter( exploded ) );
-
-		AltList::iterator actualExpr = exploded.begin();
-		AltList::iterator actualEnd = exploded.end();
-		for ( DeclarationWithType * formal : formals ) {
-			// match flattened actuals with formal parameters - actuals will be grouped to match
-			// with formals as appropriate
-			Cost cost = Cost::zero;
-			std::list< Expression * > newExprs;
-			ObjectDecl * obj = strict_dynamic_cast< ObjectDecl * >( formal );
-			if ( ! instantiateArgument( obj->get_type(), obj->get_init(), actualExpr, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( newExprs ) ) ) {
-				deleteAll( newExprs );
-				return false;
-			}
-			// success - produce argument as a new alternative
-			assert( newExprs.size() == 1 );
-			out.push_back( Alternative( newExprs.front(), resultEnv, cost ) );
-		}
-		if ( actualExpr != actualEnd ) {
-			// there are still actuals remaining, but we've run out of formal parameters to match against
-			// this is okay only if the function is variadic
-			if ( ! isVarArgs ) {
-				return false;
-			}
-			out.splice( out.end(), exploded, actualExpr, actualEnd );
-		}
-		return true;
 	}
 
@@ -675,31 +559,339 @@
 	}
 
-	template< typename OutputIterator >
-	void AlternativeFinder::makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out ) {
-		OpenVarSet openVars;
-		AssertionSet resultNeed, resultHave;
-		TypeEnvironment resultEnv( func.env );
-		makeUnifiableVars( funcType, openVars, resultNeed );
-		resultEnv.add( funcType->get_forall() ); // add all type variables as open variables now so that those not used in the parameter list are still considered open
-		AltList instantiatedActuals; // filled by instantiate function
+	/// Extracts the constant default value wrapped by an initializer; nullptr if there is none
+	ConstantExpr* getDefaultValue( Initializer* init ) {
+		// only a SingleInit whose value is a CastExpr can carry a usable default
+		SingleInit* single = dynamic_cast<SingleInit*>( init );
+		if ( ! single ) return nullptr;
+		CastExpr* cast = dynamic_cast<CastExpr*>( single->get_value() );
+		if ( ! cast ) return nullptr;
+		return dynamic_cast<ConstantExpr*>( cast->get_arg() );
+	}
+
+	/// State to iteratively build a match of parameter expressions to arguments.
+	struct ArgPack {
+		AltList actuals;                 ///< Arguments included in this pack
+		TypeEnvironment env;             ///< Environment for this pack
+		AssertionSet need;               ///< Assertions outstanding for this pack
+		AssertionSet have;               ///< Assertions found for this pack
+		OpenVarSet openVars;             ///< Open variables for this pack
+		unsigned nextArg;                ///< Index of next argument in arguments list
+		std::vector<Alternative> expls;  ///< Exploded actuals left over from last match
+		unsigned nextExpl;               ///< Index of next exploded alternative to use
+		std::vector<unsigned> tupleEls;  ///< Stack of element counts, one per tuple currently being built (innermost last)
+
+		ArgPack(const TypeEnvironment& env, const AssertionSet& need, const AssertionSet& have, 
+				const OpenVarSet& openVars)
+			: actuals(), env(env), need(need), have(have), openVars(openVars), nextArg(0),
+			  expls(), nextExpl(0), tupleEls() {}
+		
+		/// Starts a new tuple expression; the new tuple counts as one element of any enclosing tuple
+		void beginTuple() {
+			if ( ! tupleEls.empty() ) ++tupleEls.back();
+			tupleEls.push_back(0);
+		}
+
+		/// Ends the innermost tuple, replacing its elements in `actuals` with a single TupleExpr alternative
+		void endTuple() {
+			// set up new Tuple alternative
+			std::list<Expression*> exprs;
+			Cost cost = Cost::zero;
+
+			// transfer elements into alternative; push_front restores their original order
+			for (unsigned i = 0; i < tupleEls.back(); ++i) {
+				exprs.push_front( actuals.back().expr );
+				actuals.back().expr = nullptr;  // detach the expression before its alternative is popped
+				cost += actuals.back().cost;
+				actuals.pop_back();
+			}
+			tupleEls.pop_back();
+
+			// build new alternative carrying the summed cost of its elements
+			actuals.emplace_back( new TupleExpr( exprs ), this->env, cost );
+		}
+
+		/// Clones expr and appends it as an actual with the given cost (counted in the innermost open tuple, if any); returns this
+		ArgPack& withArg( Expression* expr, Cost cost = Cost::zero ) {
+			actuals.emplace_back( expr->clone(), this->env, cost );
+			if ( ! tupleEls.empty() ) ++tupleEls.back();
+			return *this;
+		}
+	};
+
+	/// Instantiates one formal against the argument alternatives, extending every pack in results; returns false if no results left
+	bool instantiateArgument( Type* formalType, Initializer* initializer,  // formal's type and its default-value initializer (may be nullptr)
+			const std::vector< AlternativeFinder >& args,                 // one finder of alternatives per call argument
+			std::vector<ArgPack>& results, std::vector<ArgPack>& nextResults,  // in/out partial matches; scratch buffer for the next round
+			const SymTab::Indexer& indexer ) {
+		if ( TupleType* tupleType = dynamic_cast<TupleType*>( formalType ) ) {
+			// formalType is a TupleType - group actuals into a TupleExpr
+			for ( ArgPack& result : results ) { result.beginTuple(); }
+			for ( Type* type : *tupleType ) {
+				// xxx - dropping initializer changes behaviour from previous, but seems correct
+				if ( ! instantiateArgument( type, nullptr, args, results, nextResults, indexer ) )
+					return false;
+			}
+			for ( ArgPack& result : results ) { result.endTuple(); }
+			return true;
+		} else if ( TypeInstType* ttype = Tuples::isTtype( formalType ) ) {
+			// formalType is a ttype, consumes all remaining arguments
+			// xxx - mixing default arguments with variadic??
+			std::vector<ArgPack> finalResults{};  // list of completed tuples
+			// start tuples
+			for ( ArgPack& result : results ) {
+				result.beginTuple();
+
+				// use rest of exploded tuple if present
+				while ( result.nextExpl < result.expls.size() ) {
+					const Alternative& actual = result.expls[result.nextExpl];
+					result.env.addActual( actual.env, result.openVars );
+					result.withArg( actual.expr );
+					++result.nextExpl;
+				}
+			}
+			// iterate until all results completed
+			while ( ! results.empty() ) {
+				// add another argument to results
+				for ( ArgPack& result : results ) {
+					// finish result when out of arguments (actuals is still empty if the ttype matched no arguments at all)
+					if ( result.nextArg >= args.size() ) {
+						Type* argType = result.actuals.empty() ? nullptr : result.actuals.back().expr->get_result();
+						if ( result.tupleEls.back() == 1 && Tuples::isTtype( argType ) ) {
+							// the case where a ttype value is passed directly is special, e.g. for
+							// argument forwarding purposes
+							// xxx - what if passing multiple arguments, last of which is ttype?
+							// xxx - what would happen if unify was changed so that unifying tuple
+							// types flattened both before unifying lists? then pass in TupleType
+							// (ttype) below.
+							result.tupleEls.pop_back();
+						} else {
+							// collapse leftover arguments into tuple
+							result.endTuple();
+							argType = result.actuals.back().expr->get_result();
+						}
+						// check unification for ttype before adding to final
+						if ( unify( ttype, argType, result.env, result.need, result.have,
+								result.openVars, indexer ) ) {
+							finalResults.push_back( std::move(result) );
+						}
+						continue;
+					}
+
+					// add each possible next argument
+					for ( const Alternative& actual : args[result.nextArg] ) {
+						ArgPack aResult = result;  // copy to clone everything
+						// add details of actual to result
+						aResult.env.addActual( actual.env, aResult.openVars );
+						Cost cost = actual.cost;
+
+						// explode argument
+						std::vector<Alternative> exploded;
+						Tuples::explode( actual, indexer, back_inserter( exploded ) );
+
+						// add exploded argument to tuple; the argument's cost is charged to its first element only
+						for ( Alternative& aActual : exploded ) {
+							aResult.withArg( aActual.expr, cost );
+							cost = Cost::zero;
+						}
+						++aResult.nextArg;
+						nextResults.push_back( std::move(aResult) );
+					}
+				}
+
+				// reset for next round
+				results.swap( nextResults );
+				nextResults.clear();
+			}
+			results.swap( finalResults );
+			return ! results.empty();
+		}
+
+		// iterate each current subresult; indexed because the empty-tuple case below appends to results
+		for ( unsigned iResult = 0; iResult < results.size(); ++iResult ) {
+			ArgPack& result = results[iResult];
+
+			if ( result.nextExpl < result.expls.size() ) {
+				// use remainder of exploded tuple if present
+				const Alternative& actual = result.expls[result.nextExpl];
+				result.env.addActual( actual.env, result.openVars );
+				Type* actualType = actual.expr->get_result();
+
+				PRINT(
+					std::cerr << "formal type is ";
+					formalType->print( std::cerr );
+					std::cerr << std::endl << "actual type is ";
+					actualType->print( std::cerr );
+					std::cerr << std::endl;
+				)
+
+				if ( unify( formalType, actualType, result.env, result.need, result.have,
+						result.openVars, indexer ) ) {
+					++result.nextExpl;
+					nextResults.push_back( std::move(result.withArg( actual.expr )) );
+				}
+
+				continue;
+			} else if ( result.nextArg >= args.size() ) {
+				// use default initializers if out of arguments
+				if ( ConstantExpr* cnstExpr = getDefaultValue( initializer ) ) {
+					if ( Constant* cnst = dynamic_cast<Constant*>( cnstExpr->get_constant() ) ) {
+						if ( unify( formalType, cnst->get_type(), result.env, result.need,
+								result.have, result.openVars, indexer ) ) {
+							nextResults.push_back( std::move(result.withArg( cnstExpr )) );
+						}
+					}
+				}
+				continue;
+			}
+
+			// Check each possible next argument
+			for ( const Alternative& actual : args[result.nextArg] ) {
+				ArgPack aResult = results[iResult];  // copy to clone everything; re-indexed because push_back below may invalidate `result`
+				// add details of actual to result
+				aResult.env.addActual( actual.env, aResult.openVars );
+
+				// explode argument
+				std::vector<Alternative> exploded;
+				Tuples::explode( actual, indexer, back_inserter( exploded ) );
+				if ( exploded.empty() ) {
+					// skip empty tuple arguments; re-queued in results so the same formal is retried with the next argument
+					++aResult.nextArg;
+					results.push_back( std::move(aResult) );
+					continue;
+				}
+
+				// consider only first exploded actual
+				const Alternative& aActual = exploded.front();
+				Type* actualType = aActual.expr->get_result();  // borrowed, not cloned: nothing here would free a copy
+
+				PRINT(
+					std::cerr << "formal type is ";
+					formalType->print( std::cerr );
+					std::cerr << std::endl << "actual type is ";
+					actualType->print( std::cerr );
+					std::cerr << std::endl;
+				)
+
+				// attempt to unify types
+				if ( unify( formalType, actualType, aResult.env, aResult.need, aResult.have, aResult.openVars, indexer ) ) {
+					// add argument (withArg clones it, so moving exploded below is safe)
+					aResult.withArg( aActual.expr, actual.cost );
+					++aResult.nextArg;
+					if ( exploded.size() > 1 ) {
+						// other parts of tuple left over
+						aResult.expls = std::move( exploded );
+						aResult.nextExpl = 1;
+					}
+					nextResults.push_back( std::move(aResult) );
+				}
+			}
+		}
+
+		// reset for next parameter
+		results.swap( nextResults );
+		nextResults.clear();
+
+		return ! results.empty();
+	}
+
+	template<typename OutputIterator>
+	void AlternativeFinder::makeFunctionAlternatives( const Alternative &func, 
+			FunctionType *funcType, const std::vector< AlternativeFinder > &args, 
+			OutputIterator out ) {  // builds each matching application of func to the argument alternatives and hands it to inferParameters with out
+		OpenVarSet funcOpenVars;
+		AssertionSet funcNeed, funcHave;
+		TypeEnvironment funcEnv( func.env );
+		makeUnifiableVars( funcType, funcOpenVars, funcNeed );
+		// add all type variables as open variables now so that those not used in the parameter
+		// list are still considered open.
+		funcEnv.add( funcType->get_forall() );
+		
 		if ( targetType && ! targetType->isVoid() && ! funcType->get_returnVals().empty() ) {
 			// attempt to narrow based on expected target type
 			Type * returnType = funcType->get_returnVals().front()->get_type();
-			if ( ! unify( returnType, targetType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
-				// unification failed, don't pursue this alternative
+			if ( ! unify( returnType, targetType, funcEnv, funcNeed, funcHave, funcOpenVars, 
+					indexer ) ) {
+				// unification failed, don't pursue this function alternative
 				return;
 			}
 		}
 
-		if ( instantiateFunction( funcType->get_parameters(), actualAlt, funcType->get_isVarArgs(), openVars, resultEnv, resultNeed, resultHave, instantiatedActuals ) ) {
+		// iteratively build matches, one parameter at a time; give up as soon as no partial match survives
+		std::vector<ArgPack> results{ ArgPack{ funcEnv, funcNeed, funcHave, funcOpenVars } };
+		std::vector<ArgPack> nextResults{};
+		for ( DeclarationWithType* formal : funcType->get_parameters() ) {
+			ObjectDecl* obj = strict_dynamic_cast< ObjectDecl* >( formal );
+			if ( ! instantiateArgument( 
+					obj->get_type(), obj->get_init(), args, results, nextResults, indexer ) )
+				return;
+		}
+
+		// keep only results that use all the arguments; a variadic function first absorbs the leftovers as extra actuals
+		std::vector<ArgPack> finalResults{};
+		if ( funcType->get_isVarArgs() ) {
+			for ( ArgPack& result : results ) {
+				// use rest of exploded tuple if present
+				while ( result.nextExpl < result.expls.size() ) {
+					const Alternative& actual = result.expls[result.nextExpl];
+					result.env.addActual( actual.env, result.openVars );
+					result.withArg( actual.expr );
+					++result.nextExpl;
+				}
+			}
+
+			while ( ! results.empty() ) {
+				// build combinations for all remaining arguments
+				for ( ArgPack& result : results ) {
+					// keep if used all arguments
+					if ( result.nextArg >= args.size() ) {
+						finalResults.push_back( std::move(result) );
+						continue;
+					}
+
+					// add each possible next argument
+					for ( const Alternative& actual : args[result.nextArg] ) {
+						ArgPack aResult = result; // copy to clone everything
+						// add details of actual to result
+						aResult.env.addActual( actual.env, aResult.openVars );
+						Cost cost = actual.cost;
+
+						// explode argument
+						std::vector<Alternative> exploded;
+						Tuples::explode( actual, indexer, back_inserter( exploded ) );
+
+						// add exploded argument to arg list; the argument's cost is charged to its first element only
+						for ( Alternative& aActual : exploded ) {
+							aResult.withArg( aActual.expr, cost );
+							cost = Cost::zero;
+						}
+						++aResult.nextArg;
+						nextResults.push_back( std::move(aResult) );
+					}
+				}
+
+				// reset for next round
+				results.swap( nextResults );
+				nextResults.clear();
+			}
+		} else {
+			// filter out results that don't use all the arguments (including leftover exploded tuple elements)
+			for ( ArgPack& result : results ) {
+				if ( result.nextExpl >= result.expls.size() && result.nextArg >= args.size() ) {
+					finalResults.push_back( std::move(result) );
+				}
+			}
+		}
+
+		// build an application expression for each matching combo and infer its outstanding assertions
+		for ( ArgPack& result : finalResults ) {
 			ApplicationExpr *appExpr = new ApplicationExpr( func.expr->clone() );
-			Alternative newAlt( appExpr, resultEnv, sumCost( instantiatedActuals ) );
-			makeExprList( instantiatedActuals, appExpr->get_args() );
+			Alternative newAlt( appExpr, result.env, sumCost( result.actuals ) );
+			makeExprList( result.actuals, appExpr->get_args() );
 			PRINT(
 				std::cerr << "instantiate function success: " << appExpr << std::endl;
 				std::cerr << "need assertions:" << std::endl;
-				printAssertionSet( resultNeed, std::cerr, 8 );
+				printAssertionSet( result.need, std::cerr, 8 );
 			)
-			inferParameters( resultNeed, resultHave, newAlt, openVars, out );
+			inferParameters( result.need, result.have, newAlt, result.openVars, out );
 		}
 	}
@@ -711,13 +903,11 @@
 		if ( funcFinder.alternatives.empty() ) return;
 
-		std::list< AlternativeFinder > argAlternatives;
-		findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(), back_inserter( argAlternatives ) );
-
-		std::list< AltList > possibilities;
-		combos( argAlternatives.begin(), argAlternatives.end(), back_inserter( possibilities ) );
+		std::vector< AlternativeFinder > argAlternatives;
+		findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(), 
+			back_inserter( argAlternatives ) );
 
 		// take care of possible tuple assignments
 		// if not tuple assignment, assignment is taken care of as a normal function call
-		Tuples::handleTupleAssignment( *this, untypedExpr, possibilities );
+		Tuples::handleTupleAssignment( *this, untypedExpr, argAlternatives );
 
 		// find function operators
@@ -744,9 +934,6 @@
 						Alternative newFunc( *func );
 						referenceToRvalueConversion( newFunc.expr );
-						for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
-							// XXX
-							//Designators::check_alternative( function, *actualAlt );
-							makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
-						}
+						makeFunctionAlternatives( newFunc, function, argAlternatives, 
+							std::back_inserter( candidates ) );
 					}
 				} else if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( func->expr->get_result()->stripReferences() ) ) { // handle ftype (e.g. *? on function pointer)
@@ -756,31 +943,38 @@
 							Alternative newFunc( *func );
 							referenceToRvalueConversion( newFunc.expr );
-							for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
-								makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
-							} // for
+							makeFunctionAlternatives( newFunc, function, argAlternatives, 
+								std::back_inserter( candidates ) );
 						} // if
 					} // if
-				}
-
-				// try each function operator ?() with the current function alternative and each of the argument combinations
-				for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin(); funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
-					// check if the type is pointer to function
-					if ( PointerType *pointer = dynamic_cast< PointerType* >( funcOp->expr->get_result()->stripReferences() ) ) {
-						if ( FunctionType *function = dynamic_cast< FunctionType* >( pointer->get_base() ) ) {
+				}			
+			} catch ( SemanticError &e ) {
+				errors.append( e );
+			}
+		} // for
+
+		// try each function operator ?() with each function alternative
+		if ( ! funcOpFinder.alternatives.empty() ) {
+			// add function alternatives to front of argument list
+			argAlternatives.insert( argAlternatives.begin(), std::move(funcFinder) );
+
+			for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin();
+					funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
+				try {
+					// check if type is a pointer to function
+					if ( PointerType* pointer = dynamic_cast<PointerType*>( 
+							funcOp->expr->get_result()->stripReferences() ) ) {
+						if ( FunctionType* function = 
+								dynamic_cast<FunctionType*>( pointer->get_base() ) ) {
 							Alternative newFunc( *funcOp );
 							referenceToRvalueConversion( newFunc.expr );
-							for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
-								AltList currentAlt;
-								currentAlt.push_back( *func );
-								currentAlt.insert( currentAlt.end(), actualAlt->begin(), actualAlt->end() );
-								makeFunctionAlternatives( newFunc, function, currentAlt, std::back_inserter( candidates ) );
-							} // for
-						} // if
-					} // if
-				} // for
-			} catch ( SemanticError &e ) {
-				errors.append( e );
-			}
-		} // for
+							makeFunctionAlternatives( newFunc, function, argAlternatives, 
+								std::back_inserter( candidates ) );
+						}
+					}
+				} catch ( SemanticError &e ) {
+					errors.append( e );
+				}
+			}
+		}
 
 		// Implement SFINAE; resolution errors are only errors if there aren't any non-erroneous resolutions
Index: src/ResolvExpr/AlternativeFinder.h
===================================================================
--- src/ResolvExpr/AlternativeFinder.h	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/ResolvExpr/AlternativeFinder.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -34,4 +34,31 @@
 	  public:
 		AlternativeFinder( const SymTab::Indexer &indexer, const TypeEnvironment &env );
+
+		AlternativeFinder( const AlternativeFinder& o )  // copy: duplicates o's alternative list; indexer, env and targetType are initialized from o's
+			: indexer(o.indexer), alternatives(o.alternatives), env(o.env), 
+			  targetType(o.targetType) {}
+		
+		AlternativeFinder( AlternativeFinder&& o )  // move: steals only o's alternative list; indexer, env and targetType are initialized from o's
+			: indexer(o.indexer), alternatives(std::move(o.alternatives)), env(o.env), 
+			  targetType(o.targetType) {}
+		
+		AlternativeFinder& operator= ( const AlternativeFinder& o ) {
+			if (&o == this) return *this;  // self-assignment: nothing to do
+			
+			// hack to rebind references: destroy the alternative list, then copy-construct *this in place from o
+			alternatives.~AltList();  // NOTE(review): only this member is destroyed before the placement new - confirm no other member needs destruction
+			new(this) AlternativeFinder(o);
+			return *this;
+		}
+
+		AlternativeFinder& operator= ( AlternativeFinder&& o ) {
+			if (&o == this) return *this;  // self-assignment: nothing to do
+			
+			// hack to rebind references: destroy the alternative list, then move-construct *this in place from o
+			alternatives.~AltList();  // NOTE(review): only this member is destroyed before the placement new - confirm no other member needs destruction
+			new(this) AlternativeFinder(std::move(o));
+			return *this;
+		}
+
 		void find( Expression *expr, bool adjust = false, bool prune = true, bool failFast = true );
 		/// Calls find with the adjust flag set; adjustment turns array and function types into equivalent pointer types
@@ -99,7 +126,6 @@
 		/// Adds alternatives for offsetof expressions, given the base type and name of the member
 		template< typename StructOrUnionType > void addOffsetof( StructOrUnionType *aggInst, const std::string &name );
-		bool instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out );
-		template< typename OutputIterator >
-		void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out );
+		template<typename OutputIterator>
+		void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const std::vector< AlternativeFinder >& args, OutputIterator out );
 		template< typename OutputIterator >
 		void inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out );
Index: src/ResolvExpr/TypeEnvironment.cc
===================================================================
--- src/ResolvExpr/TypeEnvironment.cc	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/ResolvExpr/TypeEnvironment.cc	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -201,4 +201,15 @@
 	}
 
+	void TypeEnvironment::addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars ) {
+		// append a copy of every class of the actual's environment with widening switched off,
+		// recording each of the class's variables in openVars under the class's data
+		for ( const EqvClass& actualClass : actualEnv ) {
+			EqvClass fixedClass( actualClass );
+			fixedClass.allowWidening = false;
+			for ( const std::string& name : fixedClass.vars ) openVars[ name ] = fixedClass.data;
+			env.push_back( std::move(fixedClass) );
+		}
+	}
+
 } // namespace ResolvExpr
 
Index: src/ResolvExpr/TypeEnvironment.h
===================================================================
--- src/ResolvExpr/TypeEnvironment.h	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/ResolvExpr/TypeEnvironment.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -86,4 +86,8 @@
 		TypeEnvironment *clone() const { return new TypeEnvironment( *this ); }
 
+		/// Iteratively adds the environment of a new actual (with allowWidening = false), 
+		/// and extracts open variables.
+		void addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars );
+
 		typedef std::list< EqvClass >::iterator iterator;
 		iterator begin() { return env.begin(); }
Index: src/SymTab/Mangler.cc
===================================================================
--- src/SymTab/Mangler.cc	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/SymTab/Mangler.cc	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -32,11 +32,17 @@
 namespace SymTab {
 	std::string Mangler::mangleType( Type * ty ) {
-		Mangler mangler( false, true );
+		Mangler mangler( false, true, true );
 		maybeAccept( ty, mangler );
 		return mangler.get_mangleName();
 	}
 
-	Mangler::Mangler( bool mangleOverridable, bool typeMode )
-		: nextVarNum( 0 ), isTopLevel( true ), mangleOverridable( mangleOverridable ), typeMode( typeMode ) {}
+	std::string Mangler::mangleConcrete( Type* ty ) {  // mangle ty without its generic parameters
+		Mangler mangler( false, false, false );  // mangleOverridable, typeMode and mangleGenericParams all off
+		maybeAccept( ty, mangler );  // visit the type so the mangler accumulates its name
+		return mangler.get_mangleName();
+	}
+
+	Mangler::Mangler( bool mangleOverridable, bool typeMode, bool mangleGenericParams )
+		: nextVarNum( 0 ), isTopLevel( true ), mangleOverridable( mangleOverridable ), typeMode( typeMode ), mangleGenericParams( mangleGenericParams ) {}
 
 	Mangler::Mangler( const Mangler &rhs ) : mangleName() {
@@ -166,37 +172,25 @@
 
 		mangleName << ( refType->get_name().length() + prefix.length() ) << prefix << refType->get_name();
-	}
-
-	void Mangler::mangleGenericRef( ReferenceToType * refType, std::string prefix ) {
-		printQualifiers( refType );
-
-		std::ostringstream oldName( mangleName.str() );
-		mangleName.clear();
-
-		mangleName << prefix << refType->get_name();
-
-		std::list< Expression* >& params = refType->get_parameters();
-		if ( ! params.empty() ) {
-			mangleName << "_";
-			for ( std::list< Expression* >::const_iterator param = params.begin(); param != params.end(); ++param ) {
-				TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
-				assertf(paramType, "Aggregate parameters should be type expressions: %s", toString(*param).c_str());
-				maybeAccept( paramType->get_type(), *this );
+
+		if ( mangleGenericParams ) {
+			std::list< Expression* >& params = refType->get_parameters();
+			if ( ! params.empty() ) {
+				mangleName << "_";
+				for ( std::list< Expression* >::const_iterator param = params.begin(); param != params.end(); ++param ) {
+					TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
+					assertf(paramType, "Aggregate parameters should be type expressions: %s", toString(*param).c_str());
+					maybeAccept( paramType->get_type(), *this );
+				}
+				mangleName << "_";
 			}
-			mangleName << "_";
 		}
-
-		oldName << mangleName.str().length() << mangleName.str();
-		mangleName.str( oldName.str() );
 	}
 
 	void Mangler::visit( StructInstType * aggregateUseType ) {
-		if ( typeMode ) mangleGenericRef( aggregateUseType, "s" );
-		else mangleRef( aggregateUseType, "s" );
+		mangleRef( aggregateUseType, "s" );
 	}
 
 	void Mangler::visit( UnionInstType * aggregateUseType ) {
-		if ( typeMode ) mangleGenericRef( aggregateUseType, "u" );
-		else mangleRef( aggregateUseType, "u" );
+		mangleRef( aggregateUseType, "u" );
 	}
 
@@ -285,5 +279,5 @@
 				varNums[ (*i)->name ] = std::pair< int, int >( nextVarNum++, (int)(*i)->get_kind() );
 				for ( std::list< DeclarationWithType* >::iterator assert = (*i)->assertions.begin(); assert != (*i)->assertions.end(); ++assert ) {
-					Mangler sub_mangler( mangleOverridable, typeMode );
+					Mangler sub_mangler( mangleOverridable, typeMode, mangleGenericParams );
 					sub_mangler.nextVarNum = nextVarNum;
 					sub_mangler.isTopLevel = false;
Index: src/SymTab/Mangler.h
===================================================================
--- src/SymTab/Mangler.h	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/SymTab/Mangler.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -30,7 +30,10 @@
 		/// Mangle syntax tree object; primary interface to clients
 		template< typename SynTreeClass >
-	    static std::string mangle( SynTreeClass *decl, bool mangleOverridable = true, bool typeMode = false );
+	    static std::string mangle( SynTreeClass *decl, bool mangleOverridable = true, bool typeMode = false, bool mangleGenericParams = true );
 		/// Mangle a type name; secondary interface
 		static std::string mangleType( Type* ty );
+		/// Mangle ignoring generic type parameters
+		static std::string mangleConcrete( Type* ty );
+
 
 		virtual void visit( ObjectDecl *declaration );
@@ -62,11 +65,11 @@
 		bool mangleOverridable;         ///< Specially mangle overridable built-in methods
 		bool typeMode;                  ///< Produce a unique mangled name for a type
+		bool mangleGenericParams;       ///< Include generic parameters in name mangling if true
 
-		Mangler( bool mangleOverridable, bool typeMode );
+		Mangler( bool mangleOverridable, bool typeMode, bool mangleGenericParams );
 		Mangler( const Mangler & );
 
 		void mangleDecl( DeclarationWithType *declaration );
 		void mangleRef( ReferenceToType *refType, std::string prefix );
-		void mangleGenericRef( ReferenceToType *refType, std::string prefix );
 
 		void printQualifiers( Type *type );
@@ -74,6 +77,6 @@
 
 	template< typename SynTreeClass >
-	std::string Mangler::mangle( SynTreeClass *decl, bool mangleOverridable, bool typeMode ) {
-		Mangler mangler( mangleOverridable, typeMode );
+	std::string Mangler::mangle( SynTreeClass *decl, bool mangleOverridable, bool typeMode, bool mangleGenericParams ) {
+		Mangler mangler( mangleOverridable, typeMode, mangleGenericParams );
 		maybeAccept( decl, mangler );
 		return mangler.get_mangleName();
Index: src/Tuples/TupleAssignment.cc
===================================================================
--- src/Tuples/TupleAssignment.cc	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/Tuples/TupleAssignment.cc	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -20,4 +20,5 @@
 #include <memory>                          // for unique_ptr, allocator_trai...
 #include <string>                          // for string
+#include <vector>
 
 #include "CodeGen/OperatorTable.h"
@@ -33,4 +34,5 @@
 #include "ResolvExpr/Resolver.h"           // for resolveCtorInit
 #include "ResolvExpr/TypeEnvironment.h"    // for TypeEnvironment
+#include "ResolvExpr/typeops.h"            // for combos
 #include "SynTree/Declaration.h"           // for ObjectDecl
 #include "SynTree/Expression.h"            // for Expression, CastExpr, Name...
@@ -52,5 +54,5 @@
 		// dispatcher for Tuple (multiple and mass) assignment operations
 		TupleAssignSpotter( ResolvExpr::AlternativeFinder & );
-		void spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities );
+		void spot( UntypedExpr * expr, std::vector<ResolvExpr::AlternativeFinder> &args );
 
 	  private:
@@ -59,5 +61,6 @@
 		struct Matcher {
 		  public:
-			Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
+			Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs, const 
+				ResolvExpr::AltList& rhs );
 			virtual ~Matcher() {}
 			virtual void match( std::list< Expression * > &out ) = 0;
@@ -72,5 +75,6 @@
 		struct MassAssignMatcher : public Matcher {
 		  public:
-			MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
+			MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs, 
+				const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
 			virtual void match( std::list< Expression * > &out );
 		};
@@ -78,5 +82,6 @@
 		struct MultipleAssignMatcher : public Matcher {
 		  public:
-			MultipleAssignMatcher( TupleAssignSpotter &spot, const ResolvExpr::AltList & alts );
+			MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs, 
+				const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
 			virtual void match( std::list< Expression * > &out );
 		};
@@ -114,7 +119,8 @@
 	}
 
-	void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
+	void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr, 
+				std::vector<ResolvExpr::AlternativeFinder> &args ) {
 		TupleAssignSpotter spotter( currentFinder );
-		spotter.spot( expr, possibilities );
+		spotter.spot( expr, args );
 	}
 
@@ -122,32 +128,79 @@
 		: currentFinder(f) {}
 
-	void TupleAssignSpotter::spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
+	void TupleAssignSpotter::spot( UntypedExpr * expr, 
+			std::vector<ResolvExpr::AlternativeFinder> &args ) {
 		if (  NameExpr *op = dynamic_cast< NameExpr * >(expr->get_function()) ) {
 			if ( CodeGen::isCtorDtorAssign( op->get_name() ) ) {
-                               fname = op->get_name();
-				PRINT( std::cerr << "TupleAssignment: " << fname << std::endl; )
-				for ( std::list<ResolvExpr::AltList>::const_iterator ali = possibilities.begin(); ali != possibilities.end(); ++ali ) {
-					if ( ali->size() == 0 ) continue; // AlternativeFinder will natrually handle this case, if it's legal
-					if ( ali->size() <= 1 && CodeGen::isAssignment( op->get_name() ) ) {
-						// what does it mean if an assignment takes 1 argument? maybe someone defined such a function, in which case AlternativeFinder will naturally handle it
-						continue;
+				fname = op->get_name();
+
+				// AlternativeFinder will naturally handle this case, if it's legal
+				if ( args.size() == 0 ) return;
+
+				// if an assignment only takes 1 argument, that's odd, but maybe someone wrote 
+				// the function, in which case AlternativeFinder will handle it normally
+				if ( args.size() == 1 && CodeGen::isAssignment( fname ) ) return;
+
+				// look over all possible left-hand-sides
+				for ( ResolvExpr::Alternative& lhsAlt : args[0] ) {
+					// skip non-tuple LHS
+					if ( ! refToTuple(lhsAlt.expr) ) continue;
+
+					// explode is aware of casts - ensure every LHS expression is sent into explode 
+					// with a reference cast
+					// xxx - this seems to change the alternatives before the normal 
+					//  AlternativeFinder flow; maybe this is desired?
+					if ( ! dynamic_cast<CastExpr*>( lhsAlt.expr ) ) {
+						lhsAlt.expr = new CastExpr( lhsAlt.expr, 
+								new ReferenceType( Type::Qualifiers(), 
+									lhsAlt.expr->get_result()->clone() ) );
 					}
 
-					assert( ! ali->empty() );
-					// grab args 2-N and group into a TupleExpr
-					const ResolvExpr::Alternative & alt1 = ali->front();
-					auto begin = std::next(ali->begin(), 1), end = ali->end();
-					PRINT( std::cerr << "alt1 is " << alt1.expr << std::endl; )
-					if ( refToTuple(alt1.expr) ) {
-						PRINT( std::cerr << "and is reference to tuple" << std::endl; )
-						if ( isMultAssign( begin, end ) ) {
-							PRINT( std::cerr << "possible multiple assignment" << std::endl; )
-							matcher.reset( new MultipleAssignMatcher( *this, *ali ) );
-						} else {
-							// mass assignment
-							PRINT( std::cerr << "possible mass assignment" << std::endl; )
-							matcher.reset( new MassAssignMatcher( *this,  *ali ) );
+					// explode the LHS so that each field of a tuple-valued-expr is assigned
+					ResolvExpr::AltList lhs;
+					explode( lhsAlt, currentFinder.get_indexer(), back_inserter(lhs), true );
+					for ( ResolvExpr::Alternative& alt : lhs ) {
+						// each LHS value must be a reference - some come in with a cast expression, 
+						// if not just cast to reference here
+						if ( ! dynamic_cast<ReferenceType*>( alt.expr->get_result() ) ) {
+							alt.expr = new CastExpr( alt.expr, 
+								new ReferenceType( Type::Qualifiers(), 
+									alt.expr->get_result()->clone() ) );
 						}
+					}
+
+					if ( args.size() == 1 ) {
+						// mass default-initialization/destruction
+						ResolvExpr::AltList rhs{};
+						matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
 						match();
+					} else if ( args.size() > 2 ) {
+						// expand all possible RHS possibilities
+						// TODO build iterative version of this instead of using combos
+						std::vector< ResolvExpr::AltList > rhsAlts;
+						combos( std::next(args.begin(), 1), args.end(), 
+							std::back_inserter( rhsAlts ) );
+						for ( const ResolvExpr::AltList& rhsAlt : rhsAlts ) {
+							// multiple assignment
+							ResolvExpr::AltList rhs;
+							explode( rhsAlt, currentFinder.get_indexer(), 
+								std::back_inserter(rhs), true );
+							matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
+							match();
+						}
+					} else {
+						for ( const ResolvExpr::Alternative& rhsAlt : args[1] ) {
+							ResolvExpr::AltList rhs;
+							if ( isTuple(rhsAlt.expr) ) {
+								// multiple assignment
+								explode( rhsAlt, currentFinder.get_indexer(),  
+									std::back_inserter(rhs), true );
+								matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
+							} else {
+								// mass assignment
+								rhs.push_back( rhsAlt );
+								matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
+							}
+							match();
+						}
 					}
 				}
@@ -169,5 +222,6 @@
 		ResolvExpr::AltList current;
 		// now resolve new assignments
-		for ( std::list< Expression * >::iterator i = new_assigns.begin(); i != new_assigns.end(); ++i ) {
+		for ( std::list< Expression * >::iterator i = new_assigns.begin(); 
+				i != new_assigns.end(); ++i ) {
 			PRINT(
 				std::cerr << "== resolving tuple assign ==" << std::endl;
@@ -175,5 +229,6 @@
 			)
 
-			ResolvExpr::AlternativeFinder finder( currentFinder.get_indexer(), currentFinder.get_environ() );
+			ResolvExpr::AlternativeFinder finder{ currentFinder.get_indexer(), 
+				currentFinder.get_environ() };
 			try {
 				finder.findWithAdjustment(*i);
@@ -196,39 +251,15 @@
 		// combine assignment environments into combined expression environment
 		simpleCombineEnvironments( current.begin(), current.end(), matcher->compositeEnv );
-		currentFinder.get_alternatives().push_front( ResolvExpr::Alternative(new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv, ResolvExpr::sumCost( current  ) + matcher->baseCost ) );
-	}
-
-	TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList &alts ) : spotter(spotter), baseCost( ResolvExpr::sumCost( alts ) ) {
-		assert( ! alts.empty() );
-		// combine argument environments into combined expression environment
-		simpleCombineEnvironments( alts.begin(), alts.end(), compositeEnv );
-
-		ResolvExpr::Alternative lhsAlt = alts.front();
-		// explode is aware of casts - ensure every LHS expression is sent into explode with a reference cast
-		if ( ! dynamic_cast< CastExpr * >( lhsAlt.expr ) ) {
-			lhsAlt.expr = new CastExpr( lhsAlt.expr, new ReferenceType( Type::Qualifiers(), lhsAlt.expr->get_result()->clone() ) );
-		}
-
-		// explode the lhs so that each field of the tuple-valued-expr is assigned.
-		explode( lhsAlt, spotter.currentFinder.get_indexer(), back_inserter(lhs), true );
-
-		for ( ResolvExpr::Alternative & alt : lhs ) {
-			// every LHS value must be a reference - some come in with a cast expression, if it doesn't just cast to reference here.
-			if ( ! dynamic_cast< ReferenceType * >( alt.expr->get_result() ) ) {
-				alt.expr = new CastExpr( alt.expr, new ReferenceType( Type::Qualifiers(), alt.expr->get_result()->clone() ) );
-			}
-		}
-	}
-
-	TupleAssignSpotter::MassAssignMatcher::MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
-		assert( alts.size() == 1 || alts.size() == 2 );
-		if ( alts.size() == 2 ) {
-			rhs.push_back( alts.back() );
-		}
-	}
-
-	TupleAssignSpotter::MultipleAssignMatcher::MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
-		// explode the rhs so that each field of the tuple-valued-expr is assigned.
-		explode( std::next(alts.begin(), 1), alts.end(), spotter.currentFinder.get_indexer(), back_inserter(rhs), true );
+		currentFinder.get_alternatives().push_front( ResolvExpr::Alternative(
+			new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv, 
+			ResolvExpr::sumCost( current ) + matcher->baseCost ) );
+	}
+
+	TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter, 
+		const ResolvExpr::AltList &lhs, const ResolvExpr::AltList &rhs ) 
+	: lhs(lhs), rhs(rhs), spotter(spotter), 
+	  baseCost( ResolvExpr::sumCost( lhs ) + ResolvExpr::sumCost( rhs ) ) {  // base cost = summed cost of both sides
+		simpleCombineEnvironments( lhs.begin(), lhs.end(), compositeEnv );  // fold the LHS alternatives' environments into compositeEnv
+		simpleCombineEnvironments( rhs.begin(), rhs.end(), compositeEnv );  // then the RHS ones (rhs may be empty)
+	}
 
Index: src/Tuples/Tuples.h
===================================================================
--- src/Tuples/Tuples.h	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/Tuples/Tuples.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -17,4 +17,5 @@
 
 #include <string>
+#include <vector>
 
 #include "SynTree/Expression.h"
@@ -26,6 +27,7 @@
 namespace Tuples {
 	// TupleAssignment.cc
-	void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign, const std::list<ResolvExpr::AltList> & possibilities );
-
+	void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign, 
+		std::vector< ResolvExpr::AlternativeFinder >& args );
+	
 	// TupleExpansion.cc
 	/// expands z.[a, b.[x, y], c] into [z.a, z.b.x, z.b.y, z.c], inserting UniqueExprs as appropriate
Index: src/benchmark/Makefile.am
===================================================================
--- src/benchmark/Makefile.am	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/benchmark/Makefile.am	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -133,8 +133,9 @@
 ## =========================================================================================================
 creation$(EXEEXT) :\
-	creation-pthread.run		\
-	creation-cfa_coroutine.run	\
-	creation-cfa_thread.run		\
-	creation-upp_coroutine.run	\
+	creation-pthread.run			\
+	creation-cfa_coroutine.run		\
+	creation-cfa_coroutine_eager.run	\
+	creation-cfa_thread.run			\
+	creation-upp_coroutine.run		\
 	creation-upp_thread.run
 
@@ -142,4 +143,7 @@
 	${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 
+creation-cfa_coroutine_eager$(EXEEXT):
+	${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
+
 creation-cfa_thread$(EXEEXT):
 	${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
@@ -153,4 +157,55 @@
 creation-pthread$(EXEEXT):
 	@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+## =========================================================================================================
+
+compile$(EXEEXT) :\
+	compile-array$(EXEEXT)		\
+	compile-attributes$(EXEEXT)	\
+	compile-empty$(EXEEXT)  	\
+	compile-expression$(EXEEXT)	\
+	compile-io$(EXEEXT)		\
+	compile-monitor$(EXEEXT)	\
+	compile-operators$(EXEEXT)	\
+	compile-thread$(EXEEXT)		\
+	compile-typeof$(EXEEXT)
+
+
+compile-array$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/array.c
+
+compile-attributes$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/attributes.c
+
+compile-empty$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w compile/empty.c
+
+compile-expression$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/expression.c
+
+compile-io$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/io.c
+
+compile-monitor$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/monitor.c
+
+compile-operators$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/operators.c
+
+compile-thread$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/thread.c
+
+compile-typeof$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/typeof.c
+
 
 ## =========================================================================================================
Index: src/benchmark/Makefile.in
===================================================================
--- src/benchmark/Makefile.in	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/benchmark/Makefile.in	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -124,5 +124,5 @@
   esac
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
-am__DIST_COMMON = $(srcdir)/Makefile.in
+am__DIST_COMMON = $(srcdir)/Makefile.in compile
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 ACLOCAL = @ACLOCAL@
@@ -544,8 +544,9 @@
 
 creation$(EXEEXT) :\
-	creation-pthread.run		\
-	creation-cfa_coroutine.run	\
-	creation-cfa_thread.run		\
-	creation-upp_coroutine.run	\
+	creation-pthread.run			\
+	creation-cfa_coroutine.run		\
+	creation-cfa_coroutine_eager.run	\
+	creation-cfa_thread.run			\
+	creation-upp_coroutine.run		\
 	creation-upp_thread.run
 
@@ -553,4 +554,7 @@
 	${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 
+creation-cfa_coroutine_eager$(EXEEXT):
+	${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
+
 creation-cfa_thread$(EXEEXT):
 	${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
@@ -564,4 +568,56 @@
 creation-pthread$(EXEEXT):
 	@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+compile$(EXEEXT) :\
+	compile-array$(EXEEXT)		\
+	compile-attributes$(EXEEXT)	\
+	compile-empty$(EXEEXT)  	\
+	compile-expression$(EXEEXT)	\
+	compile-io$(EXEEXT)		\
+	compile-monitor$(EXEEXT)	\
+	compile-operators$(EXEEXT)	\
+	compile-thread$(EXEEXT)		\
+	compile-typeof$(EXEEXT)		\
+	compile-vector_test$(EXEEXT)
+
+compile-array$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/array.c
+
+compile-attributes$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/attributes.c
+
+compile-empty$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w compile/empty.c
+
+compile-expression$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/expression.c
+
+compile-io$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/io.c
+
+compile-monitor$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/monitor.c
+
+compile-operators$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/operators.c
+
+compile-thread$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/thread.c
+
+compile-typeof$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/typeof.c
+
+compile-vector_test$(EXEEXT):
+	@printf '%20s\t' $(subst compile-,,$@)
+	@/usr/bin/time -f "%E" ${CC} -quiet -fsyntax-only -w ../tests/vector_test.c
 
 %.run : %$(EXEEXT) ${REPEAT}
Index: src/benchmark/compile/empty.c
===================================================================
--- src/benchmark/compile/empty.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
+++ src/benchmark/compile/empty.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -0,0 +1,3 @@
+int main() {
+	return 0;
+}
Index: src/benchmark/creation/cfa_cor.c
===================================================================
--- src/benchmark/creation/cfa_cor.c	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/benchmark/creation/cfa_cor.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -5,5 +5,9 @@
 
 coroutine MyCoroutine {};
-void ?{} (MyCoroutine & this) { prime(this); }
+void ?{} (MyCoroutine & this) {
+#ifdef EAGER
+	prime(this);
+#endif
+}
 void main(MyCoroutine & this) {}
 
Index: src/libcfa/Makefile.am
===================================================================
--- src/libcfa/Makefile.am	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/Makefile.am	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -95,5 +95,16 @@
 
 cfa_includedir = $(CFA_INCDIR)
-nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
+nobase_cfa_include_HEADERS = 	\
+	${headers} 			\
+	${stdhdr} 			\
+	math 				\
+	gmp 				\
+	bits/defs.h 		\
+	bits/locks.h 		\
+	concurrency/invoke.h 	\
+	libhdr.h 			\
+	libhdr/libalign.h 	\
+	libhdr/libdebug.h 	\
+	libhdr/libtools.h
 
 CLEANFILES = libcfa-prelude.c
Index: src/libcfa/Makefile.in
===================================================================
--- src/libcfa/Makefile.in	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/Makefile.in	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -264,5 +264,7 @@
 	containers/result containers/vector concurrency/coroutine \
 	concurrency/thread concurrency/kernel concurrency/monitor \
-	${shell echo stdhdr/*} math gmp concurrency/invoke.h
+	${shell echo stdhdr/*} math gmp bits/defs.h bits/locks.h \
+	concurrency/invoke.h libhdr.h libhdr/libalign.h \
+	libhdr/libdebug.h libhdr/libtools.h
 HEADERS = $(nobase_cfa_include_HEADERS)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
@@ -430,5 +432,17 @@
 stdhdr = ${shell echo stdhdr/*}
 cfa_includedir = $(CFA_INCDIR)
-nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
+nobase_cfa_include_HEADERS = \
+	${headers} 			\
+	${stdhdr} 			\
+	math 				\
+	gmp 				\
+	bits/defs.h 		\
+	bits/locks.h 		\
+	concurrency/invoke.h 	\
+	libhdr.h 			\
+	libhdr/libalign.h 	\
+	libhdr/libdebug.h 	\
+	libhdr/libtools.h
+
 CLEANFILES = libcfa-prelude.c
 all: all-am
Index: src/libcfa/bits/containers.h
===================================================================
--- src/libcfa/bits/containers.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
+++ src/libcfa/bits/containers.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -0,0 +1,132 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/containers.h -- Intrusive generic containers.
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 16:38:50 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+
+#pragma once
+
+#include <stddef.h>
+
+#include "libhdr.h"
+
+//-----------------------------------------------------------------------------
+// Node Base
+//-----------------------------------------------------------------------------
+
+#ifdef __CFORALL__
+	trait is_node(dtype T) {
+		T*& get_next( T& );
+	};
+#endif
+
+//-----------------------------------------------------------------------------
+// Stack
+//-----------------------------------------------------------------------------
+#ifdef __CFORALL__
+	forall(dtype TYPE | is_node(TYPE))
+	#define T TYPE
+#else
+	#define T void
+#endif
+struct __stack {
+	T * top;
+};
+
+#ifdef __CFORALL__
+#define __stack_t(T) __stack(T)
+#else
+#define __stack_t(T) struct __stack
+#endif
+
+#ifdef __CFORALL__
+	forall(dtype T | is_node(T))
+	void ?{}( __stack(T) & this ) {
+		this.top = NULL;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	void push( __stack(T) & this, T * val ) {  // make val the new top of the intrusive stack
+		verify( !get_next( *val ) );  // val's next link is expected to be null on entry
+		get_next( *val ) = this.top;  // link val in front of the current top
+		this.top = val;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	T * pop( __stack(T) & this ) {  // unlink and return the top node; returns the null top unchanged when the stack is empty
+		T * top = this.top;
+		if( top ) {
+			this.top = get_next( *top );  // the next node becomes the new top
+			get_next( *top ) = NULL;  // clear the link of the node being handed back
+		}
+		return top;
+	}
+#endif
+
+//-----------------------------------------------------------------------------
+// Queue
+//-----------------------------------------------------------------------------
+#ifdef __CFORALL__
+	forall(dtype TYPE | is_node(TYPE))  // declare the parameter as TYPE, matching __stack and the T macro below
+	#define T TYPE  // struct body is written in terms of T in both languages
+#else
+	#define T void  // plain C view: untyped node pointers
+#endif
+struct __queue {  // intrusive singly-linked queue
+	T * head;  // first node, NULL when the queue is empty
+	T ** tail;  // address of the link to write on append (set to &head by the constructor)
+};
+
+#ifdef __CFORALL__
+	forall(dtype T | is_node(T))
+	void ?{}( __queue(T) & this ) {
+		this.head = NULL;
+		this.tail = &this.head;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	void append( __queue(T) & this, T * val ) {  // add val at the end of the queue
+		verify(this.tail != NULL);
+		*this.tail = val;  // store val in the link that tail designates (head, or the last node's next link)
+		this.tail = &get_next( *val );  // val's own next link is now the place to append to
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	T * pop_head( __queue(T) & this ) {  // unlink and return the first node; returns the null head unchanged when empty
+		T * head = this.head;
+		if( head ) {
+			this.head = get_next( *head );  // advance head to the second node
+			if( !get_next( *head ) ) {
+				this.tail = &this.head;  // popped node had no successor: queue is empty, point tail back at head
+			}
+			get_next( *head ) = NULL;  // clear the link of the node being handed back
+		}
+		return head;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	T * remove( __queue(T) & this, T ** it ) {  // unlink and return *it; `it` is the address of the link that points at the node
+		T * val = *it;
+		verify( val );  // the designated link must point at a node
+
+		(*it) = get_next( *val );  // bypass val: the link now points at val's successor
+
+		if( this.tail == &get_next( *val ) ) {
+			this.tail = it;  // val was the last node, so the link it was removed from becomes the append point
+		}
+
+		get_next( *val ) = NULL;  // clear the link of the node being handed back
+
+		verify( (this.head == NULL) == (&this.head == this.tail) );  // empty head <=> tail points at head
+		verify( *this.tail == NULL );  // the append point must hold a null link
+		return val;
+	}
+#endif
Index: src/libcfa/bits/defs.h
===================================================================
--- src/libcfa/bits/defs.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
+++ src/libcfa/bits/defs.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -0,0 +1,23 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/defs.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Nov 09 13:24:10 2017
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define unlikely(x)    __builtin_expect(!!(x), 0)   // branch hint: x is expected to be false
+#define likely(x)      __builtin_expect(!!(x), 1)   // branch hint: x is expected to be true (no space before '(' so this is a function-like macro)
+#define thread_local _Thread_local
Index: src/libcfa/bits/locks.h
===================================================================
--- src/libcfa/bits/locks.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
+++ src/libcfa/bits/locks.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -0,0 +1,121 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/locks.h -- Fast internal locks.
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 15:14:38 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+//
+
+#pragma once
+
+#include "bits/defs.h"
+
+#include "libhdr.h"
+
+// pause to prevent excess processor bus usage
+#if defined( __sparc )
+	#define Pause() __asm__ __volatile__ ( "rd %ccr,%g0" )
+#elif defined( __i386 ) || defined( __x86_64 )
+	#define Pause() __asm__ __volatile__ ( "pause" : : : )
+#else
+	#error unsupported architecture
+#endif
+
+#if defined( __i386 ) || defined( __x86_64 )
+	// Intel recommendation
+	#define __ALIGN__ __attribute__(( aligned (128) ))
+#elif defined( __sparc )
+	#define __ALIGN__ CALIGN
+#else
+	#error unsupported architecture
+#endif
+
+#if defined( __x86_64 )	// 8-byte __sync builtins; expansions are fully parenthesised and carry no trailing ';'
+	#define __lock_test_and_test_and_set( lock ) ( (lock) == 0 && __sync_lock_test_and_set_8( &(lock), 1 ) == 0 )
+	#define __lock_release( lock ) __sync_lock_release_8( &(lock) )
+#elif defined( __i386 )	// 4-byte __sync builtins; same contract as above
+	#define __lock_test_and_test_and_set( lock ) ( (lock) == 0 && __sync_lock_test_and_set_4( &(lock), 1 ) == 0 )
+	#define __lock_release( lock ) __sync_lock_release_4( &(lock) )
+#else
+	#error unsupported architecture
+#endif
+
+struct __spinlock_t {
+	__ALIGN__ volatile uintptr_t lock;	// lock word: the constructor stores 0, the test-and-set macro stores 1 when acquiring
+	#ifdef __CFA_DEBUG__
+		const char * prev_name;	// debug only: `caller` value recorded by the last successful acquire
+		void* prev_thrd;	// debug only: this_thread recorded by the last successful acquire
+	#endif
+} __ALIGN__;
+
+#ifdef __CFORALL__
+	extern void yield( unsigned int );
+	extern thread_local struct thread_desc *    volatile this_thread;
+
+	static inline void ?{}( __spinlock_t & this ) {	// constructor: lock word starts at 0, the value the acquire macro treats as free
+		this.lock = 0;
+	}
+
+	// Lock the spinlock, return false if already acquired
+	static inline _Bool try_lock  ( __spinlock_t & this DEBUG_CTX_PARAM2 ) {	// single attempt, no retry loop
+		_Bool result = __lock_test_and_test_and_set( this.lock );
+		LIB_DEBUG_DO(
+			if( result ) {
+				this.prev_name = caller;
+				this.prev_thrd = this_thread;
+			}
+		)
+		return result;	// true when this call acquired the lock
+	}
+
+	// Lock the spinlock, spin if already acquired
+	static inline void lock( __spinlock_t & this DEBUG_CTX_PARAM2 ) {
+		#ifndef NOEXPBACK
+			enum { SPIN_START = 4, SPIN_END = 64 * 1024, };	// bounds on the number of Pause() calls after one failed attempt
+			unsigned int spin = SPIN_START;
+		#endif
+
+		for ( unsigned int i = 1;; i += 1 ) {	// i counts acquisition attempts
+			if ( __lock_test_and_test_and_set( this.lock ) ) break;
+			#ifndef NOEXPBACK
+				// exponential spin
+				for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause();
+
+				// slowly increase by powers of 2
+				if ( i % 64 == 0 ) spin += spin;	// i.e. the delay doubles on every 64th failed attempt
+
+				// prevent overflow
+				if ( spin > SPIN_END ) spin = SPIN_START;	// past the cap the delay restarts at SPIN_START
+			#else
+				Pause();
+			#endif
+		}
+		LIB_DEBUG_DO(
+			this.prev_name = caller;
+			this.prev_thrd = this_thread;
+		)
+	}
+
+	// Lock the spinlock, yield repeatedly if already acquired
+	static inline void lock_yield( __spinlock_t & this DEBUG_CTX_PARAM2 ) {
+		for ( unsigned int i = 1;; i += 1 ) {	// i counts acquisition attempts
+			if ( __lock_test_and_test_and_set( this.lock ) ) break;
+			yield( i );	// after each failed attempt, call yield with the number of attempts so far
+		}
+		LIB_DEBUG_DO(
+			this.prev_name = caller;
+			this.prev_thrd = this_thread;
+		)
+	}
+
+	static inline void unlock( __spinlock_t & this ) {	// release the lock word; nothing here checks that the caller holds it
+		__lock_release( this.lock );
+	}
+#endif
Index: src/libcfa/concurrency/alarm.c
===================================================================
--- src/libcfa/concurrency/alarm.c	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/concurrency/alarm.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -186,5 +186,5 @@
 
 	disable_interrupts();
-	lock( &event_kernel->lock DEBUG_CTX2 );
+	lock( event_kernel->lock DEBUG_CTX2 );
 	{
 		verify( validate( alarms ) );
@@ -196,5 +196,5 @@
 		}
 	}
-	unlock( &event_kernel->lock );
+	unlock( event_kernel->lock );
 	this->set = true;
 	enable_interrupts( DEBUG_CTX );
@@ -203,10 +203,10 @@
 void unregister_self( alarm_node_t * this ) {
 	disable_interrupts();
-	lock( &event_kernel->lock DEBUG_CTX2 );
+	lock( event_kernel->lock DEBUG_CTX2 );
 	{
 		verify( validate( &event_kernel->alarms ) );
 		remove( &event_kernel->alarms, this );
 	}
-	unlock( &event_kernel->lock );
+	unlock( event_kernel->lock );
 	enable_interrupts( DEBUG_CTX );
 	this->set = false;
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/concurrency/invoke.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -14,6 +14,6 @@
 //
 
-#include <stdbool.h>
-#include <stdint.h>
+#include "bits/defs.h"
+#include "bits/locks.h"
 
 #ifdef __CFORALL__
@@ -25,17 +25,6 @@
 #define _INVOKE_H_
 
-	#define unlikely(x)    __builtin_expect(!!(x), 0)
-	#define thread_local _Thread_local
-
 	typedef void (*fptr_t)();
 	typedef int_fast16_t __lock_size_t;
-
-	struct spinlock {
-		volatile int lock;
-		#ifdef __CFA_DEBUG__
-			const char * prev_name;
-			void* prev_thrd;
-		#endif
-	};
 
 	struct __thread_queue_t {
@@ -58,7 +47,4 @@
 		void push( struct __condition_stack_t &, struct __condition_criterion_t * );
 		struct __condition_criterion_t * pop( struct __condition_stack_t & );
-
-		void  ?{}(spinlock & this);
-		void ^?{}(spinlock & this);
 	}
 	#endif
@@ -122,5 +108,5 @@
 	struct monitor_desc {
 		// spinlock to protect internal data
-		struct spinlock lock;
+		struct __spinlock_t lock;
 
 		// current owner of the monitor
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/concurrency/kernel	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -26,18 +26,18 @@
 //-----------------------------------------------------------------------------
 // Locks
-// Lock the spinlock, spin if already acquired
-void lock      ( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, spin if already acquired
+// void lock      ( spinlock * DEBUG_CTX_PARAM2 );
 
-// Lock the spinlock, yield repeatedly if already acquired
-void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, yield repeatedly if already acquired
+// void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
 
-// Lock the spinlock, return false if already acquired
-bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, return false if already acquired
+// bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
 
-// Unlock the spinlock
-void unlock    ( spinlock * );
+// // Unlock the spinlock
+// void unlock    ( spinlock * );
 
 struct semaphore {
-	spinlock lock;
+	__spinlock_t lock;
 	int count;
 	__thread_queue_t waiting;
@@ -54,5 +54,5 @@
 struct cluster {
 	// Ready queue locks
-	spinlock ready_queue_lock;
+	__spinlock_t ready_queue_lock;
 
 	// Ready queue for threads
@@ -74,6 +74,6 @@
 	FinishOpCode action_code;
 	thread_desc * thrd;
-	spinlock * lock;
-	spinlock ** locks;
+	__spinlock_t * lock;
+	__spinlock_t ** locks;
 	unsigned short lock_count;
 	thread_desc ** thrds;
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/concurrency/kernel.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -242,5 +242,5 @@
 void finishRunning(processor * this) {
 	if( this->finish.action_code == Release ) {
-		unlock( this->finish.lock );
+		unlock( *this->finish.lock );
 	}
 	else if( this->finish.action_code == Schedule ) {
@@ -248,15 +248,15 @@
 	}
 	else if( this->finish.action_code == Release_Schedule ) {
-		unlock( this->finish.lock );
+		unlock( *this->finish.lock );
 		ScheduleThread( this->finish.thrd );
 	}
 	else if( this->finish.action_code == Release_Multi ) {
 		for(int i = 0; i < this->finish.lock_count; i++) {
-			unlock( this->finish.locks[i] );
+			unlock( *this->finish.locks[i] );
 		}
 	}
 	else if( this->finish.action_code == Release_Multi_Schedule ) {
 		for(int i = 0; i < this->finish.lock_count; i++) {
-			unlock( this->finish.locks[i] );
+			unlock( *this->finish.locks[i] );
 		}
 		for(int i = 0; i < this->finish.thrd_count; i++) {
@@ -334,7 +334,7 @@
 	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
 
-	lock(   &this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
+	lock(   this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
 	append( this_processor->cltr->ready_queue, thrd );
-	unlock( &this_processor->cltr->ready_queue_lock );
+	unlock( this_processor->cltr->ready_queue_lock );
 
 	verify( disable_preempt_count > 0 );
@@ -343,7 +343,7 @@
 thread_desc * nextThread(cluster * this) {
 	verify( disable_preempt_count > 0 );
-	lock( &this->ready_queue_lock DEBUG_CTX2 );
+	lock( this->ready_queue_lock DEBUG_CTX2 );
 	thread_desc * head = pop_head( this->ready_queue );
-	unlock( &this->ready_queue_lock );
+	unlock( this->ready_queue_lock );
 	verify( disable_preempt_count > 0 );
 	return head;
@@ -358,5 +358,5 @@
 }
 
-void BlockInternal( spinlock * lock ) {
+void BlockInternal( __spinlock_t * lock ) {
 	disable_interrupts();
 	this_processor->finish.action_code = Release;
@@ -384,5 +384,5 @@
 }
 
-void BlockInternal( spinlock * lock, thread_desc * thrd ) {
+void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
 	assert(thrd);
 	disable_interrupts();
@@ -398,5 +398,5 @@
 }
 
-void BlockInternal(spinlock * locks [], unsigned short count) {
+void BlockInternal(__spinlock_t * locks [], unsigned short count) {
 	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi;
@@ -411,5 +411,5 @@
 }
 
-void BlockInternal(spinlock * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
+void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
 	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi_Schedule;
@@ -426,5 +426,5 @@
 }
 
-void LeaveThread(spinlock * lock, thread_desc * thrd) {
+void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
 	verify( disable_preempt_count > 0 );
 	this_processor->finish.action_code = thrd ? Release_Schedule : Release;
@@ -516,6 +516,6 @@
 }
 
-static spinlock kernel_abort_lock;
-static spinlock kernel_debug_lock;
+static __spinlock_t kernel_abort_lock;
+static __spinlock_t kernel_debug_lock;
 static bool kernel_abort_called = false;
 
@@ -523,13 +523,13 @@
 	// abort cannot be recursively entered by the same or different processors because all signal handlers return when
 	// the globalAbort flag is true.
-	lock( &kernel_abort_lock DEBUG_CTX2 );
+	lock( kernel_abort_lock DEBUG_CTX2 );
 
 	// first task to abort ?
 	if ( !kernel_abort_called ) {			// not first task to abort ?
 		kernel_abort_called = true;
-		unlock( &kernel_abort_lock );
+		unlock( kernel_abort_lock );
 	}
 	else {
-		unlock( &kernel_abort_lock );
+		unlock( kernel_abort_lock );
 
 		sigset_t mask;
@@ -561,9 +561,9 @@
 extern "C" {
 	void __lib_debug_acquire() {
-		lock( &kernel_debug_lock DEBUG_CTX2 );
+		lock( kernel_debug_lock DEBUG_CTX2 );
 	}
 
 	void __lib_debug_release() {
-		unlock( &kernel_debug_lock );
+		unlock( kernel_debug_lock );
 	}
 }
@@ -574,41 +574,4 @@
 //-----------------------------------------------------------------------------
 // Locks
-void ?{}( spinlock & this ) {
-	this.lock = 0;
-}
-void ^?{}( spinlock & this ) {
-
-}
-
-bool try_lock( spinlock * this DEBUG_CTX_PARAM2 ) {
-	return this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0;
-}
-
-void lock( spinlock * this DEBUG_CTX_PARAM2 ) {
-	for ( unsigned int i = 1;; i += 1 ) {
-		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
-	}
-	LIB_DEBUG_DO(
-		this->prev_name = caller;
-		this->prev_thrd = this_thread;
-	)
-}
-
-void lock_yield( spinlock * this DEBUG_CTX_PARAM2 ) {
-	for ( unsigned int i = 1;; i += 1 ) {
-		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
-		yield();
-	}
-	LIB_DEBUG_DO(
-		this->prev_name = caller;
-		this->prev_thrd = this_thread;
-	)
-}
-
-
-void unlock( spinlock * this ) {
-	__sync_lock_release_4( &this->lock );
-}
-
 void  ?{}( semaphore & this, int count = 1 ) {
 	(this.lock){};
@@ -619,5 +582,5 @@
 
 void P(semaphore & this) {
-	lock( &this.lock DEBUG_CTX2 );
+	lock( this.lock DEBUG_CTX2 );
 	this.count -= 1;
 	if ( this.count < 0 ) {
@@ -629,5 +592,5 @@
 	}
 	else {
-	    unlock( &this.lock );
+	    unlock( this.lock );
 	}
 }
@@ -635,5 +598,5 @@
 void V(semaphore & this) {
 	thread_desc * thrd = NULL;
-	lock( &this.lock DEBUG_CTX2 );
+	lock( this.lock DEBUG_CTX2 );
 	this.count += 1;
 	if ( this.count <= 0 ) {
@@ -642,5 +605,5 @@
 	}
 
-	unlock( &this.lock );
+	unlock( this.lock );
 
 	// make new owner
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/concurrency/kernel_private.h	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -45,10 +45,10 @@
 //Block current thread and release/wake-up the following resources
 void BlockInternal(void);
-void BlockInternal(spinlock * lock);
+void BlockInternal(__spinlock_t * lock);
 void BlockInternal(thread_desc * thrd);
-void BlockInternal(spinlock * lock, thread_desc * thrd);
-void BlockInternal(spinlock * locks [], unsigned short count);
-void BlockInternal(spinlock * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
-void LeaveThread(spinlock * lock, thread_desc * thrd);
+void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
+void BlockInternal(__spinlock_t * locks [], unsigned short count);
+void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
+void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
 
 //-----------------------------------------------------------------------------
@@ -66,5 +66,5 @@
 struct event_kernel_t {
 	alarm_list_t alarms;
-	spinlock lock;
+	__spinlock_t lock;
 };
 
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/concurrency/monitor.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -34,11 +34,11 @@
 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
 
-static inline void lock_all  ( spinlock * locks [], __lock_size_t count );
-static inline void lock_all  ( monitor_desc * source [], spinlock * /*out*/ locks [], __lock_size_t count );
-static inline void unlock_all( spinlock * locks [], __lock_size_t count );
+static inline void lock_all  ( __spinlock_t * locks [], __lock_size_t count );
+static inline void lock_all  ( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
+static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count );
 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count );
 
-static inline void save   ( monitor_desc * ctx [], __lock_size_t count, spinlock * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
-static inline void restore( monitor_desc * ctx [], __lock_size_t count, spinlock * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
+static inline void save   ( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
+static inline void restore( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
 
 static inline void init     ( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
@@ -53,4 +53,10 @@
 static inline __lock_size_t count_max    ( const __waitfor_mask_t & mask );
 static inline __lock_size_t aggregate    ( monitor_desc * storage [], const __waitfor_mask_t & mask );
+
+#ifndef __CFA_LOCK_NO_YIELD
+#define DO_LOCK lock_yield
+#else
+#define DO_LOCK lock
+#endif
 
 //-----------------------------------------------------------------------------
@@ -71,5 +77,5 @@
 	unsigned int recursions[ count ];                         /* Save the current recursion levels to restore them later                             */ \
 	__waitfor_mask_t masks [ count ];                         /* Save the current waitfor masks to restore them later                                */ \
-	spinlock *   locks     [ count ];                         /* We need to pass-in an array of locks to BlockInternal                               */ \
+	__spinlock_t *   locks [ count ];                         /* We need to pass-in an array of locks to BlockInternal                               */ \
 
 #define monitor_save    save   ( monitors, count, locks, recursions, masks )
@@ -84,6 +90,6 @@
 	// Enter single monitor
 	static void __enter_monitor_desc( monitor_desc * this, const __monitor_group_t & group ) {
-		// Lock the monitor spinlock, lock_yield to reduce contention
-		lock_yield( &this->lock DEBUG_CTX2 );
+		// Lock the monitor spinlock
+		DO_LOCK( this->lock DEBUG_CTX2 );
 		thread_desc * thrd = this_thread;
 
@@ -127,11 +133,11 @@
 
 		// Release the lock and leave
-		unlock( &this->lock );
+		unlock( this->lock );
 		return;
 	}
 
 	static void __enter_monitor_dtor( monitor_desc * this, fptr_t func ) {
-		// Lock the monitor spinlock, lock_yield to reduce contention
-		lock_yield( &this->lock DEBUG_CTX2 );
+		// Lock the monitor spinlock
+		DO_LOCK( this->lock DEBUG_CTX2 );
 		thread_desc * thrd = this_thread;
 
@@ -145,5 +151,5 @@
 			set_owner( this, thrd );
 
-			unlock( &this->lock );
+			unlock( this->lock );
 			return;
 		}
@@ -196,6 +202,6 @@
 	// Leave single monitor
 	void __leave_monitor_desc( monitor_desc * this ) {
-		// Lock the monitor spinlock, lock_yield to reduce contention
-		lock_yield( &this->lock DEBUG_CTX2 );
+		// Lock the monitor spinlock
+		DO_LOCK( this->lock DEBUG_CTX2 );
 
 		LIB_DEBUG_PRINT_SAFE("Kernel : %10p Leaving mon %p (%p)\n", this_thread, this, this->owner);
@@ -210,5 +216,5 @@
 		if( this->recursion != 0) {
 			LIB_DEBUG_PRINT_SAFE("Kernel :  recursion still %d\n", this->recursion);
-			unlock( &this->lock );
+			unlock( this->lock );
 			return;
 		}
@@ -218,5 +224,5 @@
 
 		// We can now let other threads in safely
-		unlock( &this->lock );
+		unlock( this->lock );
 
 		//We need to wake-up the thread
@@ -243,5 +249,5 @@
 
 		// Lock the monitor now
-		lock_yield( &this->lock DEBUG_CTX2 );
+		DO_LOCK( this->lock DEBUG_CTX2 );
 
 		disable_interrupts();
@@ -730,21 +736,21 @@
 }
 
-static inline void lock_all( spinlock * locks [], __lock_size_t count ) {
+static inline void lock_all( __spinlock_t * locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		lock_yield( locks[i] DEBUG_CTX2 );
-	}
-}
-
-static inline void lock_all( monitor_desc * source [], spinlock * /*out*/ locks [], __lock_size_t count ) {
+		DO_LOCK( *locks[i] DEBUG_CTX2 );
+	}
+}
+
+static inline void lock_all( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		spinlock * l = &source[i]->lock;
-		lock_yield( l DEBUG_CTX2 );
+		__spinlock_t * l = &source[i]->lock;
+		DO_LOCK( *l DEBUG_CTX2 );
 		if(locks) locks[i] = l;
 	}
 }
 
-static inline void unlock_all( spinlock * locks [], __lock_size_t count ) {
+static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		unlock( locks[i] );
+		unlock( *locks[i] );
 	}
 }
@@ -752,5 +758,5 @@
 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		unlock( &locks[i]->lock );
+		unlock( locks[i]->lock );
 	}
 }
@@ -759,5 +765,5 @@
 	monitor_desc * ctx [],
 	__lock_size_t count,
-	__attribute((unused)) spinlock * locks [],
+	__attribute((unused)) __spinlock_t * locks [],
 	unsigned int /*out*/ recursions [],
 	__waitfor_mask_t /*out*/ masks []
@@ -772,5 +778,5 @@
 	monitor_desc * ctx [],
 	__lock_size_t count,
-	spinlock * locks [],
+	__spinlock_t * locks [],
 	unsigned int /*out*/ recursions [],
 	__waitfor_mask_t /*out*/ masks []
Index: src/libcfa/concurrency/preemption.c
===================================================================
--- src/libcfa/concurrency/preemption.c	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/libcfa/concurrency/preemption.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -355,7 +355,7 @@
 		case SI_KERNEL:
 			// LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread tick\n");
-			lock( &event_kernel->lock DEBUG_CTX2 );
+			lock( event_kernel->lock DEBUG_CTX2 );
 			tick_preemption();
-			unlock( &event_kernel->lock );
+			unlock( event_kernel->lock );
 			break;
 		// Signal was not sent by the kernel but by an other thread
Index: src/prelude/builtins.c
===================================================================
--- src/prelude/builtins.c	(revision c95b11526c10568ee3415f3873af592f6779ea99)
+++ src/prelude/builtins.c	(revision 490db327533a4cac681c8fe17b7d300026d46a79)
@@ -80,9 +80,14 @@
 } // ?\?
 
-static inline forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); double ?/?( double, T ); } )
-double ?\?( T x, signed long int y ) {
-    if ( y >=  0 ) return (double)(x \ (unsigned long int)y);
-    else return 1.0 / x \ (unsigned long int)(-y);
-} // ?\?
+// FIXME (x \ (unsigned long int)y) relies on X ?\?(T, unsigned long) a function that is neither 
+// defined, nor passed as an assertion parameter. Without user-defined conversions, cannot specify 
+// X as a type that casts to double, yet it doesn't make sense to write functions with that type 
+// signature where X is double.
+
+// static inline forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); double ?/?( double, T ); } )
+// double ?\?( T x, signed long int y ) {
+//     if ( y >=  0 ) return (double)(x \ (unsigned long int)y);
+//     else return 1.0 / x \ (unsigned long int)(-y);
+// } // ?\?
 
 static inline long int ?\=?( long int & x, unsigned long int y ) { x = x \ y; return x; }
