Index: src/InitTweak/FixInit.cc
===================================================================
--- src/InitTweak/FixInit.cc	(revision d76c588058daa6269114595a571223a110ced0ab)
+++ src/InitTweak/FixInit.cc	(revision 8d70648a1bc901be1455417052ead87a499b935b)
@@ -54,5 +54,7 @@
 #include "SynTree/Type.h"              // for Type, Type::StorageClasses
 #include "SynTree/TypeSubstitution.h"  // for TypeSubstitution, operator<<
+#include "SynTree/DeclReplacer.h"      // for DeclReplacer
 #include "SynTree/Visitor.h"           // for acceptAll, maybeAccept
+#include "Validate/FindSpecialDecls.h" // for dtorStruct, dtorStructDestroy
 
 bool ctordtorp = false; // print all debug
@@ -66,6 +68,4 @@
 namespace InitTweak {
 	namespace {
-		typedef std::unordered_map< int, int > UnqCount;
-
 		struct SelfAssignChecker {
 			void previsit( ApplicationExpr * appExpr );
@@ -80,16 +80,13 @@
 		};
 
-		struct ResolveCopyCtors final : public WithIndexer, public WithShortCircuiting, public WithTypeSubstitution {
+		struct ResolveCopyCtors final : public WithStmtsToAdd, public WithIndexer, public WithShortCircuiting, public WithTypeSubstitution, public WithVisitorRef<ResolveCopyCtors> {
 			/// generate temporary ObjectDecls for each argument and return value of each ImplicitCopyCtorExpr,
 			/// generate/resolve copy construction expressions for each, and generate/resolve destructors for both
 			/// arguments and return value temporaries
-			static void resolveImplicitCalls( std::list< Declaration * > & translationUnit, UnqCount & unqCount );
-
-			ResolveCopyCtors( UnqCount & unqCount ) : unqCount( unqCount ) {}
-
-			void postvisit( ImplicitCopyCtorExpr * impCpCtorExpr );
-			void postvisit( StmtExpr * stmtExpr );
-			void previsit( UniqueExpr * unqExpr );
-			void postvisit( UniqueExpr * unqExpr );
+			static void resolveImplicitCalls( std::list< Declaration * > & translationUnit );
+
+			Expression * postmutate( ImplicitCopyCtorExpr * impCpCtorExpr );
+			void premutate( StmtExpr * stmtExpr );
+			void premutate( UniqueExpr * unqExpr );
 
 			/// create and resolve ctor/dtor expression: fname(var, [cpArg])
@@ -98,8 +95,5 @@
 			bool skipCopyConstruct( Type * type );
 			void copyConstructArg( Expression *& arg, ImplicitCopyCtorExpr * impCpCtorExpr, Type * formal );
-			void destructRet( ObjectDecl * ret, ImplicitCopyCtorExpr * impCpCtorExpr );
-
-			UnqCount & unqCount; // count the number of times each unique expr ID appears
-			std::unordered_set< int > vars;
+			void destructRet( ObjectDecl * ret, ImplicitCopyCtorExpr * impCpCtorExpr, Expression *& arg );
 		};
 
@@ -162,10 +156,6 @@
 			using Parent::previsit;
 
-			void previsit( ObjectDecl * objDecl );
 			void previsit( FunctionDecl * funcDecl );
 
-			void previsit( CompoundStmt * compoundStmt );
-			void postvisit( CompoundStmt * compoundStmt );
-			void previsit( ReturnStmt * returnStmt );
 			void previsit( BranchStmt * stmt );
 		private:
@@ -185,18 +175,4 @@
 
 			std::list< Declaration * > staticDtorDecls;
-		};
-
-		class FixCopyCtors final : public WithStmtsToAdd, public WithShortCircuiting, public WithVisitorRef<FixCopyCtors>, public WithConstTypeSubstitution {
-		  public:
-			FixCopyCtors( UnqCount & unqCount ) : unqCount( unqCount ){}
-			/// expand ImplicitCopyCtorExpr nodes into the temporary declarations, copy constructors, call expression,
-			/// and destructors
-			static void fixCopyCtors( std::list< Declaration * > &translationUnit, UnqCount & unqCount );
-
-			Expression * postmutate( ImplicitCopyCtorExpr * impCpCtorExpr );
-			void premutate( StmtExpr * stmtExpr );
-			void premutate( UniqueExpr * unqExpr );
-
-			UnqCount & unqCount;
 		};
 
@@ -236,4 +212,12 @@
 			Expression * postmutate( ConstructorExpr * ctorExpr );
 		};
+
+		struct SplitExpressions : public WithShortCircuiting, public WithTypeSubstitution, public WithStmtsToAdd {
+			/// add CompoundStmts around top-level expressions so that temporaries are destroyed in the correct places.
+			static void split( std::list< Declaration * > &translationUnit );
+
+			Statement * postmutate( ExprStmt * stmt );
+			void premutate( TupleAssignExpr * expr );
+		};
 	} // namespace
 
@@ -245,27 +229,81 @@
 		InitTweak::fixGlobalInit( translationUnit, inLibrary );
 
-		UnqCount unqCount;
+		// must happen before ResolveCopyCtors because temporaries have to be inserted into the correct scope
+		SplitExpressions::split( translationUnit );
 
 		InsertImplicitCalls::insert( translationUnit );
-		ResolveCopyCtors::resolveImplicitCalls( translationUnit, unqCount );
+
+		// Needs to happen before ResolveCopyCtors, because argument/return temporaries should not be considered
+		// when error-checking branch statements
 		InsertDtors::insert( translationUnit );
+
+		ResolveCopyCtors::resolveImplicitCalls( translationUnit );
 		FixInit::fixInitializers( translationUnit );
-
-		// FixCopyCtors must happen after FixInit, so that destructors are placed correctly
-		FixCopyCtors::fixCopyCtors( translationUnit, unqCount );
-
 		GenStructMemberCalls::generate( translationUnit );
 
-		// xxx - ctor expansion currently has to be after FixCopyCtors, because there is currently a
-		// hack in the way untyped assignments are generated, where the first argument cannot have
-		// its address taken because of the way codegeneration handles UntypedExpr vs. ApplicationExpr.
-		// Thus such assignment exprs must never pushed through expression resolution (and thus should
-		// not go through the FixCopyCtors pass), otherwise they will fail -- guaranteed.
-		// Also needs to happen after GenStructMemberCalls, since otherwise member constructors exprs
-		// don't look right, and a member can be constructed more than once.
+		// Needs to happen after GenStructMemberCalls, since otherwise member constructor exprs
+		// don't have the correct form, and a member can be constructed more than once.
 		FixCtorExprs::fix( translationUnit );
 	}
 
 	namespace {
+		/// find and return the destructor used in `input`. If `input` is not a simple destructor call, generate a thunk
+		/// that wraps the destructor, insert it into `stmtsToAdd` and return the new function declaration
+		DeclarationWithType * getDtorFunc( ObjectDecl * objDecl, Statement * input, std::list< Statement * > & stmtsToAdd ) {
+			// unwrap implicit statement wrapper
+			Statement * dtor = input;
+			if ( ImplicitCtorDtorStmt * implicit = dynamic_cast< ImplicitCtorDtorStmt * >( input ) ) {
+				// dtor = implicit->callStmt;
+				// implicit->callStmt = nullptr;
+			}
+			assert( dtor );
+			std::list< Expression * > matches;
+			collectCtorDtorCalls( dtor, matches );
+
+			if ( dynamic_cast< ExprStmt * >( dtor ) ) {
+				// only one destructor call in the expression
+				if ( matches.size() == 1 ) {
+					DeclarationWithType * func = getFunction( matches.front() );
+					assertf( func, "getFunction failed to find function in %s", toString( matches.front() ).c_str() );
+
+					// cleanup argument must be a function, not an object (including function pointer)
+					if ( FunctionDecl * dtorFunc = dynamic_cast< FunctionDecl * > ( func ) ) {
+						if ( dtorFunc->type->forall.empty() ) {
+							// simple case where the destructor is a monomorphic function call - can simply
+							// use that function as the cleanup function.
+							delete dtor;
+							return func;
+						}
+					}
+				}
+			}
+
+			// otherwise the cleanup is more complicated - need to build a single argument cleanup function that
+			// wraps the more complicated code.
+			static UniqueName dtorNamer( "__cleanup_dtor" );
+			FunctionDecl * dtorFunc = FunctionDecl::newFunction( dtorNamer.newName(), SymTab::genDefaultType( objDecl->type->stripReferences(), false ), new CompoundStmt() );
+			stmtsToAdd.push_back( new DeclStmt( dtorFunc ) );
+
+			// the original code contains uses of objDecl - replace them with the newly generated 'this' parameter.
+			ObjectDecl * thisParam = getParamThis( dtorFunc->type );
+			Expression * replacement = new VariableExpr( thisParam );
+
+			Type * base = replacement->result->stripReferences();
+			if ( dynamic_cast< ArrayType * >( base ) || dynamic_cast< TupleType * > ( base ) ) {
+				// need to cast away reference for array types, since the destructor is generated without the reference type,
+				// and for tuple types since tuple indexing does not work directly on a reference
+				replacement = new CastExpr( replacement, base->clone() );
+			}
+			DeclReplacer::replace( dtor, { std::make_pair( objDecl, replacement ) } );
+			dtorFunc->statements->push_back( strict_dynamic_cast<Statement *>( dtor ) );
+
+			return dtorFunc;
+		}
+
+		void SplitExpressions::split( std::list< Declaration * > & translationUnit ) {
+			PassVisitor<SplitExpressions> splitter;
+			mutateAll( translationUnit, splitter );
+		}
+
 		void InsertImplicitCalls::insert( std::list< Declaration * > & translationUnit ) {
 			PassVisitor<InsertImplicitCalls> inserter;
@@ -273,7 +311,7 @@
 		}
 
-		void ResolveCopyCtors::resolveImplicitCalls( std::list< Declaration * > & translationUnit, UnqCount & unqCount ) {
-			PassVisitor<ResolveCopyCtors> resolver( unqCount );
-			acceptAll( translationUnit, resolver );
+		void ResolveCopyCtors::resolveImplicitCalls( std::list< Declaration * > & translationUnit ) {
+			PassVisitor<ResolveCopyCtors> resolver;
+			mutateAll( translationUnit, resolver );
 		}
 
@@ -303,9 +341,4 @@
 		}
 
-		void FixCopyCtors::fixCopyCtors( std::list< Declaration * > & translationUnit, UnqCount & unqCount ) {
-			PassVisitor<FixCopyCtors> fixer( unqCount );
-			mutateAll( translationUnit, fixer );
-		}
-
 		void GenStructMemberCalls::generate( std::list< Declaration * > & translationUnit ) {
 			PassVisitor<GenStructMemberCalls> warner;
@@ -318,67 +351,77 @@
 		}
 
-		namespace {
-			// Relatively simple structural comparison for expressions, needed to determine
-			// if two expressions are "the same" (used to determine if self assignment occurs)
-			struct StructuralChecker {
-				Expression * stripCasts( Expression * expr ) {
-					// this might be too permissive. It's possible that only particular casts are relevant.
-					while ( CastExpr * cast = dynamic_cast< CastExpr * >( expr ) ) {
-						expr = cast->arg;
-					}
-					return expr;
-				}
-
-				void previsit( Expression * ) {
-					// anything else does not qualify
-					isSimilar = false;
-				}
-
-				template<typename T>
-				T * cast( Expression * node ) {
-					// all expressions need to ignore casts, so this bit has been factored out
-					return dynamic_cast< T * >( stripCasts( node ) );
-				}
-
-				// ignore casts
-				void previsit( CastExpr * ) {}
-
-				void previsit( MemberExpr * memExpr ) {
-					if ( MemberExpr * otherMember = cast< MemberExpr >( other ) ) {
-						if ( otherMember->member == memExpr->member ) {
-							other = otherMember->aggregate;
-							return;
-						}
-					}
-					isSimilar = false;
-				}
-
-				void previsit( VariableExpr * varExpr ) {
-					if ( VariableExpr * otherVar = cast< VariableExpr >( other ) ) {
-						if ( otherVar->var == varExpr->var ) {
-							return;
-						}
-					}
-					isSimilar = false;
-				}
-
-				void previsit( AddressExpr * ) {
-					if ( AddressExpr * addrExpr = cast< AddressExpr >( other ) ) {
-						other = addrExpr->arg;
+		Statement * SplitExpressions::postmutate( ExprStmt * stmt ) {
+			// wrap each top-level ExprStmt in a block so that argument and return temporaries are destroyed
+			// in the correct places
+			CompoundStmt * ret = new CompoundStmt( { stmt } );
+			return ret;
+		}
+
+		void SplitExpressions::premutate( TupleAssignExpr * ) {
+			// don't do this within TupleAssignExpr, since it is already broken up into multiple expressions
+			visit_children = false;
+		}
+
+		// Relatively simple structural comparison for expressions, needed to determine
+		// if two expressions are "the same" (used to determine if self assignment occurs)
+		struct StructuralChecker {
+			Expression * stripCasts( Expression * expr ) {
+				// this might be too permissive. It's possible that only particular casts are relevant.
+				while ( CastExpr * cast = dynamic_cast< CastExpr * >( expr ) ) {
+					expr = cast->arg;
+				}
+				return expr;
+			}
+
+			void previsit( Expression * ) {
+				// anything else does not qualify
+				isSimilar = false;
+			}
+
+			template<typename T>
+			T * cast( Expression * node ) {
+				// all expressions need to ignore casts, so this bit has been factored out
+				return dynamic_cast< T * >( stripCasts( node ) );
+			}
+
+			// ignore casts
+			void previsit( CastExpr * ) {}
+
+			void previsit( MemberExpr * memExpr ) {
+				if ( MemberExpr * otherMember = cast< MemberExpr >( other ) ) {
+					if ( otherMember->member == memExpr->member ) {
+						other = otherMember->aggregate;
 						return;
 					}
-					isSimilar = false;
-				}
-
-				Expression * other = nullptr;
-				bool isSimilar = true;
-			};
-
-			bool structurallySimilar( Expression * e1, Expression * e2 ) {
-				PassVisitor<StructuralChecker> checker;
-				checker.pass.other = e2;
-				e1->accept( checker );
-				return checker.pass.isSimilar;
-			}
+				}
+				isSimilar = false;
+			}
+
+			void previsit( VariableExpr * varExpr ) {
+				if ( VariableExpr * otherVar = cast< VariableExpr >( other ) ) {
+					if ( otherVar->var == varExpr->var ) {
+						return;
+					}
+				}
+				isSimilar = false;
+			}
+
+			void previsit( AddressExpr * ) {
+				if ( AddressExpr * addrExpr = cast< AddressExpr >( other ) ) {
+					other = addrExpr->arg;
+					return;
+				}
+				isSimilar = false;
+			}
+
+			Expression * other = nullptr;
+			bool isSimilar = true;
+		};
+
+		bool structurallySimilar( Expression * e1, Expression * e2 ) {
+			PassVisitor<StructuralChecker> checker;
+			checker.pass.other = e2;
+			e1->accept( checker );
+			return checker.pass.isSimilar;
 		}
 
@@ -457,5 +500,5 @@
 			if ( TupleAssignExpr * assign = dynamic_cast< TupleAssignExpr * >( resolved ) ) {
 				// fix newly generated StmtExpr
-				postvisit( assign->stmtExpr );
+				premutate( assign->stmtExpr );
 			}
 			return resolved;
@@ -489,4 +532,5 @@
 					// so that the object isn't changed inside of the polymorphic function
 					if ( ! GenPoly::needsBoxing( formal, result, impCpCtorExpr->callExpr, env ) ) return;
+					// xxx - leaking tmp
 				}
 			}
@@ -496,17 +540,59 @@
 
 			// replace argument to function call with temporary
-			arg = new CommaExpr( cpCtor, new VariableExpr( tmp ) );
-			impCpCtorExpr->tempDecls.push_back( tmp );
-			impCpCtorExpr->dtors.push_front( makeCtorDtor( "^?{}", tmp ) );
-		}
-
-		void ResolveCopyCtors::destructRet( ObjectDecl * ret, ImplicitCopyCtorExpr * impCpCtorExpr ) {
-			impCpCtorExpr->get_dtors().push_front( makeCtorDtor( "^?{}", ret ) );
-		}
-
-		void ResolveCopyCtors::postvisit( ImplicitCopyCtorExpr *impCpCtorExpr ) {
+			stmtsToAddBefore.push_back( new DeclStmt( tmp ) );
+			arg = cpCtor;
+			destructRet( tmp, impCpCtorExpr, arg );
+
+			// impCpCtorExpr->dtors.push_front( makeCtorDtor( "^?{}", tmp ) );
+		}
+
+		void ResolveCopyCtors::destructRet( ObjectDecl * ret, ImplicitCopyCtorExpr * /*impCpCtorExpr*/, Expression *& arg ) {
+			// TODO: refactor code for generating cleanup attribute, since it's common and reused in ~3-4 places
+			// check for existing cleanup attribute before adding another(?)
+			// need to add __Destructor for _tmp_cp variables as well
+
+			assertf( Validate::dtorStruct && Validate::dtorStruct->members.size() == 2, "Destructor generation requires __Destructor definition." );
+			assertf( Validate::dtorStructDestroy, "Destructor generation requires __destroy_Destructor." );
+
+			// generate a __Destructor for ret that calls the destructor
+			Expression * dtor = makeCtorDtor( "^?{}", ret );
+
+			// if the chosen destructor is intrinsic, elide the generated dtor handler
+			if ( arg && isIntrinsicCallExpr( dtor ) ) {
+				arg = new CommaExpr( arg, new VariableExpr( ret ) );
+				return;
+			}
+
+			if ( ! dtor->env ) dtor->env = maybeClone( env );
+			DeclarationWithType * dtorFunc = getDtorFunc( ret, new ExprStmt( dtor ), stmtsToAddBefore );
+
+			StructInstType * dtorStructType = new StructInstType( Type::Qualifiers(), Validate::dtorStruct );
+			dtorStructType->parameters.push_back( new TypeExpr( new VoidType( Type::Qualifiers() ) ) );
+
+			// cast destructor pointer to void (*)(void *), to silence GCC incompatible pointer warnings
+			FunctionType * dtorFtype = new FunctionType( Type::Qualifiers(), false );
+			dtorFtype->parameters.push_back( ObjectDecl::newObject( "", new PointerType( Type::Qualifiers(), new VoidType( Type::Qualifiers() ) ), nullptr ) );
+			Type * dtorType = new PointerType( Type::Qualifiers(), dtorFtype );
+
+			static UniqueName namer( "_ret_dtor" );
+			ObjectDecl * retDtor = ObjectDecl::newObject( namer.newName(), dtorStructType, new ListInit( { new SingleInit( new ConstantExpr( Constant::null() ) ), new SingleInit( new CastExpr( new VariableExpr( dtorFunc ), dtorType ) ) } ) );
+			retDtor->attributes.push_back( new Attribute( "cleanup", { new VariableExpr( Validate::dtorStructDestroy ) } ) );
+			stmtsToAddBefore.push_back( new DeclStmt( retDtor ) );
+
+			if ( arg ) {
+				Expression * member = new MemberExpr( strict_dynamic_cast<DeclarationWithType *>( Validate::dtorStruct->members.front() ), new VariableExpr( retDtor ) );
+				Expression * object = new CastExpr( new AddressExpr( new VariableExpr( ret ) ), new PointerType( Type::Qualifiers(), new VoidType( Type::Qualifiers() ) ) );
+				Expression * assign = createBitwiseAssignment( member, object );
+				arg = new CommaExpr( new CommaExpr( arg, assign ), new VariableExpr( ret ) );
+			}
+
+			// impCpCtorExpr->get_dtors().push_front( makeCtorDtor( "^?{}", ret ) );
+		}
+
+		Expression * ResolveCopyCtors::postmutate( ImplicitCopyCtorExpr *impCpCtorExpr ) {
 			CP_CTOR_PRINT( std::cerr << "ResolveCopyCtors: " << impCpCtorExpr << std::endl; )
 
 			ApplicationExpr * appExpr = impCpCtorExpr->callExpr;
+			ObjectDecl * returnDecl = nullptr;
 
 			// take each argument and attempt to copy construct it.
@@ -517,5 +603,5 @@
 			for ( Expression * & arg : appExpr->args ) {
 				Type * formal = nullptr;
-				if ( iter != params.end() ) {
+				if ( iter != params.end() ) { // does not copy construct C-style variadic arguments
 					DeclarationWithType * param = *iter++;
 					formal = param->get_type();
@@ -535,18 +621,56 @@
 				ObjectDecl * ret = ObjectDecl::newObject( retNamer.newName(), result, nullptr );
 				ret->type->set_const( false );
-				impCpCtorExpr->returnDecls.push_back( ret );
+				returnDecl = ret;
+				stmtsToAddBefore.push_back( new DeclStmt( ret ) );
 				CP_CTOR_PRINT( std::cerr << "makeCtorDtor for a return" << std::endl; )
+			} // for
+			CP_CTOR_PRINT( std::cerr << "after Resolving: " << impCpCtorExpr << std::endl; )
+			// ------------------------------------------------------
+
+			CP_CTOR_PRINT( std::cerr << "Coming out the back..." << impCpCtorExpr << std::endl; )
+
+			// detach fields from wrapper node so that it can be deleted without deleting too much
+			impCpCtorExpr->callExpr = nullptr;
+			std::swap( impCpCtorExpr->env, appExpr->env );
+			assert( impCpCtorExpr->env == nullptr );
+			delete impCpCtorExpr;
+
+			if ( returnDecl ) {
+				Expression * assign = createBitwiseAssignment( new VariableExpr( returnDecl ), appExpr );
 				if ( ! dynamic_cast< ReferenceType * >( result ) ) {
 					// destructing reference returns is bad because it can cause multiple destructor calls to the same object - the returned object is not a temporary
-					destructRet( ret, impCpCtorExpr );
-				}
+					destructRet( returnDecl, impCpCtorExpr, assign );
+				} else {
+					assign = new CommaExpr( assign, new VariableExpr( returnDecl ) );
+				}
+				// move env from appExpr to assign (the expression that is returned)
+				std::swap( assign->env, appExpr->env );
+				return assign;
+			} else {
+				return appExpr;
+			} // if
+		}
+
+		void ResolveCopyCtors::premutate( StmtExpr * stmtExpr ) {
+			// function call temporaries should be placed at statement-level, rather than nested inside of a new statement expression,
+			// since temporaries can be shared across sub-expressions, e.g.
+			//   [A, A] f();
+			//   g([A] x, [A] y);
+			//   g(f());
+			// f is executed once, so the return temporary is shared across the tuple constructors for x and y.
+			// Explicitly mutating children instead of mutating the inner compound statement forces the temporaries to be added
+			// to the outer context, rather than inside of the statement expression.
+			visit_children = false;
+
+			assert( env );
+
+			// visit all statements
+			std::list< Statement * > & stmts = stmtExpr->statements->get_kids();
+			for ( Statement *& stmt : stmts ) {
+				stmt = stmt->acceptMutator( *visitor );
 			} // for
-			CP_CTOR_PRINT( std::cerr << "after Resolving: " << impCpCtorExpr << std::endl; )
-		}
-
-		void ResolveCopyCtors::postvisit( StmtExpr * stmtExpr ) {
-			assert( env );
-			assert( stmtExpr->get_result() );
-			Type * result = stmtExpr->get_result();
+
+			assert( stmtExpr->result );
+			Type * result = stmtExpr->result;
 			if ( ! result->isVoid() ) {
 				static UniqueName retNamer("_tmp_stmtexpr_ret");
@@ -562,23 +686,25 @@
 				ObjectDecl * ret = ObjectDecl::newObject( retNamer.newName(), result, nullptr );
 				ret->type->set_const( false );
-				stmtExpr->returnDecls.push_front( ret );
+				stmtsToAddBefore.push_back( new DeclStmt( ret ) );
 
 				// must have a non-empty body, otherwise it wouldn't have a result
 				CompoundStmt * body = stmtExpr->statements;
-				assert( ! body->get_kids().empty() );
+				assert( ! body->kids.empty() );
 				// must be an ExprStmt, otherwise it wouldn't have a result
-				ExprStmt * last = strict_dynamic_cast< ExprStmt * >( body->get_kids().back() );
-				last->expr = makeCtorDtor( "?{}", ret, last->get_expr() );
-
-				stmtExpr->dtors.push_front( makeCtorDtor( "^?{}", ret ) );
+				ExprStmt * last = strict_dynamic_cast< ExprStmt * >( body->kids.back() );
+				last->expr = makeCtorDtor( "?{}", ret, last->expr );
+
+				// add destructors after current statement
+				stmtsToAddAfter.push_back( new ExprStmt( makeCtorDtor( "^?{}", ret ) ) );
+
+				// must have a non-empty body, otherwise it wouldn't have a result
+				assert( ! stmts.empty() );
+
+				// if there is a return decl, add a use as the last statement; will not have return decl on non-constructable returns
+				stmts.push_back( new ExprStmt( new VariableExpr( ret ) ) );
 			} // if
-		}
-
-		void ResolveCopyCtors::previsit( UniqueExpr * unqExpr ) {
-			unqCount[ unqExpr->get_id() ]++;  // count the number of unique expressions for each ID
-			if ( vars.count( unqExpr->get_id() ) ) {
-				// xxx - hack to prevent double-handling of unique exprs, otherwise too many temporary variables and destructors are generated
-				visit_children = false;
-			}
+
+			assert( stmtExpr->returnDecls.empty() );
+			assert( stmtExpr->dtors.empty() );
 		}
 
@@ -597,137 +723,38 @@
 		}
 
-		void ResolveCopyCtors::postvisit( UniqueExpr * unqExpr ) {
-			if ( vars.count( unqExpr->get_id() ) ) {
-				// xxx - hack to prevent double-handling of unique exprs, otherwise too many temporary variables and destructors are generated
-				return;
-			}
-
-			// it should never be necessary to wrap a void-returning expression in a UniqueExpr - if this assumption changes, this needs to be rethought
-			assert( unqExpr->get_result() );
-			if ( ImplicitCopyCtorExpr * impCpCtorExpr = dynamic_cast<ImplicitCopyCtorExpr*>( unqExpr->get_expr() ) ) {
-				// note the variable used as the result from the call
-				assert( impCpCtorExpr->get_result() && impCpCtorExpr->get_returnDecls().size() == 1 );
-				unqExpr->set_var( new VariableExpr( impCpCtorExpr->get_returnDecls().front() ) );
+		void ResolveCopyCtors::premutate( UniqueExpr * unqExpr ) {
+			visit_children = false;
+			// xxx - hack to prevent double-handling of unique exprs, otherwise too many temporary variables and destructors are generated
+			static std::unordered_map< int, UniqueExpr * > unqMap;
+			if ( ! unqMap.count( unqExpr->get_id() ) ) {
+				// resolve expr and record the variable that holds its result
+
+				ImplicitCopyCtorExpr * impCpCtorExpr = dynamic_cast< ImplicitCopyCtorExpr * >( unqExpr->expr );
+				// PassVisitor<ResolveCopyCtors> fixer;
+				unqExpr->expr = unqExpr->expr->acceptMutator( *visitor );
+
+				// it should never be necessary to wrap a void-returning expression in a UniqueExpr - if this assumption changes, this needs to be rethought
+				assert( unqExpr->result );
+				if ( impCpCtorExpr ) {
+					CommaExpr * comma = strict_dynamic_cast< CommaExpr * >( unqExpr->expr );
+					VariableExpr * var = strict_dynamic_cast<VariableExpr *>( comma->arg2 );
+					// note the variable used as the result from the call
+					unqExpr->var = var->clone();
+				} else {
+					// expr isn't a call expr, so create a new temporary variable to use to hold the value of the unique expression
+					unqExpr->object = ObjectDecl::newObject( toString("_unq", unqExpr->get_id()), unqExpr->result->clone(), makeInit( unqExpr->result ) );
+					unqExpr->var = new VariableExpr( unqExpr->object );
+				}
+
+				// stmtsToAddBefore.splice( stmtsToAddBefore.end(), fixer.pass.stmtsToAddBefore );
+				// stmtsToAddAfter.splice( stmtsToAddAfter.end(), fixer.pass.stmtsToAddAfter );
+				unqMap[unqExpr->get_id()] = unqExpr;
 			} else {
-				// expr isn't a call expr, so create a new temporary variable to use to hold the value of the unique expression
-				unqExpr->set_object( ObjectDecl::newObject( toString("_unq", unqExpr->get_id()), unqExpr->get_result()->clone(), makeInit( unqExpr->get_result() ) ) );
-				unqExpr->set_var( new VariableExpr( unqExpr->get_object() ) );
-			}
-			vars.insert( unqExpr->get_id() );
-		}
-
-		Expression * FixCopyCtors::postmutate( ImplicitCopyCtorExpr * impCpCtorExpr ) {
-			CP_CTOR_PRINT( std::cerr << "FixCopyCtors: " << impCpCtorExpr << std::endl; )
-
-			std::list< ObjectDecl * > & tempDecls = impCpCtorExpr->get_tempDecls();
-			std::list< ObjectDecl * > & returnDecls = impCpCtorExpr->get_returnDecls();
-			std::list< Expression * > & dtors = impCpCtorExpr->get_dtors();
-
-			// add all temporary declarations and their constructors
-			for ( ObjectDecl * obj : tempDecls ) {
-				stmtsToAddBefore.push_back( new DeclStmt( obj ) );
-			} // for
-			for ( ObjectDecl * obj : returnDecls ) {
-				stmtsToAddBefore.push_back( new DeclStmt( obj ) );
-			} // for
-
-			// add destructors after current statement
-			for ( Expression * dtor : dtors ) {
-				// take relevant bindings from environment
-				assert( ! dtor->env );
-				dtor->env =  maybeClone( env );
-				stmtsToAddAfter.push_back( new ExprStmt( dtor ) );
-			} // for
-
-			ObjectDecl * returnDecl = returnDecls.empty() ? nullptr : returnDecls.front();
-			Expression * callExpr = impCpCtorExpr->get_callExpr();
-
-			CP_CTOR_PRINT( std::cerr << "Coming out the back..." << impCpCtorExpr << std::endl; )
-
-			// detach fields from wrapper node so that it can be deleted without deleting too much
-			dtors.clear();
-			tempDecls.clear();
-			returnDecls.clear();
-			impCpCtorExpr->set_callExpr( nullptr );
-			std::swap( impCpCtorExpr->env, callExpr->env );
-			assert( impCpCtorExpr->env == nullptr );
-			delete impCpCtorExpr;
-
-			if ( returnDecl ) {
-				ApplicationExpr * assign = createBitwiseAssignment( new VariableExpr( returnDecl ), callExpr );
-				Expression * retExpr = new CommaExpr( assign, new VariableExpr( returnDecl ) );
-				// move env from callExpr to retExpr
-				std::swap( retExpr->env, callExpr->env );
-				return retExpr;
-			} else {
-				return callExpr;
-			} // if
-		}
-
-		void FixCopyCtors::premutate( StmtExpr * stmtExpr ) {
-			// function call temporaries should be placed at statement-level, rather than nested inside of a new statement expression,
-			// since temporaries can be shared across sub-expressions, e.g.
-			//   [A, A] f();
-			//   g([A] x, [A] y);
-			//   g(f());
-			// f is executed once, so the return temporary is shared across the tuple constructors for x and y.
-			// Explicitly mutating children instead of mutating the inner compound statment forces the temporaries to be added
-			// to the outer context, rather than inside of the statement expression.
-			visit_children = false;
-			std::list< Statement * > & stmts = stmtExpr->statements->get_kids();
-			for ( Statement *& stmt : stmts ) {
-				stmt = stmt->acceptMutator( *visitor );
-			} // for
-			assert( stmtExpr->result );
-			Type * result = stmtExpr->result;
-			if ( ! result->isVoid() ) {
-				for ( ObjectDecl * obj : stmtExpr->returnDecls ) {
-					stmtsToAddBefore.push_back( new DeclStmt( obj ) );
-				} // for
-				// add destructors after current statement
-				for ( Expression * dtor : stmtExpr->dtors ) {
-					stmtsToAddAfter.push_back( new ExprStmt( dtor ) );
-				} // for
-				// must have a non-empty body, otherwise it wouldn't have a result
-				assert( ! stmts.empty() );
-				assertf( ! stmtExpr->returnDecls.empty() || stmtExpr->dtors.empty(), "StmtExpr returns non-void, but no return decls: %s", toString( stmtExpr ).c_str() );
-				// if there is a return decl, add a use as the last statement; will not have return decl on non-constructable returns
-				if ( ! stmtExpr->returnDecls.empty() ) {
-					stmts.push_back( new ExprStmt( new VariableExpr( stmtExpr->returnDecls.front() ) ) );
-				}
-				stmtExpr->returnDecls.clear();
-				stmtExpr->dtors.clear();
-			}
-			assert( stmtExpr->returnDecls.empty() );
-			assert( stmtExpr->dtors.empty() );
-		}
-
-		void FixCopyCtors::premutate( UniqueExpr * unqExpr ) {
-			visit_children = false;
-			unqCount[ unqExpr->get_id() ]--;
-			static std::unordered_map< int, std::list< Statement * > > dtors;
-			static std::unordered_map< int, UniqueExpr * > unqMap;
-			// has to be done to clean up ImplicitCopyCtorExpr nodes, even when this node was skipped in previous passes
-			if ( unqMap.count( unqExpr->get_id() ) ) {
 				// take data from other UniqueExpr to ensure consistency
 				delete unqExpr->get_expr();
-				unqExpr->set_expr( unqMap[unqExpr->get_id()]->get_expr()->clone() );
-				delete unqExpr->get_result();
-				unqExpr->set_result( maybeClone( unqExpr->get_expr()->get_result() ) );
-				if ( unqCount[ unqExpr->get_id() ] == 0 ) {  // insert destructor after the last use of the unique expression
-					stmtsToAddAfter.splice( stmtsToAddAfter.end(), dtors[ unqExpr->get_id() ] );
-				}
-				return;
-			}
-			PassVisitor<FixCopyCtors> fixer( unqCount );
-			unqExpr->set_expr( unqExpr->get_expr()->acceptMutator( fixer ) ); // stmtexprs contained should not be separately fixed, so this must occur after the lookup
-			stmtsToAddBefore.splice( stmtsToAddBefore.end(), fixer.pass.stmtsToAddBefore );
-			unqMap[unqExpr->get_id()] = unqExpr;
-			if ( unqCount[ unqExpr->get_id() ] == 0 ) {  // insert destructor after the last use of the unique expression
-				stmtsToAddAfter.splice( stmtsToAddAfter.end(), dtors[ unqExpr->get_id() ] );
-			} else { // remember dtors for last instance of unique expr
-				dtors[ unqExpr->get_id() ] = fixer.pass.stmtsToAddAfter;
-			}
-			return;
+				unqExpr->expr = unqMap[unqExpr->get_id()]->expr->clone();
+				delete unqExpr->result;
+				unqExpr->result = maybeClone( unqExpr->expr->result );
+			}
 		}
 
@@ -844,4 +871,19 @@
 							ctorInit->ctor = nullptr;
 						}
+
+						Statement * dtor = ctorInit->dtor;
+						if ( dtor ) {
+							ImplicitCtorDtorStmt * implicit = strict_dynamic_cast< ImplicitCtorDtorStmt * >( dtor );
+							Statement * dtorStmt = implicit->callStmt;
+
+							// don't need to call intrinsic dtor, because it does nothing, but
+							// non-intrinsic dtors must be called
+							if ( ! isIntrinsicSingleArgCallStmt( dtorStmt ) ) {
+								// register the destructor function as the object's "cleanup" attribute, then detach the dtor statement from ctorInit
+								DeclarationWithType * dtorFunc = getDtorFunc( objDecl, dtorStmt, stmtsToAddBefore );
+								objDecl->attributes.push_back( new Attribute( "cleanup", { new VariableExpr( dtorFunc ) } ) );
+								ctorInit->dtor = nullptr;
+							} // if
+						}
 					} // if
 				} else if ( Initializer * init = ctorInit->init ) {
@@ -886,36 +928,4 @@
 
 
-		template<typename Iterator, typename OutputIterator>
-		void insertDtors( Iterator begin, Iterator end, OutputIterator out ) {
-			for ( Iterator it = begin ; it != end ; ++it ) {
-				// extract destructor statement from the object decl and insert it into the output. Note that this is
-				// only called on lists of non-static objects with implicit non-intrinsic dtors, so if the user manually
-				// calls an intrinsic dtor then the call must (and will) still be generated since the argument may
-				// contain side effects.
-				ObjectDecl * objDecl = *it;
-				ConstructorInit * ctorInit = dynamic_cast< ConstructorInit * >( objDecl->get_init() );
-				assert( ctorInit && ctorInit->get_dtor() );
-				*out++ = ctorInit->get_dtor()->clone();
-			} // for
-		}
-
-		void InsertDtors::previsit( ObjectDecl * objDecl ) {
-			// remember non-static destructed objects so that their destructors can be inserted later
-			if ( ! objDecl->get_storageClasses().is_static ) {
-				if ( ConstructorInit * ctorInit = dynamic_cast< ConstructorInit * >( objDecl->get_init() ) ) {
-					// a decision should have been made by the resolver, so ctor and init are not both non-NULL
-					assert( ! ctorInit->get_ctor() || ! ctorInit->get_init() );
-					Statement * dtor = ctorInit->get_dtor();
-					// don't need to call intrinsic dtor, because it does nothing, but
-					// non-intrinsic dtors must be called
-					if ( dtor && ! isIntrinsicSingleArgCallStmt( dtor ) ) {
-						// set dtor location to the object's location for error messages
-						ctorInit->dtor->location = objDecl->location;
-						reverseDeclOrder.front().push_front( objDecl );
-					} // if
-				} // if
-			} // if
-		}
-
 		void InsertDtors::previsit( FunctionDecl * funcDecl ) {
 			// each function needs to have its own set of labels
@@ -930,29 +940,4 @@
 		}
 
-		void InsertDtors::previsit( CompoundStmt * compoundStmt ) {
-			// visit statements - this will also populate reverseDeclOrder list.  don't want to dump all destructors
-			// when block is left, just the destructors associated with variables defined in this block, so push a new
-			// list to the top of the stack so that we can differentiate scopes
-			reverseDeclOrder.push_front( OrderedDecls() );
-			Parent::previsit( compoundStmt );
-		}
-
-		void InsertDtors::postvisit( CompoundStmt * compoundStmt ) {
-			// add destructors for the current scope that we're exiting, unless the last statement is a return, which
-			// causes unreachable code warnings
-			std::list< Statement * > & statements = compoundStmt->get_kids();
-			if ( ! statements.empty() && ! dynamic_cast< ReturnStmt * >( statements.back() ) ) {
-				insertDtors( reverseDeclOrder.front().begin(), reverseDeclOrder.front().end(), back_inserter( statements ) );
-			}
-			reverseDeclOrder.pop_front();
-		}
-
-		void InsertDtors::previsit( ReturnStmt * ) {
-			// return exits all scopes, so dump destructors for all scopes
-			for ( OrderedDecls & od : reverseDeclOrder ) {
-				insertDtors( od.begin(), od.end(), back_inserter( stmtsToAddBefore ) );
-			} // for
-		}
-
 		// Handle break/continue/goto in the same manner as C++.  Basic idea: any objects that are in scope at the
 		// BranchStmt but not at the labelled (target) statement must be destructed.  If there are any objects in scope
@@ -982,22 +967,4 @@
 			if ( ! diff.empty() ) {
 				SemanticError( stmt, std::string("jump to label '") + stmt->get_target().get_name() + "' crosses initialization of " + (*diff.begin())->get_name() + " " );
-			} // if
-			// S_G-S_L results in set of objects that must be destructed
-			diff.clear();
-			std::set_difference( curVars.begin(), curVars.end(), lvars.begin(), lvars.end(), std::inserter( diff, diff.end() ) );
-			DTOR_PRINT(
-				std::cerr << "S_G-S_L = " << printSet( diff ) << std::endl;
-			)
-			if ( ! diff.empty() ) {
-				// create an auxilliary set for fast lookup -- can't make diff a set, because diff ordering should be consistent for error messages.
-				std::unordered_set<ObjectDecl *> needsDestructor( diff.begin(), diff.end() );
-
-				// go through decl ordered list of objectdecl. for each element that occurs in diff, output destructor
-				OrderedDecls ordered;
-				for ( OrderedDecls & rdo : reverseDeclOrder ) {
-					// add elements from reverseDeclOrder into ordered if they occur in diff - it is key that this happens in reverse declaration order.
-					copy_if( rdo.begin(), rdo.end(), back_inserter( ordered ), [&]( ObjectDecl * objDecl ) { return needsDestructor.count( objDecl ); } );
-				} // for
-				insertDtors( ordered.begin(), ordered.end(), back_inserter( stmtsToAddBefore ) );
 			} // if
 		}
@@ -1116,8 +1083,32 @@
 							callStmt->acceptMutator( *visitor );
 							if ( isCtor ) {
-								function->get_statements()->push_front( callStmt );
-							} else {
+								function->statements->push_front( callStmt );
+							} else { // TODO: don't generate destructor function/object for intrinsic calls
 								// destructor statements should be added at the end
-								function->get_statements()->push_back( callStmt );
+								// function->get_statements()->push_back( callStmt );
+
+								// Optimization: do not need to call intrinsic destructors on members
+								if ( isIntrinsicSingleArgCallStmt( callStmt ) ) continue;
+
+								// __Destructor _dtor0 = { (void *)&b.a1, (void (*)(void *))_destroy_A };
+								std::list< Statement * > stmtsToAdd;
+
+								static UniqueName memberDtorNamer = { "__memberDtor" };
+								assertf( Validate::dtorStruct, "builtin __Destructor not found." );
+								assertf( Validate::dtorStructDestroy, "builtin __destroy_Destructor not found." );
+
+								Expression * thisExpr = new CastExpr( new AddressExpr( new VariableExpr( thisParam ) ), new PointerType( Type::Qualifiers(), new VoidType( Type::Qualifiers() ) ) );
+								Expression * dtorExpr = new VariableExpr( getDtorFunc( thisParam, callStmt, stmtsToAdd ) );
+
+								// cast destructor pointer to void (*)(void *), to silence GCC incompatible pointer warnings
+								FunctionType * dtorFtype = new FunctionType( Type::Qualifiers(), false );
+								dtorFtype->parameters.push_back( ObjectDecl::newObject( "", new PointerType( Type::Qualifiers(), new VoidType( Type::Qualifiers() ) ), nullptr ) );
+								Type * dtorType = new PointerType( Type::Qualifiers(), dtorFtype );
+
+								ObjectDecl * destructor = ObjectDecl::newObject( memberDtorNamer.newName(), new StructInstType( Type::Qualifiers(), Validate::dtorStruct ), new ListInit( { new SingleInit( thisExpr ), new SingleInit( new CastExpr( dtorExpr, dtorType ) ) } ) );
+								function->statements->push_front( new DeclStmt( destructor ) );
+								destructor->attributes.push_back( new Attribute( "cleanup", { new VariableExpr( Validate::dtorStructDestroy ) } ) );
+
+								function->statements->kids.splice( function->statements->kids.begin(), stmtsToAdd );
 							}
 						} catch ( SemanticErrorException & error ) {
Index: src/InitTweak/GenInit.cc
===================================================================
--- src/InitTweak/GenInit.cc	(revision d76c588058daa6269114595a571223a110ced0ab)
+++ src/InitTweak/GenInit.cc	(revision 8d70648a1bc901be1455417052ead87a499b935b)
@@ -15,31 +15,32 @@
 #include "GenInit.h"
 
-#include <stddef.h>                // for NULL
-#include <algorithm>               // for any_of
-#include <cassert>                 // for assert, strict_dynamic_cast, assertf
-#include <iterator>                // for back_inserter, inserter, back_inse...
-#include <list>                    // for _List_iterator, list
+#include <stddef.h>                    // for NULL
+#include <algorithm>                   // for any_of
+#include <cassert>                     // for assert, strict_dynamic_cast, assertf
+#include <iterator>                    // for back_inserter, inserter, back_inse...
+#include <list>                        // for _List_iterator, list
 
 #include "CodeGen/OperatorTable.h"
-#include "Common/PassVisitor.h"    // for PassVisitor, WithGuards, WithShort...
-#include "Common/SemanticError.h"  // for SemanticError
-#include "Common/UniqueName.h"     // for UniqueName
-#include "Common/utility.h"        // for ValueGuard, maybeClone
-#include "GenPoly/GenPoly.h"       // for getFunctionType, isPolyType
-#include "GenPoly/ScopedSet.h"     // for ScopedSet, ScopedSet<>::const_iter...
-#include "InitTweak.h"             // for isConstExpr, InitExpander, checkIn...
-#include "Parser/LinkageSpec.h"    // for isOverridable, C
+#include "Common/PassVisitor.h"        // for PassVisitor, WithGuards, WithShort...
+#include "Common/SemanticError.h"      // for SemanticError
+#include "Common/UniqueName.h"         // for UniqueName
+#include "Common/utility.h"            // for ValueGuard, maybeClone
+#include "GenPoly/GenPoly.h"           // for getFunctionType, isPolyType
+#include "GenPoly/ScopedSet.h"         // for ScopedSet, ScopedSet<>::const_iter...
+#include "InitTweak.h"                 // for isConstExpr, InitExpander, checkIn...
+#include "Parser/LinkageSpec.h"        // for isOverridable, C
 #include "ResolvExpr/Resolver.h"
-#include "SymTab/Autogen.h"        // for genImplicitCall, SizeType
-#include "SymTab/Mangler.h"        // for Mangler
-#include "SynTree/Declaration.h"   // for ObjectDecl, DeclarationWithType
-#include "SynTree/Expression.h"    // for VariableExpr, UntypedExpr, Address...
-#include "SynTree/Initializer.h"   // for ConstructorInit, SingleInit, Initi...
-#include "SynTree/Label.h"         // for Label
-#include "SynTree/Mutator.h"       // for mutateAll
-#include "SynTree/Statement.h"     // for CompoundStmt, ImplicitCtorDtorStmt
-#include "SynTree/Type.h"          // for Type, ArrayType, Type::Qualifiers
-#include "SynTree/Visitor.h"       // for acceptAll, maybeAccept
-#include "Tuples/Tuples.h"         // for maybeImpure
+#include "SymTab/Autogen.h"            // for genImplicitCall
+#include "SymTab/Mangler.h"            // for Mangler
+#include "SynTree/Declaration.h"       // for ObjectDecl, DeclarationWithType
+#include "SynTree/Expression.h"        // for VariableExpr, UntypedExpr, Address...
+#include "SynTree/Initializer.h"       // for ConstructorInit, SingleInit, Initi...
+#include "SynTree/Label.h"             // for Label
+#include "SynTree/Mutator.h"           // for mutateAll
+#include "SynTree/Statement.h"         // for CompoundStmt, ImplicitCtorDtorStmt
+#include "SynTree/Type.h"              // for Type, ArrayType, Type::Qualifiers
+#include "SynTree/Visitor.h"           // for acceptAll, maybeAccept
+#include "Tuples/Tuples.h"             // for maybeImpure
+#include "Validate/FindSpecialDecls.h" // for SizeType
 
 namespace InitTweak {
@@ -186,5 +187,5 @@
 
 			// need to resolve array dimensions in order to accurately determine if constexpr
-			ResolvExpr::findSingleExpression( arrayType->dimension, SymTab::SizeType->clone(), indexer );
+			ResolvExpr::findSingleExpression( arrayType->dimension, Validate::SizeType->clone(), indexer );
 			// array is variable-length when the dimension is not constexpr
 			arrayType->isVarLen = ! isConstExpr( arrayType->dimension );
@@ -192,5 +193,5 @@
 			if ( ! Tuples::maybeImpure( arrayType->dimension ) ) return;
 
-			ObjectDecl * arrayDimension = new ObjectDecl( dimensionName.newName(), storageClasses, LinkageSpec::C, 0, SymTab::SizeType->clone(), new SingleInit( arrayType->get_dimension() ) );
+			ObjectDecl * arrayDimension = new ObjectDecl( dimensionName.newName(), storageClasses, LinkageSpec::C, 0, Validate::SizeType->clone(), new SingleInit( arrayType->get_dimension() ) );
 			arrayDimension->get_type()->set_const( true );
 
Index: src/InitTweak/InitTweak.cc
===================================================================
--- src/InitTweak/InitTweak.cc	(revision d76c588058daa6269114595a571223a110ced0ab)
+++ src/InitTweak/InitTweak.cc	(revision 8d70648a1bc901be1455417052ead87a499b935b)
@@ -340,5 +340,5 @@
 		std::list< Expression * > matches;
 		collectCtorDtorCalls( stmt, matches );
-		assert( matches.size() <= 1 );
+		assertf( matches.size() <= 1, "%zu constructor/destructors found in %s", matches.size(), toString( stmt ).c_str() );
 		return matches.size() == 1 ? matches.front() : nullptr;
 	}
