Index: tests/zombies/vector-perf/iteration-perf.cfa
===================================================================
--- tests/zombies/vector-perf/iteration-perf.cfa	(revision 2e19e916e2c03e1ba8136c8e599fba506fa7737b)
+++ tests/zombies/vector-perf/iteration-perf.cfa	(revision 2e19e916e2c03e1ba8136c8e599fba506fa7737b)
@@ -0,0 +1,32 @@
+#include "vector2.hfa"
+
+#include <time.h>
+
+enum { NumElements = 10000, NumReps = 50000 };  // NumElements: values pushed into the vector; NumReps: timed full passes over it
+
+// A layer of indirection to improve performance (naturally!)
+// Works around trac #248.
+// This test keeps a vector of notfloat, instead of a vector of float.  The optimizer removes this added indirection.
+// Furthermore, notfloat's static-inline constructors are defined in this compilation unit, so passing them to vector makes the vector's element constructors eligible for inlining.
+// Skipping this optimization costs about a 25% slowdown.
+struct notfloat{ inline float; };  // wrapper type stored in the vector in place of a bare float (see the note above)
+
+// Benchmark: fill a vector(notfloat) with NumElements values, then time NumReps full iterations over it.
+int main() {
+    vector(notfloat) x = { 4 };
+    for (i; NumElements) {
+        push_last(x, (notfloat){0.1f * i});
+    }
+    float total = 0.0f;  // initialized so the report below is well defined even if NumReps is 0
+    const clock_t start = clock();  // only the iteration loops below are timed, not the fill above
+    for (rep; NumReps) {
+        total = 0;
+        while( vector_exit(notfloat) it = x`origin; it`moveNext ) {
+            total += it`val;
+        }
+    }
+    const clock_t end = clock();
+    printf("last total was %f\n", total);
+    double elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
+    printf("iterating duration was %f\n", elapsed);
+}
Index: tests/zombies/vector-perf/iteration-perf.cpp
===================================================================
--- tests/zombies/vector-perf/iteration-perf.cpp	(revision 2e19e916e2c03e1ba8136c8e599fba506fa7737b)
+++ tests/zombies/vector-perf/iteration-perf.cpp	(revision 2e19e916e2c03e1ba8136c8e599fba506fa7737b)
@@ -0,0 +1,25 @@
+#include <vector>
+#include <iostream>
+#include <time.h>
+
+enum { NumElements = 10000, NumReps = 50000 };  // NumElements: values pushed into the vector; NumReps: timed full passes over it
+
+// Benchmark: fill a std::vector<float> with NumElements values, then time NumReps full iterations over it.
+int main() {
+    std::vector<float> x;
+    for (int i = 0; i < NumElements; i++) {
+        x.push_back(0.1f * i);
+    }
+    float total = 0.0f;  // initialized so the report below is well defined even if NumReps is 0
+    const clock_t start = clock();  // only the iteration loops below are timed, not the fill above
+    for (int rep = 0; rep < NumReps; rep++) {
+        total = 0;
+        for( std::vector<float>::iterator it = x.begin(); it < x.end(); it ++ ) {
+            total += *it;
+        }
+    }
+    const clock_t end = clock();
+    std::cout << "last total was " << total << std::endl;
+    double elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
+    std::cout << "iterating duration was " << elapsed << std::endl;
+}
