Index: tests/zombies/linked-list-perf/experiment.cpp
===================================================================
--- tests/zombies/linked-list-perf/experiment.cpp	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
+++ tests/zombies/linked-list-perf/experiment.cpp	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
@@ -0,0 +1,1 @@
+experiment.koad
Index: tests/zombies/linked-list-perf/experiment.koad
===================================================================
--- tests/zombies/linked-list-perf/experiment.koad	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
+++ tests/zombies/linked-list-perf/experiment.koad	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
@@ -0,0 +1,203 @@
+/*
+
+$cfa is the result of building this configuration:
+../cfa-cc/configure --with-target-hosts=host:nodebug CXX=g++-
+
+~/u++ is my own download of uC++ 7.0.0
+
+$cfa -xc experiment.koad -DIMPL_CFA_MIKE_OLD -o perfexp-cfa-mike-old -nodebug -O2 
+$cfa -xc experiment.koad -DIMPL_CFA_MIKE_NEW -o perfexp-cfa-mike-new -nodebug -O2
+$cfa -xc experiment.koad -DIMPL_CFA_MIKE_POC -o perfexp-cfa-mike-poc -nodebug -O2
+$cfa -xc experiment.koad -DIMPL_CFA_COLBY -o perfexp-cfa-colby -nodebug -O2
+$cfa -xc experiment.koad -DIMPL_CFA_THIERRY_SUB -o perfexp-cfa-thierry-sub -nodebug -O2
+g++ -xc++ experiment.koad -DIMPL_STL -DNDEBUG -o perfexp-stlgpp -O2
+~/u++/u++-7.0.0/bin/u++ experiment.cpp -DIMPL_UPP -DNDEBUG -o perfexp-uppupp -O2
+
+                        O2      O2-ltd  O3-ltd
+perfexp-cfa-mike-old    2.50    2.55
+perfexp-cfa-mike-new            2.18    2.15
+perfexp-cfa-mike-poc    1.74    1.71
+perfexp-cfa-colby       2.90    2.84    3.09
+perfexp-cfa-thierry-sub 1.85
+perfexp-stlgpp          4.57    4.72
+perfexp-uppupp          2.09    1.94    1.91
+
+O2-ltd is
+ -fno-tree-pre -fno-gcse
+ (An earlier draft of mike-new didn't work without disabling those optimizations; we probably don't care about that anymore.)
+
+*/
+
+#include <time.h>
+#include <stdio.h>
+// Pick the list implementation under test.  Every branch defines the node type S: a 64-int payload plus whatever link fields that implementation needs.
+#if defined IMPL_STL
+        // Non-intrusive baseline: S carries no links; main() keeps S* values in a std::list.
+        #include <list>
+        struct S {
+                volatile int f[64];
+        };
+
+#elif defined IMPL_UPP
+        // uC++ sequence: S derives from uSeqable.
+        #include <uC++.h>
+        #include <uSequence.h>
+        struct S : public uSeqable {
+                volatile int f[64];
+        };
+
+#elif defined IMPL_CFA_MIKE_OLD
+        // CFA <containers/list.hfa>; presumably the DLISTED_MGD_IMPL_* macros supply the link fields and accessors -- confirm in list.hfa.
+        #include <containers/list.hfa>
+        struct S {
+                int f[64]; // FIXME: make "is volatile" consistent; given bug #TBD
+                DLISTED_MGD_IMPL_IN(S)
+        };
+        DLISTED_MGD_IMPL_OUT(S)
+
+#elif defined IMPL_CFA_MIKE_POC
+        // Prototype list from the local mike-proto-list.hfa: S embeds a dlink(S) as an unnamed inline member.
+        #include "mike-proto-list.hfa"
+        struct S {
+                int f[64]; // FIXME: make "is volatile" consistent; given bug #TBD
+                inline dlink(S);
+        };
+        P9_EMBEDDED( S, dlink(S) )
+
+#elif defined IMPL_CFA_MIKE_NEW
+        // Same node declaration as the IMPL_CFA_MIKE_POC branch, but built against <containers/list2.hfa>.
+        #include <containers/list2.hfa>
+        struct S {
+                int f[64]; // FIXME: make "is volatile" consistent; given bug #TBD
+                inline dlink(S);
+        };
+        P9_EMBEDDED( S, dlink(S) )
+
+#elif defined IMPL_CFA_COLBY
+        // CFA <bits/sequence.hfa>: S embeds Seqable as an unnamed inline member; Back/Next are re-typed for S below.
+        #include <bits/sequence.hfa>
+        struct S {
+                inline Seqable;
+                int f[64]; // FIXME: make "is volatile" consistent; given bug #TBD
+        };
+	static inline S *& Back( S * n ) { // S-typed wrapper: casts n to Seqable* and the result back to S*
+		return (S *)Back( (Seqable *)n );
+	}
+	static inline S *& Next( S * n ) { // S-typed wrapper: casts n to Colable* and the result back to S*
+		return (S *)Next( (Colable *)n );
+	}
+
+#elif defined IMPL_CFA_THIERRY_SUB
+        // The node type is the $thread struct declared in the local thierry-subqueue-old-rip.hfa.
+        #include "thierry-subqueue-old-rip.hfa"
+        typedef  $thread S;
+
+
+#else
+        #error bad impl
+#endif
+
+// Run `op` once per node index; `op` may use the counter i.  NoOfNodes must be in scope where the macro is expanded.  (i is volatile, presumably to keep the loop from being optimized away -- confirm.)
+#define Repeat( op ) for ( volatile unsigned int i = 0; i < NoOfNodes; i += 1 ) { op; }
+// Benchmark driver: for the implementation selected by IMPL_*, time Times rounds of inserting all NoOfNodes nodes into a list and then removing them again.
+int main() {
+        enum { NoOfNodes = 1000, Times = 100000 }; // nodes per round, number of timed rounds
+        S s[NoOfNodes];
+        clock_t start, end;
+        const char * impl = 0;
+
+    // STATS is an (empty) hook expanded inside REPORT.  REPORT prints the label and the clock() time in seconds; it is a do/while(0) without a trailing ';', so call sites write "REPORT;".
+    #define STATS
+    #define REPORT do { \
+        double elapsed = ((double) (end - start)) / CLOCKS_PER_SEC; \
+        printf("%s %f sec\n", impl, elapsed); \
+        STATS \
+    } while(0)
+
+    #if defined IMPL_STL
+    do {
+        std::list<S *> lst;
+        start = clock();
+        for ( volatile unsigned int t = 0; t < Times; t += 1 ) {
+                Repeat( lst.push_back( &s[i] ) );
+                Repeat( lst.pop_front() );
+        }
+        end = clock();
+        impl = "STL list, pointer";
+        REPORT;
+    } while (0);
+    #elif defined IMPL_UPP
+    do {
+        uSequence<S> lst;
+        start = clock();
+        for ( volatile unsigned int t = 0; t < Times; t += 1 ) {
+                Repeat( lst.addTail( &s[i] ) );
+                Repeat( lst.dropHead() );
+        }
+        end = clock();
+        impl = "u++ intrusive list";
+        REPORT;
+    } while (0);
+    #elif defined IMPL_CFA_MIKE_OLD
+    do {
+        dlist(S, S) lst;
+        start = clock();
+        for ( volatile unsigned int t = 0; t < Times; t += 1 ) {
+                Repeat( insert_last( lst, s[i] ) );
+                Repeat( remove( lst`first ) );
+        }
+        end = clock();
+        impl = "cfa mike-old intrusive list";
+        REPORT;
+    } while (0);
+    #elif defined IMPL_CFA_MIKE_POC
+    do {
+        dlist(S) lst;
+        start = clock();
+        for ( volatile unsigned int t = 0; t < Times; t += 1 ) {
+                Repeat( insert_last( lst, s[i] ) );
+                Repeat( remove_first( lst ) );
+        }
+        end = clock();
+        impl = "cfa mike-poc intrusive list";
+        REPORT;
+    } while (0);
+    #elif defined IMPL_CFA_MIKE_NEW
+    do {
+        dlist(S) lst;
+        start = clock();
+        for ( volatile unsigned int t = 0; t < Times; t += 1 ) {
+                Repeat( insert_last( lst, s[i] ) );
+                Repeat( remove( lst`first ) );
+        }
+        end = clock();
+        impl = "cfa mike-new intrusive list";
+        REPORT;
+    } while (0);
+    #elif defined IMPL_CFA_COLBY
+    do {
+    	Sequence(S) lst;
+        start = clock();
+        for ( volatile unsigned int t = 0; t < Times; t += 1 ) {
+                Repeat( addHead( lst, s[i] ) );
+                Repeat( dropTail( lst ) );
+        }
+        end = clock();
+        impl = "cfa colby intrusive list";
+        REPORT;
+    } while (0);
+
+    #elif defined IMPL_CFA_THIERRY_SUB
+    do {
+    	__intrusive_lane_t lst;
+        start = clock();
+        for ( volatile unsigned int t = 0; t < Times; t += 1 ) {
+                Repeat( push( lst, &s[i] ) );
+                Repeat( pop( lst ) );
+        }
+        end = clock();
+        impl = "cfa thierry subqueue intrusive list";
+        REPORT;
+    } while (0);
+
+    #endif
+}
Index: tests/zombies/linked-list-perf/mike-proto-list.hfa
===================================================================
--- tests/zombies/linked-list-perf/mike-proto-list.hfa	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
+++ tests/zombies/linked-list-perf/mike-proto-list.hfa	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
@@ -0,0 +1,75 @@
+#include <assert.h>
+// Link and list-head types for a doubly-linked list threaded through elements of type tE, plus constructors that null every pointer.
+forall( tE & ) {
+    // Per-element pair of neighbour pointers.
+    struct dlink{
+        tE *next;
+        tE *prev;
+    };
+    // Constructor: a new link starts with both pointers null.
+    static inline void ?{}( dlink(tE) & this ) {
+        this.next = 0p;
+        this.prev = 0p;
+    }
+    // tLinks is a type-only parameter (no field uses it) defaulting to dlink(tE); the list operations use it to reach the dlink inside a tE.
+    forall( tLinks & = dlink(tE) ) {
+        struct dlist{
+            tE *first;
+            tE *last;
+        };
+        // Constructor: an empty list has null first and last.
+        static inline void ?{}( dlist(tE, tLinks) & this ) {
+            this.first = 0p;
+            this.last = 0p;
+        }
+    }
+}
+// embedded( tOuter, tInner ) holds when a tInner reference can be obtained from a tOuter reference through the postfix `inner operation.
+trait embedded( tOuter &, tInner & ) {
+    tInner & ?`inner( tOuter & );
+};
+
+// embedded is reflexive: for any type tX, `inner on a tX simply returns that same object
+forall( tX & )
+static inline tX & ?`inner( tX & this ) { return this; }
+// P9_EMBEDDED( tOuter, tInner ) defines ?`inner for one outer/inner pair by returning the outer object as a tInner reference (presumably valid because tOuter declares an unnamed `inline tInner;` member -- confirm).
+// use this on every case of plan-9 inheritance, to make embedded a closure of plan-9 inheritance
+#define P9_EMBEDDED( tOuter, tInner ) \
+    static inline tInner & ?`inner( tOuter & this ) { return this; }
+// insert_last: append e after the current last element of lst; e's links are expected to be null on entry (checked with verify).
+// remove_first: unlink the first element of a non-empty lst, leaving that element's links null.
+forall( tE &, tLinks & | embedded( tE, tLinks ) | embedded( tLinks, dlink(tE) ) ) {
+    static inline void insert_last( dlist(tE, tLinks) &lst, tE & e ) {
+        if (!lst.last) { // empty list: e also becomes the first element
+            verify(!lst.first);
+            lst.first = &e;
+        } else { // non-empty: hook e behind the current last element
+            verify(lst.first);
+            dlink(tE) & oldLastLinks = (*lst.last)`inner`inner;
+            verify(oldLastLinks.next == 0p);
+            oldLastLinks.next = & e;
+        }
+        dlink(tE) & newLastLinks = e`inner`inner; // reach e's dlink through tLinks
+        verify(newLastLinks.prev == 0p);
+        verify(newLastLinks.next == 0p);
+        newLastLinks.prev = lst.last; // still the old last element (null if the list was empty)
+        lst.last = &e;
+    }
+    static inline void remove_first( dlist(tE, tLinks) &lst ) {
+        verify(lst.first && lst.last);
+        dlink(tE) & oldFirstLinks = (*lst.first)`inner`inner;
+        verify(oldFirstLinks.prev == 0p);
+        if( lst.last == lst.first ) { // sole element: the list becomes empty
+            verify(oldFirstLinks.next == 0p);
+            lst.last = 0p;
+            lst.first = 0p;
+        } else { // promote the second element to first
+            verify(oldFirstLinks.next != 0p);
+            tE & newFirst = * oldFirstLinks.next;
+            dlink(tE) & newFirstLinks = (newFirst)`inner`inner;
+            oldFirstLinks.next = 0p;
+            newFirstLinks.prev = 0p;
+            lst.first = & newFirst;
+        }
+    }
+}
Index: tests/zombies/linked-list-perf/thierry-subqueue-old-rip.hfa
===================================================================
--- tests/zombies/linked-list-perf/thierry-subqueue-old-rip.hfa	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
+++ tests/zombies/linked-list-perf/thierry-subqueue-old-rip.hfa	(revision 3c2c2f00421be9e9aec0d30df16d2a86589d8219)
@@ -0,0 +1,255 @@
+#include <assert.h>
+#include <stddef.h>
+struct $thread; // forward declaration; the definition follows __thread_desc_link
+struct __thread_desc_link { // link fields carried inside every $thread and inside the lane's two anchors
+    struct $thread * next;
+    struct $thread * prev;
+    volatile unsigned long long ts; // timestamp; on the lane's head anchor, 0 is what is_empty() tests for
+    int preferred; // not referenced anywhere else in this file
+};
+struct $thread { // node type used by the lane: a payload array plus the link
+    int f[64]; // FIXME: make "is volatile" consistent; given bug #TBD
+    struct __thread_desc_link link;
+};
+
+// Intrusive lanes which are used by the relaxed ready queue
+struct __attribute__((aligned(128))) __intrusive_lane_t {
+        // NOTE(review): mpsc_queue is not declared in this file; the USE_MPSC branches need it from elsewhere.
+        #if defined(USE_MPSC)
+                mpsc_queue($thread) queue;
+                __attribute__((aligned(128)))
+        #else
+                // anchors for the head (before) and the tail (after) of the queue
+                __attribute__((aligned(128))) struct __sentinel_t {
+                        // Linked-list fields:
+                        // intrusive link field for threads;
+                        // must be exactly as in $thread, because head()/tail() view an anchor as a $thread via offsetof( $thread, link )
+                        __thread_desc_link link;
+                } before, after;
+        #endif
+
+        // spin lock protecting the queue
+        volatile bool lock;
+
+        // Optional statistic counters
+        #if !defined(__CFA_NO_SCHED_STATS__)
+                struct __attribute__((aligned(64))) {
+                        // difference between number of push and pops
+                        ssize_t diff;
+
+                        // total number of pushes and pops
+                        size_t  push;
+                        size_t  pop ;
+                } stat;
+        #endif
+};
+// Constructor and destructor prototypes; both are defined further down, after head() and tail().
+void  ?{}(__intrusive_lane_t & this);
+void ^?{}(__intrusive_lane_t & this);
+
+// Head anchor viewed as a $thread: the pseudo-node that sits one before the first element
+static inline $thread * head(const __intrusive_lane_t & this) {
+        #if defined(USE_MPSC)
+                return this.queue.head;
+        #else
+                // step back from the anchor's address by the offset of link inside $thread
+                uintptr_t anchorAddr = (uintptr_t)( &this.before );
+                $thread * rhead = ($thread *)( anchorAddr - offsetof( $thread, link ) );
+                /* paranoid */ verify(rhead);
+                return rhead;
+        #endif
+}
+
+// Tail anchor viewed as a $thread: the pseudo-node that sits one after the last element
+static inline $thread * tail(const __intrusive_lane_t & this) {
+        #if defined(USE_MPSC)
+                return this.queue.tail;
+        #else
+                // step back from the anchor's address by the offset of link inside $thread
+                uintptr_t anchorAddr = (uintptr_t)( &this.after );
+                $thread * rtail = ($thread *)( anchorAddr - offsetof( $thread, link ) );
+                /* paranoid */ verify(rtail);
+                return rtail;
+        #endif
+}
+
+// Ctor: leaves the lane unlocked and empty -- the two anchors point at each other and both anchor timestamps are 0
+void ?{}( __intrusive_lane_t & this ) {
+        this.lock = false;
+
+        #if !defined(USE_MPSC)
+                this.before.link.prev = 0p;
+                this.before.link.next = tail(this);
+                this.before.link.ts   = 0;
+
+                this.after .link.prev = head(this);
+                this.after .link.next = 0p;
+                this.after .link.ts   = 0;
+                // NOTE(review): the counters below are zeroed only inside this !USE_MPSC branch.
+                #if !defined(__CFA_NO_SCHED_STATS__)
+                        this.stat.diff = 0;
+                        this.stat.push = 0;
+                        this.stat.pop  = 0;
+                #endif
+
+                // We add a boat-load of assertions here because the anchor code is very fragile: head()/tail() must alias the anchors' link fields exactly
+                /* paranoid */ verify(((uintptr_t)( head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.before));
+                /* paranoid */ verify(((uintptr_t)( tail(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.after ));
+                /* paranoid */ verify(head(this)->link.prev == 0p );
+                /* paranoid */ verify(head(this)->link.next == tail(this) );
+                /* paranoid */ verify(tail(this)->link.next == 0p );
+                /* paranoid */ verify(tail(this)->link.prev == head(this) );
+                /* paranoid */ verify(&head(this)->link.prev == &this.before.link.prev );
+                /* paranoid */ verify(&head(this)->link.next == &this.before.link.next );
+                /* paranoid */ verify(&tail(this)->link.prev == &this.after .link.prev );
+                /* paranoid */ verify(&tail(this)->link.next == &this.after .link.next );
+                /* paranoid */ verify(__alignof__(__intrusive_lane_t) == 128); // matches the aligned(128) attribute on the struct
+                /* paranoid */ verify(__alignof__(this) == 128);
+                /* paranoid */ verifyf(((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128));
+        #endif
+}
+
+// Dtor: modifies nothing; in the non-MPSC build it only checks (via verify) that the anchors point at each other
+void ^?{}( __intrusive_lane_t & this ) {
+        #if !defined(USE_MPSC)
+                // Make sure the list is empty
+                /* paranoid */ verify(head(this)->link.prev == 0p );
+                /* paranoid */ verify(head(this)->link.next == tail(this) );
+                /* paranoid */ verify(tail(this)->link.next == 0p );
+                /* paranoid */ verify(tail(this)->link.prev == head(this) );
+        #endif
+}
+
+// Push a thread onto this lane: node is linked in just before the tail anchor
+// returns true if lane was empty before push, false otherwise (non-MPSC build; NOTE(review): the USE_MPSC branch has no return statement)
+bool push(__intrusive_lane_t & this, $thread * node) {
+        #if defined(USE_MPSC)
+                inline $thread * volatile & ?`next ( $thread * this )  __attribute__((const)) {
+                        return this->link.next;
+                }
+                push(this.queue, node);
+        #else
+                #if defined(__CFA_WITH_VERIFY__)
+                        // Preconditions: lock held, node has a non-zero timestamp and null links.
+                        // NOTE(review): experiment.koad never sets lock or the nodes' ts, so these checks presumably only pass when verify is compiled out -- confirm.
+                        /* paranoid */ verify(this.lock);
+                        /* paranoid */ verify(node->link.ts != 0);
+                        /* paranoid */ verify(node->link.next == 0p);
+                        /* paranoid */ verify(node->link.prev == 0p);
+                        /* paranoid */ verify(tail(this)->link.next == 0p);
+                        /* paranoid */ verify(head(this)->link.prev == 0p);
+
+                        if(this.before.link.ts == 0l) {
+                                /* paranoid */ verify(tail(this)->link.prev == head(this));
+                                /* paranoid */ verify(head(this)->link.next == tail(this));
+                        } else {
+                                /* paranoid */ verify(tail(this)->link.prev != head(this));
+                                /* paranoid */ verify(head(this)->link.next != tail(this));
+                        }
+                #endif
+
+                // Get the relevant nodes locally
+                $thread * tail = tail(this);
+                $thread * prev = tail->link.prev;
+
+                // Do the push
+                node->link.next = tail;
+                node->link.prev = prev;
+                prev->link.next = node;
+                tail->link.prev = node;
+
+                // Update stats
+                #if !defined(__CFA_NO_SCHED_STATS__)
+                        this.stat.diff++;
+                        this.stat.push++;
+                #endif
+
+                verify(node->link.next == tail(this));
+
+                // Check if the queue used to be empty (a zero timestamp on the head anchor); if so, the anchor takes over node's timestamp
+                if(this.before.link.ts == 0l) {
+                        this.before.link.ts = node->link.ts;
+                        /* paranoid */ verify(node->link.prev == head(this));
+                        return true;
+                }
+                return false;
+        #endif
+}
+
+// Pop a thread from this lane (must be non-empty)
+// returns the popped thread, i.e. the node that followed the head anchor, with its links reset to null
+// both branches at the end return that node; they differ only in the checks they run
+$thread * pop(__intrusive_lane_t & this) {
+        /* paranoid */ verify(this.lock);
+        #if defined(USE_MPSC)
+                inline $thread * volatile & ?`next ( $thread * this )  __attribute__((const)) {
+                        return this->link.next;
+                }
+                return pop(this.queue);
+        #else
+                /* paranoid */ verify(this.before.link.ts != 0ul);
+
+                // Get anchors locally
+                $thread * head = head(this);
+                $thread * tail = tail(this);
+
+                // Get the relevant nodes locally
+                $thread * node = head->link.next;
+                $thread * next = node->link.next;
+
+                /* paranoid */ verify(node != tail);
+                /* paranoid */ verify(node);
+
+                // Do the pop
+                head->link.next = next;
+                next->link.prev = head;
+                node->link.next = 0p;
+                node->link.prev = 0p;
+
+                // Update head time stamp: copied from the new first node, or from the tail anchor when the lane is now empty
+                this.before.link.ts = next->link.ts;
+
+                // Update stats
+                #ifndef __CFA_NO_SCHED_STATS__
+                        this.stat.diff--;
+                        this.stat.pop ++;
+                #endif
+
+                // Check if we emptied list and return accordingly
+                /* paranoid */ verify(tail(this)->link.next == 0p);
+                /* paranoid */ verify(head(this)->link.prev == 0p);
+                if(next == tail) { // lane is now empty
+                        /* paranoid */ verify(this.before.link.ts == 0);
+                        /* paranoid */ verify(tail(this)->link.prev == head(this));
+                        /* paranoid */ verify(head(this)->link.next == tail(this));
+                        return node;
+                }
+                else { // at least one thread remains
+                        /* paranoid */ verify(next->link.ts != 0);
+                        /* paranoid */ verify(tail(this)->link.prev != head(this));
+                        /* paranoid */ verify(head(this)->link.next != tail(this));
+                        /* paranoid */ verify(this.before.link.ts != 0);
+                        return node;
+                }
+        #endif
+}
+
+// True when the lane holds no threads (null MPSC head, or a zero timestamp on the head anchor)
+static inline bool is_empty(__intrusive_lane_t & this) {
+        #if defined(USE_MPSC)
+                return !this.queue.head;
+        #else
+                // No verify here: the lane may not be locked by the caller
+                return !this.before.link.ts;
+        #endif
+}
+
+// Lane timestamp.  MPSC build: the head thread's timestamp, or -1ull when there is none; otherwise the head anchor's timestamp
+static inline unsigned long long ts(__intrusive_lane_t & this) {
+        #if defined(USE_MPSC)
+                $thread * tl = this.queue.head;
+                return tl ? tl->link.ts : -1ull;
+        #else
+                // No verify here: the lane may not be locked by the caller
+                unsigned long long stamp = this.before.link.ts;
+                return stamp;
+        #endif
+}
