Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ libcfa/src/Makefile.am	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -59,5 +59,4 @@
 	concurrency/iofwd.hfa \
 	containers/list.hfa \
-	containers/list2.hfa \
 	containers/queueLockFree.hfa \
 	containers/stackLockFree.hfa \
Index: libcfa/src/concurrency/alarm.hfa
===================================================================
--- libcfa/src/concurrency/alarm.hfa	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ libcfa/src/concurrency/alarm.hfa	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -49,5 +49,5 @@
 	Duration period;			// if > 0 => period of alarm
 
-	DLISTED_MGD_IMPL_IN(alarm_node_t)
+	inline dlink(alarm_node_t);
 
 	union {
@@ -60,5 +60,5 @@
 	enum alarm_type type;		// true if this is not a user defined alarm
 };
-DLISTED_MGD_IMPL_OUT(alarm_node_t)
+P9_EMBEDDED( alarm_node_t, dlink(alarm_node_t) )
 
 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
@@ -67,5 +67,5 @@
 void ^?{}( alarm_node_t & this );
 
-typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
+typedef dlist(alarm_node_t) alarm_list_t;
 
 void insert( alarm_list_t * this, alarm_node_t * n );
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ libcfa/src/concurrency/kernel.cfa	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -908,5 +908,5 @@
 	}
 
-	static void crawl_list( cluster * cltr, dlist(processor, processor) & list, unsigned count ) {
+	static void crawl_list( cluster * cltr, dlist(processor) & list, unsigned count ) {
 		/* paranoid */ verify( cltr->stats );
 
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ libcfa/src/concurrency/kernel.hfa	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -107,5 +107,5 @@
 
 	// Link lists fields
-	DLISTED_MGD_IMPL_IN(processor)
+	inline dlink(processor);
 
 	// special init fields
@@ -129,4 +129,5 @@
 #endif
 };
+P9_EMBEDDED( processor, dlink(processor) )
 
 void  ?{}(processor & this, const char name[], struct cluster & cltr);
@@ -136,6 +137,4 @@
 static inline void  ?{}(processor & this, struct cluster & cltr) { this{ "Anonymous Processor", cltr}; }
 static inline void  ?{}(processor & this, const char name[])     { this{name, *mainCluster}; }
-
-DLISTED_MGD_IMPL_OUT(processor)
 
 //-----------------------------------------------------------------------------
@@ -186,8 +185,8 @@
 
 	// List of idle processors
-	dlist(processor, processor) idles;
+	dlist(processor) idles;
 
 	// List of active processors
-	dlist(processor, processor) actives;
+	dlist(processor) actives;
 };
 
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -554,5 +554,5 @@
 }
 
-static void assign_list(unsigned & value, dlist(processor, processor) & list, unsigned count) {
+static void assign_list(unsigned & value, dlist(processor) & list, unsigned count) {
 	processor * it = &list`first;
 	for(unsigned i = 0; i < count; i++) {
Index: libcfa/src/containers/list.hfa
===================================================================
--- libcfa/src/containers/list.hfa	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ libcfa/src/containers/list.hfa	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -18,333 +18,279 @@
 #include <assert.h>
 
-#define __DLISTED_MGD_COMMON(ELEM, NODE, LINKS_FLD) \
-static inline ELEM& $tempcv_n2e(NODE &node) { \
-	return node; \
-} \
-\
-static inline NODE& $tempcv_e2n(ELEM &node) { \
-	return ( NODE & ) node; \
-} \
-\
-static inline ELEM & ?`prev(NODE &node) { \
-    $dlinks(ELEM) & ls = node.LINKS_FLD; \
-	$mgd_link(ELEM) * l = &ls.prev; \
-	ELEM * e = l->elem; \
-	return *e; \
-} \
-\
-static inline ELEM & ?`next(NODE &node) { \
-    $dlinks(ELEM) & ls = node.LINKS_FLD; \
-	$mgd_link(ELEM) * l = &ls.next; \
-	ELEM * e = l->elem; \
-	return *e; \
-} \
-\
-static inline $mgd_link(ELEM) & $prev_link(NODE &node) { \
-    $dlinks(ELEM) & ls = node.LINKS_FLD; \
-	$mgd_link(ELEM) * l = &ls.prev; \
-	return *l; \
-} \
-\
-static inline $mgd_link(ELEM) & $next_link(NODE &node) { \
-    $dlinks(ELEM) & ls = node.LINKS_FLD; \
-	$mgd_link(ELEM) * l = &ls.next; \
-	return *l; \
+forall( Decorator &, T & )
+struct tytagref {
+    inline T &;
+};
+
+trait embedded( tOuter &, tMid &, tInner & ) {
+    tytagref( tMid, tInner ) ?`inner( tOuter & );
+};
+
+// embedded is reflexive, with no info (void) as the type tag
+forall (T &)
+static inline tytagref(void, T) ?`inner ( T & this ) { tytagref( void, T ) ret = {this}; return ret; }
+
+// use this on every case of plan-9 inheritance, to make embedded a closure of plan-9 inheritance
+#define P9_EMBEDDED( derived, immedBase ) \
+forall( Tbase &, TdiscardPath & | { tytagref( TdiscardPath, Tbase ) ?`inner( immedBase & ); } ) \
+    static inline tytagref(immedBase, Tbase) ?`inner( derived & this ) { \
+        immedBase & ib = this; \
+        Tbase & b = ib`inner; \
+        tytagref(immedBase, Tbase) result = { b }; \
+        return result; \
+    }
+
+#define EMBEDDED_VIA( OUTER, MID, INNER ) \
+   (struct { tytagref(MID, INNER) ( * ?`inner ) ( OUTER & ); }){ ?`inner } 
+
+#define DLINK_VIA( TE, TLINK ) \
+   EMBEDDED_VIA( TE, TLINK, dlink(TE) )
+
+
+// The origin is the position encountered at the start of iteration,
+// signifying, "need to advance to the first element," and at the end
+// of iteration, signifying, "no more elements."  Normal comsumption of
+// an iterator runs ?`moveNext as the first step, and uses the return
+// of ?`moveNext as a guard, before dereferencing the iterator.  So
+// normal consumption of an iterator does not dereference an iterator
+// in origin position.  The value of a pointer (underlying a refence)
+// that is exposed publicly as an iteraor, and also a pointer stored
+// internally in a link field, is tagged, to indicate "is the origin"
+// (internally, is the list-head sentinel node), or untagged, to indicate
+// "is a regular node."  Intent is to help a user who dereferences an
+// iterator in origin position (which would be an API-use error on their
+// part), by failing fast.
+
+#if defined( __x86_64 )
+    // Preferred case: tag in the most-significant bit.  Dereference
+    // has been shown to segfault consistently.  Maintenance should
+    // list more architectures as "ok" here, to let them use the
+    // preferred case, when valid.
+    #define ORIGIN_TAG_BITNO ( 8 * sizeof( size_t ) - 1 )
+#else
+    // Fallback case: tag in the least-significant bit.  Dereference
+    // will often give an alignment error, but may not, e.g. if
+    // accessing a char-typed member.  32-bit x86 uses the most-
+    // significant bit for real room on the heap.
+    #define ORIGIN_TAG_BITNO 0
+#endif
+#define ORIGIN_TAG_MASK (((size_t)1) << ORIGIN_TAG_BITNO)
+
+#define ORIGIN_TAG_SET(p)   ((p) |  ORIGIN_TAG_MASK)
+#define ORIGIN_TAG_CLEAR(p) ((p) & ~ORIGIN_TAG_MASK)
+#define ORIGIN_TAG_QUERY(p) ((p) &  ORIGIN_TAG_MASK)
+
+
+forall( tE & ) {
+
+    struct dlink{
+        tE *next;
+        tE *prev;
+    };
+
+    static inline void ?{}( dlink(tE) & this ) {
+        this.next = 0p;
+        this.prev = 0p;
+    }
+
+    forall( tLinks & = dlink(tE) )
+    struct dlist {
+        inline dlink(tE);
+    };
+
+    forall( tLinks & | embedded( tE, tLinks, dlink(tE) ) ) {
+        static inline tE * $get_list_origin_addr( dlist(tE, tLinks) & lst ) {
+            dlink(tE) & link_from_null = ( * (tE *) 0p )`inner;
+            ptrdiff_t link_offset = (ptrdiff_t) & link_from_null;
+            size_t origin_addr = ((size_t) & lst) - link_offset;
+            size_t preResult = ORIGIN_TAG_SET( origin_addr );
+            return (tE *)preResult;
+        }
+
+        static inline void ?{}( dlist(tE, tLinks) & this ) {
+            tE * listOrigin = $get_list_origin_addr( this );
+            ( ( dlink(tE) & ) this ){ listOrigin, listOrigin } ;
+        }
+    }
+
 }
 
-#define __DLISTED_MGD_JUSTEXPL(STRUCT, IN_THELIST, STRUCT_IN_THELIST) \
-struct STRUCT_IN_THELIST { \
-	inline STRUCT; \
-}; \
-\
-void ?{}(STRUCT_IN_THELIST &) = void; \
-\
-static inline STRUCT_IN_THELIST& ?`IN_THELIST(STRUCT &this) { \
-	return (STRUCT_IN_THELIST&)this; \
-}
-
-#define __DLISTED_MGD_JUSTIMPL(STRUCT)
-
-forall( tE & ) {
-	struct $mgd_link {
-		tE *elem;
-		void *terminator;
-		_Bool is_terminator;
-		// will collapse to single pointer with tag bit
-	};
-	static inline void ?{}( $mgd_link(tE) &this, tE* elem ) {
-		(this.elem){ elem };
-		(this.terminator){ 0p };
-		(this.is_terminator){ 0 };
-	}
-	static inline void ?{}( $mgd_link(tE) &this, void * terminator ) {
-		(this.elem){ 0p };
-		(this.terminator){ terminator };
-		(this.is_terminator){ 1 };
-	}
-	static inline void ?=?( $mgd_link(tE) &this, tE* elem ) {
-		this.elem = elem ;
-		this.terminator = 0p;
-		this.is_terminator = 0;
-	}
-	static inline void ?=?( $mgd_link(tE) &this, void * terminator ) {
-		this.elem = 0p;
-		this.terminator = terminator;
-		this.is_terminator = 1;
-	}
-	struct $dlinks {
-		// containing item is not listed
-		// iff
-		// links have (elem == 0p && terminator == 0p)
-		$mgd_link(tE) next;
-		$mgd_link(tE) prev;
-	};
-	static inline void ?{}( $dlinks(tE) &this ) {
-		(this.next){ (tE *)0p };
-		(this.prev){ (tE *)0p };
-	}
-}
-
-#define DLISTED_MGD_EXPL_IN(STRUCT, LIST_SUF) \
-  $dlinks(STRUCT) $links_ ## LIST_SUF;
-
-#define DLISTED_MGD_EXPL_OUT(STRUCT, LIST_SUF) \
-  __DLISTED_MGD_JUSTEXPL(STRUCT, in_##LIST_SUF, STRUCT ## _in_ ## LIST_SUF) \
-  __DLISTED_MGD_COMMON(STRUCT, STRUCT##_in_##LIST_SUF,  $links_ ## LIST_SUF)
-
-#define DLISTED_MGD_IMPL_IN(STRUCT) \
-  $dlinks(STRUCT) $links;
-
-#define DLISTED_MGD_IMPL_OUT(STRUCT) \
-  __DLISTED_MGD_JUSTIMPL(STRUCT) \
-  __DLISTED_MGD_COMMON(STRUCT, STRUCT, $links)
-
-trait $dlistable(Tnode &, Telem &) {
-	$mgd_link(Telem) & $prev_link(Tnode &);
-	$mgd_link(Telem) & $next_link(Tnode &);
-	Telem& $tempcv_n2e(Tnode &);
-	Tnode& $tempcv_e2n(Telem &);
-
-	Telem& ?`next(Tnode &);
-	Telem& ?`prev(Tnode &);
-};
-
-forall (Tnode &, Telem & | $dlistable(Tnode, Telem)) {
-
-	// implemented as a sentinel item in an underlying cicrular list
-	// theList.$links.next is first
-	// theList.$links.prev is last
-	// note this allocation preserves prev-next composition as an identity
-	struct dlist {
-		$dlinks(Telem) $links;
-	};
-
-	// an empty dlist
-	// links refer to self, making a tight circle
-	static inline void ?{}( dlist(Tnode, Telem) & this ) {
-		$mgd_link(Telem) selfRef = (void *) &this;
-		( this.$links ) { selfRef, selfRef };
-	}
-
-	static inline Telem & ?`first( dlist(Tnode, Telem) &l ) {
-		return * l.$links.next.elem;
-	}
-
-	static inline Telem & ?`last( dlist(Tnode, Telem) &l ) {
-		return * l.$links.prev.elem;
-	}
-
-	#if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
-	static bool $validate_fwd( dlist(Tnode, Telem) & this ) {
-		Tnode * it = & $tempcv_e2n( this`first );
-		if (!it) return (& this`last == 0p);
-
-		while( $next_link(*it).elem ) {
-			it = & $tempcv_e2n( * $next_link(*it).elem );
-		}
-
-		return ( it == & $tempcv_e2n( this`last ) ) &&
-			   ( $next_link(*it).is_terminator ) &&
-			   ( ((dlist(Tnode, Telem)*)$next_link(*it).terminator) == &this );
-	}
-	static bool $validate_rev( dlist(Tnode, Telem) & this ) {
-		Tnode * it = & $tempcv_e2n( this`last );
-		if (!it) return (& this`first == 0p);
-
-		while( $prev_link(*it).elem ) {
-			it = & $tempcv_e2n( * $prev_link(*it).elem );
-		}
-
-		return ( it == & $tempcv_e2n( this`first ) ) &&
-			   ( $prev_link(*it).is_terminator ) &&
-			   ( ((dlist(Tnode, Telem)*)$prev_link(*it).terminator) == &this );
-	}
-	static bool validate( dlist(Tnode, Telem) & this ) {
-		return $validate_fwd(this) && $validate_rev(this);
-	}
-	#endif
-
-	static inline void insert_after(Tnode &list_pos, Telem &to_insert) {
+
+forall( tE &, tLinks & | embedded( tE, tLinks, dlink(tE) ) ) {
+
+	static inline void insert_after(tE & list_pos, tE &to_insert) {
 		verify (&list_pos != 0p);
 		verify (&to_insert != 0p);
-		Tnode &singleton_to_insert = $tempcv_e2n(to_insert);
-		verify($prev_link(singleton_to_insert).elem == 0p);
-		verify($next_link(singleton_to_insert).elem == 0p);
-		$prev_link(singleton_to_insert) = & $tempcv_n2e(list_pos);
-		$next_link(singleton_to_insert) = $next_link(list_pos);
-		if ($next_link(list_pos).is_terminator) {
-			dlist(Tnode, Telem) *list = ( dlist(Tnode, Telem) * ) $next_link(list_pos).terminator;
-			$dlinks(Telem) *list_links = & list->$links;
-			$mgd_link(Telem) *list_last = & list_links->prev;
-			*list_last = &to_insert;
-		} else {
-			Telem *list_pos_next = $next_link(list_pos).elem;
-			if (list_pos_next) {
-				Tnode & lpn_inlist = $tempcv_e2n(*list_pos_next);
-				$prev_link(lpn_inlist) = &to_insert;
-			}
-		}
-		$next_link(list_pos) = &to_insert;
-	}
-
-	static inline void insert_before(Tnode &list_pos, Telem &to_insert) {
+        dlink(tE) & linkToInsert = to_insert`inner;
+		verify(linkToInsert.prev == 0p);
+		verify(linkToInsert.next == 0p);
+        tE & list_pos_raw = list_pos;
+        tE & list_pos_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & list_pos_raw );
+        dlink(tE) & list_pos_links = list_pos_elem`inner;
+        asm( "" : : : "memory" );
+        tE & after_raw = * list_pos_links.next;
+        tE & after_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & after_raw );
+		linkToInsert.prev = & list_pos_raw;
+		linkToInsert.next = & after_raw;
+        dlink(tE) & afterLinks = after_elem`inner;
+        afterLinks.prev = &to_insert;
+		list_pos_links.next = &to_insert;
+        asm( "" : : : "memory" );
+	}
+
+	static inline void insert_before(tE & list_pos, tE &to_insert) {
 		verify (&list_pos != 0p);
 		verify (&to_insert != 0p);
-		Tnode &singleton_to_insert = $tempcv_e2n(to_insert);
-		verify($prev_link(singleton_to_insert).elem == 0p);
-		verify($next_link(singleton_to_insert).elem == 0p);
-		$next_link(singleton_to_insert) = & $tempcv_n2e(list_pos);
-		$prev_link(singleton_to_insert) = $prev_link(list_pos);
-		if ($prev_link(list_pos).is_terminator) {
-			dlist(Tnode, Telem) *list = ( dlist(Tnode, Telem) * ) $prev_link(list_pos).terminator;
-			$dlinks(Telem) *list_links = & list->$links;
-			$mgd_link(Telem) *list_first = & list_links->next;
-			*list_first = &to_insert;
-		} else {
-			Telem *list_pos_prev = $prev_link(list_pos).elem;
-			if (list_pos_prev) {
-				Tnode & lpp_inlist = $tempcv_e2n(*list_pos_prev);
-				$next_link(lpp_inlist) = &to_insert;
-			}
-		}
-		$prev_link(list_pos) = &to_insert;
-	}
-
-    static inline void insert_first(dlist(Tnode, Telem) &list, Telem &to_insert) {
-		verify (&list != 0p);
-		verify (&to_insert != 0p);
-		Tnode &singleton_to_insert = $tempcv_e2n(to_insert);
-		verify($prev_link(singleton_to_insert).elem == 0p);
-		verify($next_link(singleton_to_insert).elem == 0p);
-
-		$prev_link(singleton_to_insert) = (void*) &list;
-		$next_link(singleton_to_insert) = list.$links.next;
-
-		$dlinks(Telem) *listLinks = & list.$links;
-		if (listLinks->next.is_terminator) {
-			$mgd_link(Telem) * listPrevReference = & listLinks->prev;
-			*listPrevReference = &to_insert;
-		} else {
-			Tnode & next_inlist = $tempcv_e2n(*list.$links.next.elem);
-			$prev_link(next_inlist) = &to_insert;
-		}
-		$mgd_link(Telem) * listNextReference = & listLinks->next;
-		*listNextReference = &to_insert;
-	}
-
-    static inline void insert_last(dlist(Tnode, Telem) &list, Telem &to_insert) {
-		verify (&list != 0p);
-		verify (&to_insert != 0p);
-		Tnode &singleton_to_insert = $tempcv_e2n(to_insert);
-		verify($next_link(singleton_to_insert).elem == 0p);
-		verify($prev_link(singleton_to_insert).elem == 0p);
-
-		$next_link(singleton_to_insert) = (void*) &list;
-		$prev_link(singleton_to_insert) = list.$links.prev;
-
-		$dlinks(Telem) *listLinks = & list.$links;
-		if (listLinks->prev.is_terminator) {
-			$mgd_link(Telem) * listNextReference = & listLinks->next;
-			*listNextReference = &to_insert;
-		} else {
-			Tnode & prev_inlist = $tempcv_e2n(*list.$links.prev.elem);
-			$next_link(prev_inlist) = &to_insert;
-		}
-		$mgd_link(Telem) * listPrevReference = & listLinks->prev;
-		*listPrevReference = &to_insert;
-	}
-
-    static inline void remove(Tnode &list_pos) {
-		verify( &list_pos != 0p );
-
-		$mgd_link(Telem) &incoming_from_prev = *0p;
-		$mgd_link(Telem) &incoming_from_next = *0p;
-
-		if ( $prev_link(list_pos).is_terminator ) {
-			dlist(Tnode, Telem) * tgt_before = ( dlist(Tnode, Telem) * ) $prev_link(list_pos).terminator;
-			$dlinks(Telem) * links_before = & tgt_before->$links;
-			&incoming_from_prev = & links_before->next;
-		} else if ($prev_link(list_pos).elem) {
-			Telem * tgt_before = $prev_link(list_pos).elem;
-			Tnode & list_pos_before = $tempcv_e2n(*tgt_before);
-			&incoming_from_prev = & $next_link(list_pos_before);
-		}
-
-		if ( $next_link(list_pos).is_terminator ) {
-			dlist(Tnode, Telem) * tgt_after = ( dlist(Tnode, Telem) * ) $next_link(list_pos).terminator;
-			$dlinks(Telem) * links_after = & tgt_after->$links;
-			&incoming_from_next = & links_after->prev;
-		} else if ($next_link(list_pos).elem) {
-			Telem * tgt_after  = $next_link(list_pos).elem;
-			Tnode & list_pos_after  = $tempcv_e2n(*tgt_after );
-			&incoming_from_next = & $prev_link(list_pos_after );
-		}
-
-		if (& incoming_from_prev) {
-			incoming_from_prev = $next_link(list_pos);
-		}
-		if (& incoming_from_next) {
-			incoming_from_next = $prev_link(list_pos);
-		}
-
-		$next_link(list_pos) = (Telem*) 0p;
-		$prev_link(list_pos) = (Telem*) 0p;
-	}
-
-	static inline bool ?`is_empty(dlist(Tnode, Telem) &list) {
-		verify( &list != 0p );
-		$dlinks(Telem) *listLinks = & list.$links;
-		if (listLinks->next.is_terminator) {
-			verify(listLinks->prev.is_terminator);
-			verify(listLinks->next.terminator);
-			verify(listLinks->prev.terminator);
-			return true;
-		} else {
-			verify(!listLinks->prev.is_terminator);
-			verify(listLinks->next.elem);
-			verify(listLinks->prev.elem);
-			return false;
-		}
-	}
-
-	static inline Telem & pop_first(dlist(Tnode, Telem) &list) {
-		verify( &list != 0p );
-		verify( !list`is_empty );
-		$dlinks(Telem) *listLinks = & list.$links;
-		Telem & first = *listLinks->next.elem;
-		Tnode & list_pos_first  = $tempcv_e2n( first );
-		remove(list_pos_first);
-		return first;
-	}
-
-	static inline Telem & pop_last(dlist(Tnode, Telem) &list) {
-		verify( &list != 0p );
-		verify( !list`is_empty );
-		$dlinks(Telem) *listLinks = & list.$links;
-		Telem & last = *listLinks->prev.elem;
-		Tnode & list_pos_last  = $tempcv_e2n( last );
-		remove(list_pos_last);
-		return last;
-	}
+        dlink(tE) & linkToInsert = to_insert`inner;
+		verify(linkToInsert.next == 0p);
+		verify(linkToInsert.prev == 0p);
+        tE & list_pos_raw = list_pos;
+        tE & list_pos_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & list_pos_raw );
+        dlink(tE) & list_pos_links = list_pos_elem`inner;
+        asm( "" : : : "memory" );
+        tE & before_raw = * (list_pos_links).prev;
+        tE & before_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & before_raw );
+		linkToInsert.next = & list_pos_raw;
+		linkToInsert.prev = & before_raw;
+        dlink(tE) & beforeLinks = before_elem`inner;
+        beforeLinks.next = &to_insert;
+		(list_pos_links).prev = &to_insert;
+        asm( "" : : : "memory" );
+	}
+
+	static inline tE & remove(tE & list_pos) {
+		verify (&list_pos != 0p);
+        tE & list_pos_elem = list_pos;
+        verify( ! ORIGIN_TAG_QUERY((size_t) & list_pos_elem) );
+        dlink(tE) & list_pos_links = list_pos_elem`inner;
+        tE & before_raw = * list_pos_links.prev;
+        tE & before_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & before_raw );
+        dlink(tE) & before_links = before_elem`inner;
+        tE & after_raw = * list_pos_links.next;
+        tE & after_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & after_raw );
+        dlink(tE) & after_links = after_elem`inner;
+        before_links.next = &after_raw;
+        after_links.prev = &before_raw;
+        asm( "" : : : "memory" );
+		list_pos_links.prev = 0p;
+		list_pos_links.next = 0p;
+        asm( "" : : : "memory" );
+        return list_pos_elem;
+	}
+
+    static inline tE & ?`first( dlist(tE, tLinks) &lst ) {
+        tE * firstPtr = lst.next;
+        if (ORIGIN_TAG_QUERY((size_t)firstPtr)) firstPtr = 0p;
+        return *firstPtr;
+    }
+    static inline tE & ?`last ( dlist(tE, tLinks) &lst ) {
+        tE * lastPtr = lst.prev;
+        if (ORIGIN_TAG_QUERY((size_t)lastPtr)) lastPtr = 0p;
+        return *lastPtr;
+    }
+
+    static inline bool ?`isEmpty( dlist(tE, tLinks) & lst ) {
+        tE * firstPtr = lst.next;
+        if (ORIGIN_TAG_QUERY((size_t)firstPtr)) firstPtr = 0p;
+        return firstPtr == 0p;
+    }
+
+    static inline tE & ?`elems( dlist(tE, tLinks) & lst ) {
+        tE * origin = $get_list_origin_addr( lst );
+        return *origin;
+    }
+
+    static inline bool ?`moveNext( tE && refx ) {
+        tE && ref_inner = refx;
+        tE & oldReferent = * (tE*) ORIGIN_TAG_CLEAR( (size_t) & ref_inner );
+        &ref_inner = oldReferent`inner.next;
+        return &ref_inner != 0p  &&
+            ! ORIGIN_TAG_QUERY( (size_t) & ref_inner );
+    }
+
+    static inline bool ?`movePrev( tE && refx ) {
+        tE && ref_inner = refx;
+        tE & oldReferent = * (tE*) ORIGIN_TAG_CLEAR( (size_t) & ref_inner );
+        &ref_inner = oldReferent`inner.prev;
+        return &ref_inner != 0p  &&
+            ! ORIGIN_TAG_QUERY( (size_t) & ref_inner );
+    }
+
+    static inline bool ?`hasNext( tE & e ) {
+        return e`moveNext;
+    }
+
+    static inline bool ?`hasPrev( tE & e ) {
+        return e`movePrev;
+    }
+
+    static inline tE & ?`next( tE & e ) {
+        if( e`moveNext ) return e;
+        return * 0p;
+    }
+
+    static inline tE & ?`prev( tE & e ) {
+        if( e`movePrev ) return e;
+        return * 0p;
+    }
+
+    static inline void insert_first( dlist(tE, tLinks) &lst, tE & e ) {
+        insert_after(lst`elems, e);
+    }
+
+    static inline void insert_last( dlist(tE, tLinks) &lst, tE & e ) {
+        insert_before(lst`elems, e);
+    }
+
+    static inline tE & try_pop_front( dlist(tE, tLinks) &lst ) {
+        tE & first_inlist = lst`first;
+        tE & first_item = first_inlist;
+        if (&first_item) remove(first_inlist);
+        return first_item;
+    }
+
+    static inline tE & try_pop_back( dlist(tE, tLinks) &lst ) {
+        tE & last_inlist = lst`last;
+        tE & last_item = last_inlist;
+        if (&last_item) remove(last_inlist);
+        return last_item;
+    }
+
+
+  #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
+	static bool $validate_fwd( dlist(tE, tLinks) & this ) {
+        if ( ! & this`first ) return ( (& this`last) == 0p);
+
+        tE & lagElem = *0p;
+
+        while ( tE & it = this`elems; it`moveNext ) {
+            if (& lagElem == 0p &&  &it != & this`first ) return false;
+            & lagElem = & it;
+        }
+
+        if (& lagElem != & this`last) return false;
+
+        // TODO: verify that it is back at this`elems;
+        return true;
+	}
+	static bool $validate_rev( dlist(tE, tLinks) & this ) {
+        if ( ! & this`last ) return ( (& this`first) == 0p);
+
+        tE & lagElem = *0p;
+
+        while ( tE & it = this`elems; it`movePrev ) {
+            if (& lagElem == 0p &&  &it != & this`last ) return false;
+            & lagElem = & it;
+        }
+
+        if (& lagElem != & this`first) return false;
+
+        // TODO: verify that it is back at this`elems;
+        return true;
+	}
+	static bool validate( dlist(tE, tLinks) & this ) {
+		return $validate_fwd(this) && $validate_rev(this);
+	}
+  #endif
 
 }
Index: libcfa/src/containers/list2.hfa
===================================================================
--- libcfa/src/containers/list2.hfa	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ 	(revision )
@@ -1,297 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// list -- lets a user-defined stuct form intrusive linked lists
-//
-// Author           : Michael Brooks
-// Created On       : Wed Apr 22 18:00:00 2020
-// Last Modified By : Michael Brooks
-// Last Modified On : Wed Apr 22 18:00:00 2020
-// Update Count     : 1
-//
-
-#pragma once
-
-#include <assert.h>
-
-forall( Decorator &, T & )
-struct tytagref {
-    inline T &;
-};
-
-trait embedded( tOuter &, tMid &, tInner & ) {
-    tytagref( tMid, tInner ) ?`inner( tOuter & );
-};
-
-// embedded is reflexive, with no info (void) as the type tag
-forall (T &)
-tytagref(void, T) ?`inner ( T & this ) { tytagref( void, T ) ret = {this}; return ret; }
-
-// use this on every case of plan-9 inheritance, to make embedded a closure of plan-9 inheritance
-#define P9_EMBEDDED( derived, immedBase ) \
-forall( Tbase &, TdiscardPath & | { tytagref( TdiscardPath, Tbase ) ?`inner( immedBase & ); } ) \
-    static inline tytagref(immedBase, Tbase) ?`inner( derived & this ) { \
-        immedBase & ib = this; \
-        Tbase & b = ib`inner; \
-        tytagref(immedBase, Tbase) result = { b }; \
-        return result; \
-    }
-
-#define EMBEDDED_VIA( OUTER, MID, INNER ) \
-   (struct { tytagref(MID, INNER) ( * ?`inner ) ( OUTER & ); }){ ?`inner } 
-
-#define DLINK_VIA( TE, TLINK ) \
-   EMBEDDED_VIA( TE, TLINK, dlink(TE) )
-
-
-// The origin is the position encountered at the start of iteration,
-// signifying, "need to advance to the first element," and at the end
-// of iteration, signifying, "no more elements."  Normal comsumption of
-// an iterator runs ?`moveNext as the first step, and uses the return
-// of ?`moveNext as a guard, before dereferencing the iterator.  So
-// normal consumption of an iterator does not dereference an iterator
-// in origin position.  The value of a pointer (underlying a refence)
-// that is exposed publicly as an iteraor, and also a pointer stored
-// internally in a link field, is tagged, to indicate "is the origin"
-// (internally, is the list-head sentinel node), or untagged, to indicate
-// "is a regular node."  Intent is to help a user who dereferences an
-// iterator in origin position (which would be an API-use error on their
-// part), by failing fast.
-
-#if defined( __x86_64 )
-    // Preferred case: tag in the most-significant bit.  Dereference
-    // has been shown to segfault consistently.  Maintenance should
-    // list more architectures as "ok" here, to let them use the
-    // preferred case, when valid.
-    #define ORIGIN_TAG_BITNO ( 8 * sizeof( size_t ) - 1 )
-#else
-    // Fallback case: tag in the least-significant bit.  Dereference
-    // will often give an alignment error, but may not, e.g. if
-    // accessing a char-typed member.  32-bit x86 uses the most-
-    // significant bit for real room on the heap.
-    #define ORIGIN_TAG_BITNO 0
-#endif
-#define ORIGIN_TAG_MASK (((size_t)1) << ORIGIN_TAG_BITNO)
-
-#define ORIGIN_TAG_SET(p)   ((p) |  ORIGIN_TAG_MASK)
-#define ORIGIN_TAG_CLEAR(p) ((p) & ~ORIGIN_TAG_MASK)
-#define ORIGIN_TAG_QUERY(p) ((p) &  ORIGIN_TAG_MASK)
-
-
-forall( tE & ) {
-
-    struct dlink{
-        tE *next;
-        tE *prev;
-    };
-
-    static inline void ?{}( dlink(tE) & this ) {
-        this.next = 0p;
-        this.prev = 0p;
-    }
-
-    forall( tLinks & = dlink(tE) )
-    struct dlist {
-        inline dlink(tE);
-    };
-
-    forall( tLinks & | embedded( tE, tLinks, dlink(tE) ) ) {
-        static inline tE * $get_list_origin_addr( dlist(tE, tLinks) & lst ) {
-            dlink(tE) & link_from_null = ( * (tE *) 0p )`inner;
-            ptrdiff_t link_offset = (ptrdiff_t) & link_from_null;
-            size_t origin_addr = ((size_t) & lst) - link_offset;
-            size_t preResult = ORIGIN_TAG_SET( origin_addr );
-            return (tE *)preResult;
-        }
-
-        static inline void ?{}( dlist(tE, tLinks) & this ) {
-            tE * listOrigin = $get_list_origin_addr( this );
-            ( ( dlink(tE) & ) this ){ listOrigin, listOrigin } ;
-        }
-    }
-
-}
-
-
-forall( tE &, tLinks & | embedded( tE, tLinks, dlink(tE) ) ) {
-
-	static inline void insert_after(tE & list_pos, tE &to_insert) {
-		verify (&list_pos != 0p);
-		verify (&to_insert != 0p);
-        dlink(tE) & linkToInsert = to_insert`inner;
-		verify(linkToInsert.prev == 0p);
-		verify(linkToInsert.next == 0p);
-        tE & list_pos_raw = list_pos;
-        tE & list_pos_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & list_pos_raw );
-        dlink(tE) & list_pos_links = list_pos_elem`inner;
-        asm( "" : : : "memory" );
-        tE & after_raw = * list_pos_links.next;
-        tE & after_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & after_raw );
-		linkToInsert.prev = & list_pos_raw;
-		linkToInsert.next = & after_raw;
-        dlink(tE) & afterLinks = after_elem`inner;
-        afterLinks.prev = &to_insert;
-		list_pos_links.next = &to_insert;
-        asm( "" : : : "memory" );
-	}
-
-	static inline void insert_before(tE & list_pos, tE &to_insert) {
-		verify (&list_pos != 0p);
-		verify (&to_insert != 0p);
-        dlink(tE) & linkToInsert = to_insert`inner;
-		verify(linkToInsert.next == 0p);
-		verify(linkToInsert.prev == 0p);
-        tE & list_pos_raw = list_pos;
-        tE & list_pos_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & list_pos_raw );
-        dlink(tE) & list_pos_links = list_pos_elem`inner;
-        asm( "" : : : "memory" );
-        tE & before_raw = * (list_pos_links).prev;
-        tE & before_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & before_raw );
-		linkToInsert.next = & list_pos_raw;
-		linkToInsert.prev = & before_raw;
-        dlink(tE) & beforeLinks = before_elem`inner;
-        beforeLinks.next = &to_insert;
-		(list_pos_links).prev = &to_insert;
-        asm( "" : : : "memory" );
-	}
-
-	static inline tE & remove(tE & list_pos) {
-		verify (&list_pos != 0p);
-        tE & list_pos_elem = list_pos;
-        verify( ! ORIGIN_TAG_QUERY((size_t) & list_pos_elem) );
-        dlink(tE) & list_pos_links = list_pos_elem`inner;
-        tE & before_raw = * list_pos_links.prev;
-        tE & before_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & before_raw );
-        dlink(tE) & before_links = before_elem`inner;
-        tE & after_raw = * list_pos_links.next;
-        tE & after_elem = * (tE *) ORIGIN_TAG_CLEAR( (size_t) & after_raw );
-        dlink(tE) & after_links = after_elem`inner;
-        before_links.next = &after_raw;
-        after_links.prev = &before_raw;
-        asm( "" : : : "memory" );
-		list_pos_links.prev = 0p;
-		list_pos_links.next = 0p;
-        asm( "" : : : "memory" );
-        return list_pos_elem;
-	}
-
-    static inline tE & ?`first( dlist(tE, tLinks) &lst ) {
-        tE * firstPtr = lst.next;
-        if (ORIGIN_TAG_QUERY((size_t)firstPtr)) firstPtr = 0p;
-        return *firstPtr;
-    }
-    static inline tE & ?`last ( dlist(tE, tLinks) &lst ) {
-        tE * lastPtr = lst.prev;
-        if (ORIGIN_TAG_QUERY((size_t)lastPtr)) lastPtr = 0p;
-        return *lastPtr;
-    }
-
-    static inline bool ?`isEmpty( dlist(tE, tLinks) & lst ) {
-        tE * firstPtr = lst.next;
-        if (ORIGIN_TAG_QUERY((size_t)firstPtr)) firstPtr = 0p;
-        return firstPtr == 0p;
-    }
-
-    static inline tE & ?`elems( dlist(tE, tLinks) & lst ) {
-        tE * origin = $get_list_origin_addr( lst );
-        return *origin;
-    }
-
-    static inline bool ?`moveNext( tE && refx ) {
-        tE && ref_inner = refx;
-        tE & oldReferent = * (tE*) ORIGIN_TAG_CLEAR( (size_t) & ref_inner );
-        &ref_inner = oldReferent`inner.next;
-        return &ref_inner != 0p  &&
-            ! ORIGIN_TAG_QUERY( (size_t) & ref_inner );
-    }
-
-    static inline bool ?`movePrev( tE && refx ) {
-        tE && ref_inner = refx;
-        tE & oldReferent = * (tE*) ORIGIN_TAG_CLEAR( (size_t) & ref_inner );
-        &ref_inner = oldReferent`inner.prev;
-        return &ref_inner != 0p  &&
-            ! ORIGIN_TAG_QUERY( (size_t) & ref_inner );
-    }
-
-    static inline bool ?`hasNext( tE & e ) {
-        return e`moveNext;
-    }
-
-    static inline bool ?`hasPrev( tE & e ) {
-        return e`movePrev;
-    }
-
-    static inline tE & ?`next( tE & e ) {
-        if( e`moveNext ) return e;
-        return * 0p;
-    }
-
-    static inline tE & ?`prev( tE & e ) {
-        if( e`movePrev ) return e;
-        return * 0p;
-    }
-
-    static inline void insert_first( dlist(tE, tLinks) &lst, tE & e ) {
-        insert_after(lst`elems, e);
-    }
-
-    static inline void insert_last( dlist(tE, tLinks) &lst, tE & e ) {
-        insert_before(lst`elems, e);
-    }
-
-    static inline tE & try_pop_front( dlist(tE, tLinks) &lst ) {
-        tE & first_inlist = lst`first;
-        tE & first_item = first_inlist;
-        if (&first_item) remove(first_inlist);
-        return first_item;
-    }
-
-    static inline tE & try_pop_back( dlist(tE, tLinks) &lst ) {
-        tE & last_inlist = lst`last;
-        tE & last_item = last_inlist;
-        if (&last_item) remove(last_inlist);
-        return last_item;
-    }
-
-
-  #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
-	static bool $validate_fwd( dlist(tE, tLinks) & this ) {
-        if ( ! & this`first ) return ( (& this`last) == 0p);
-
-        tE & lagElem = *0p;
-
-        while ( tE & it = this`elems; it`moveNext ) {
-            if (& lagElem == 0p &&  &it != & this`first ) return false;
-            & lagElem = & it;
-        }
-
-        if (& lagElem != & this`last) return false;
-
-        // TODO: verify that it is back at this`elems;
-        return true;
-	}
-	static bool $validate_rev( dlist(tE, tLinks) & this ) {
-        if ( ! & this`last ) return ( (& this`first) == 0p);
-
-        tE & lagElem = *0p;
-
-        while ( tE & it = this`elems; it`movePrev ) {
-            if (& lagElem == 0p &&  &it != & this`last ) return false;
-            & lagElem = & it;
-        }
-
-        if (& lagElem != & this`first) return false;
-
-        // TODO: verify that it is back at this`elems;
-        return true;
-	}
-	static bool validate( dlist(tE, tLinks) & this ) {
-		return $validate_fwd(this) && $validate_rev(this);
-	}
-  #endif
-
-}
-
Index: libcfa/src/executor.baseline.txt
===================================================================
--- libcfa/src/executor.baseline.txt	(revision 69914cbc38918b375971f7accca59529f62f2d08)
+++ libcfa/src/executor.baseline.txt	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -0,0 +1,165 @@
+// Mutex buffer is embedded in the nomutex executor to allow the executor to delete the workers without causing a
+// deadlock.  If the executor is the monitor and the buffer is class, the thread calling the executor's destructor
+// (which is mutex) blocks when deleting the workers, preventing outstanding workers from calling remove to drain the
+// buffer.
+
+#include <thread.hfa>
+#include <containers/list.hfa>
+
+forall( T & | $dlistable(T, T) ) {
+	monitor Buffer {									// unbounded buffer
+		dlist( T, T ) queue;							// unbounded list of work requests
+		condition delay;
+	}; // Buffer
+
+	void insert( Buffer(T) & mutex buf, T * elem ) with(buf) {
+		dlist( T, T ) * qptr = &queue;					// workaround https://cforall.uwaterloo.ca/trac/ticket/166
+		insert_last( *qptr, *elem );					// insert element into buffer
+		signal( delay );								// restart
+	} // insert
+
+	T * remove( Buffer(T) & mutex buf ) with(buf) {
+		dlist( T, T ) * qptr = &queue;					// workaround https://cforall.uwaterloo.ca/trac/ticket/166
+		// if ( (*qptr)`is_empty ) wait( delay );			// no request to process ? => wait
+	  if ( (*qptr)`is_empty ) return 0p;				// no request to process ? => wait
+		return &pop_first( *qptr );
+	} // remove
+} // forall
+
+struct WRequest {										// client request, no return
+	void (* action)( void );
+	DLISTED_MGD_IMPL_IN(WRequest)
+}; // WRequest
+DLISTED_MGD_IMPL_OUT(WRequest)
+
+void ?{}( WRequest & req ) with(req) { action = 0; }
+void ?{}( WRequest & req, void (* action)( void ) ) with(req) { req.action = action; }
+bool stop( WRequest & req ) { return req.action == 0; }
+void doit( WRequest & req ) { req.action(); }
+
+// Each worker has its own set (when requests buffers > workers) of work buffers to reduce contention between client
+// and server, where work requests arrive and are distributed into buffers in a roughly round-robin order.
+
+thread Worker {
+	Buffer(WRequest) * requests;
+	WRequest * request;
+	unsigned int start, range;
+}; // Worker
+
+void main( Worker & w ) with(w) {
+	for ( int i = 0;; i = (i + 1) % range ) {
+		request = remove( requests[i + start] );
+	  if ( ! request ) { yield(); continue; }
+	  if ( stop( *request ) ) break;
+		doit( *request );
+		delete( request );
+	} // for
+} // Worker::main
+
+void ?{}( Worker & worker, cluster * wc, Buffer(WRequest) * requests, unsigned int start, unsigned int range ) {
+	((thread &)worker){ *wc };
+	worker.[requests, request, start, range] = [requests, ((WRequest *) 0), start, range];
+} // ?{}
+
+WRequest * current_request( Worker & worker ) { return worker.request; }
+
+struct Executor {
+	cluster * cluster;									// if workers execute on separate cluster
+	processor ** processors;							// array of virtual processors adding parallelism for workers
+	Buffer(WRequest) * requests;						// list of work requests
+	Worker ** workers;									// array of workers executing work requests
+	unsigned int nprocessors, nworkers, nrqueues;		// number of processors/threads/request queues
+	bool sepClus;										// use same or separate cluster for executor
+	unsigned int next;									// demultiplexed across worker buffers
+}; // Executor
+
+unsigned int tickets( Executor & ex ) with(ex) {
+	//return uFetchAdd( next, 1 ) % nrqueues;
+	return next++ % nrqueues;							// no locking, interference randomizes
+} // tickets
+
+void ?{}( Executor & ex, unsigned int np, unsigned int nw, unsigned int nr, bool sc = false ) with(ex) {
+	[nprocessors, nworkers, nrqueues, sepClus] = [np, nw, nr, sc];
+	assert( nrqueues >= nworkers );
+	cluster = 0p;
+	processors = aalloc( nprocessors );
+	requests = anew( nrqueues );
+	workers = aalloc( nworkers );
+
+	for ( i; nprocessors ) {
+		processors[i] = new( *cluster );
+	} // for
+
+	unsigned int reqPerWorker = nrqueues / nworkers, extras = nrqueues % nworkers;
+//	for ( unsigned int i = 0, start = 0, range; i < nworkers; i += 1, start += range ) {
+    for ( i; nworkers : start; 0u ~ @ ~ range : range; ) {
+	    range = reqPerWorker + ( i < extras ? 1 : 0 );
+		workers[i] = new( cluster, requests, start, range );
+	} // for
+} // ?{}
+
+void ?{}( Executor & ex, unsigned int nprocessors, unsigned int nworkers, bool sepClus = false ) {
+	ex{ nprocessors, nworkers, nworkers, sepClus };
+}
+void ?{}( Executor & ex, unsigned int nprocessors, bool sepClus = false ) {
+	ex{ nprocessors, nprocessors, nprocessors, sepClus };
+}
+void ?{}( Executor & ex ) {								// special for current cluster, no processors added
+	ex{ 0, 5, false };
+}
+void ^?{}( Executor & ex ) with(ex) {
+	// Add one sentinel per worker to stop them. Since in destructor, no new external work should be queued.  Cannot
+	// combine next two loops and only have a single sentinel because workers arrive in arbitrary order, so worker1 may
+	// take the single sentinel while waiting for worker 0 to end.
+
+	WRequest sentinel[nworkers];
+	unsigned int reqPerWorker = nrqueues / nworkers;
+	for ( unsigned int i = 0, step = 0; i < nworkers; i += 1, step += reqPerWorker ) {
+		insert( requests[step], &sentinel[i] );			// force eventually termination
+	} // for
+	for ( i; nworkers ) {
+		delete( workers[i] );
+	} // for
+	for ( i; nprocessors ) {
+		delete( processors[i] );
+	} // for
+
+	free( workers );
+//	adelete( nrqueues, requests );
+	for ( i; nrqueues ) ^?{}( requests[i] );			// FIX ME: problem with resolver
+	free( requests );
+	free( processors );
+	if ( sepClus ) { delete( cluster ); }
+} // ^?{}
+
+void send( Executor & ex, void (* action)( void ) ) {	// asynchronous call, no return value
+	WRequest * node = new( action );
+	insert( ex.requests[tickets( ex )], node );
+} // send
+
+
+int counter = 0;
+
+void work( void ) {
+	__atomic_add_fetch( &counter, 1, __ATOMIC_SEQ_CST );
+	// fprintf( stderr, "workie\n" );
+}
+
+int main( int argc, char * argv[] ) {
+	int times = 1_000_000;
+	if ( argc == 2 ) times = atoi( argv[1] );
+	processor p[7];
+	{
+		Executor exector;
+		for ( i; times ) {
+			send( exector, work );
+			if ( i % 100 == 0 ) yield();
+		} // for
+	}
+	printf( "%d\n", counter );
+}
+
+// Local Variables: //
+// tab-width: 4" //
+// compile-command: "cfa executor.cfa" //
+// End: //
Index: libcfa/src/executor.cfa
===================================================================
--- libcfa/src/executor.cfa	(revision 7d51ef867a6e78e1b2b6f9e5456b64a42c555758)
+++ libcfa/src/executor.cfa	(revision 69914cbc38918b375971f7accca59529f62f2d08)
@@ -7,21 +7,21 @@
 #include <containers/list.hfa>
 
-forall( T & | $dlistable(T, T) ) {
+forall( T &, TLink& = dlink(T) | embedded(T, TLink, dlink(T)) ) {
 	monitor Buffer {									// unbounded buffer
-		dlist( T, T ) queue;							// unbounded list of work requests
+		dlist( T, TLink ) queue;						// unbounded list of work requests
 		condition delay;
 	}; // Buffer
 
-	void insert( Buffer(T) & mutex buf, T * elem ) with(buf) {
-		dlist( T, T ) * qptr = &queue;					// workaround https://cforall.uwaterloo.ca/trac/ticket/166
+	void insert( Buffer(T, TLink) & mutex buf, T * elem ) with(buf) {
+		dlist( T, TLink ) * qptr = &queue;				// workaround https://cforall.uwaterloo.ca/trac/ticket/166
 		insert_last( *qptr, *elem );					// insert element into buffer
 		signal( delay );								// restart
 	} // insert
 
-	T * remove( Buffer(T) & mutex buf ) with(buf) {
-		dlist( T, T ) * qptr = &queue;					// workaround https://cforall.uwaterloo.ca/trac/ticket/166
-		// if ( (*qptr)`is_empty ) wait( delay );			// no request to process ? => wait
-	  if ( (*qptr)`is_empty ) return 0p;				// no request to process ? => wait
-		return &pop_first( *qptr );
+	T * remove( Buffer(T, TLink) & mutex buf ) with(buf) {
+		dlist( T, TLink ) * qptr = &queue;				// workaround https://cforall.uwaterloo.ca/trac/ticket/166
+		// if ( (*qptr)`isEmpty ) wait( delay );		// no request to process ? => wait
+	  if ( (*qptr)`isEmpty ) return 0p;					// no request to process ? => wait
+		return &try_pop_front( *qptr );
 	} // remove
 } // forall
@@ -29,7 +29,7 @@
 struct WRequest {										// client request, no return
 	void (* action)( void );
-	DLISTED_MGD_IMPL_IN(WRequest)
+	inline dlink(WRequest);
 }; // WRequest
-DLISTED_MGD_IMPL_OUT(WRequest)
+P9_EMBEDDED(WRequest, dlink(WRequest))
 
 void ?{}( WRequest & req ) with(req) { action = 0; }
