Index: doc/theses/thierry_delisle_PhD/code/links.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/links.hpp	(revision ffa48a8bcd17b272b20f1563ed18140df52053e1)
+++ doc/theses/thierry_delisle_PhD/code/links.hpp	(revision ffa48a8bcd17b272b20f1563ed18140df52053e1)
@@ -0,0 +1,122 @@
+#pragma once
+
+#include "assert.hpp"
+#include "utils.hpp"
+
+template<typename node_t>
+struct _LinksFields_t {
+	node_t * prev = nullptr;
+	node_t * next = nullptr;
+	volatile unsigned long long ts = 0;
+	unsigned hint = (unsigned)-1;
+};
+
+template<typename node_t>
+class __attribute__((aligned(128))) intrusive_queue_t {
+public:
+	typedef spinlock_t lock_t;
+
+	struct stat {
+		ssize_t diff = 0;
+		size_t  push = 0;
+		size_t  pop  = 0;
+	};
+
+private:
+	struct sentinel_t {
+		_LinksFields_t<node_t> _links;
+	};
+
+public:
+	lock_t lock;
+
+private:
+	sentinel_t before;
+	sentinel_t after;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winvalid-offsetof"
+	static constexpr auto fields_offset = offsetof( node_t, _links );
+#pragma GCC diagnostic pop
+public:
+	intrusive_queue_t()
+		: before{{ nullptr, tail() }}
+		, after {{ head(), nullptr }}
+	{
+		/* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
+		/* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
+		/* paranoid */ assert(head()->_links.prev == nullptr);
+		/* paranoid */ assert(head()->_links.next == tail() );
+		/* paranoid */ assert(tail()->_links.next == nullptr);
+		/* paranoid */ assert(tail()->_links.prev == head() );
+		/* paranoid */ assert(sizeof(*this) == 128);
+		/* paranoid */ assert((intptr_t(this) % 128) == 0);
+	}
+
+	~intrusive_queue_t() = default;
+
+	inline node_t * head() const {
+		node_t * rhead = reinterpret_cast<node_t *>(
+			reinterpret_cast<uintptr_t>( &before ) - fields_offset
+		);
+		assert(rhead);
+		return rhead;
+	}
+
+	inline node_t * tail() const {
+		node_t * rtail = reinterpret_cast<node_t *>(
+			reinterpret_cast<uintptr_t>( &after ) - fields_offset
+		);
+		assert(rtail);
+		return rtail;
+	}
+
+	inline bool push(node_t * node) {
+		assert(lock);
+		assert(node->_links.ts != 0);
+		node_t * tail = this->tail();
+
+		node_t * prev = tail->_links.prev;
+		// assertf(node->_links.ts >= prev->_links.ts,
+		// 	"New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
+		node->_links.next = tail;
+		node->_links.prev = prev;
+		prev->_links.next = node;
+		tail->_links.prev = node;
+
+		if(before._links.ts == 0l) {
+			before._links.ts = node->_links.ts;
+			assert(node->_links.prev == this->head());
+			return true;
+		}
+		return false;
+	}
+
+	inline std::pair<node_t *, bool> pop() {
+		assert(lock);
+		node_t * head = this->head();
+		node_t * tail = this->tail();
+
+		node_t * node = head->_links.next;
+		node_t * next = node->_links.next;
+		if(node == tail) return {nullptr, false};
+
+		head->_links.next = next;
+		next->_links.prev = head;
+
+		if(next == tail) {
+			before._links.ts = 0l;
+			return {node, true};
+		}
+		else {
+			assert(next->_links.ts != 0);
+			before._links.ts = next->_links.ts;
+			assert(before._links.ts != 0);
+			return {node, false};
+		}
+	}
+
+	long long ts() const {
+		return before._links.ts;
+	}
+};
Index: doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp	(revision a5873bd9568f82230e97ba50cf4e33da0f932488)
+++ doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp	(revision ffa48a8bcd17b272b20f1563ed18140df52053e1)
@@ -1,3 +1,10 @@
-#include "relaxed_list.hpp"
+#if !defined(LIST_VARIANT_HPP)
+#define LIST_VARIANT_HPP "relaxed_list.hpp"
+#endif
+
+#include LIST_VARIANT_HPP
+#if !defined(LIST_VARIANT)
+#error not variant selected
+#endif
 
 #include <array>
@@ -35,12 +42,12 @@
 
 template<>
-thread_local relaxed_list<Node>::TLS relaxed_list<Node>::tls = {};
+thread_local LIST_VARIANT<Node>::TLS LIST_VARIANT<Node>::tls = {};
 
 template<>
-relaxed_list<Node> * relaxed_list<Node>::head = nullptr;
+std::atomic_uint32_t LIST_VARIANT<Node>::ticket = { 0 };
 
 #ifndef NO_STATS
 template<>
-relaxed_list<Node>::GlobalStats relaxed_list<Node>::global_stats = {};
+LIST_VARIANT<Node>::GlobalStats LIST_VARIANT<Node>::global_stats = {};
 #endif
 
@@ -120,5 +127,5 @@
 	atomic_min(global.valmin, local.valmin);
 
-	relaxed_list<Node>::stats_tls_tally();
+	LIST_VARIANT<Node>::stats_tls_tally();
 }
 
@@ -199,5 +206,5 @@
 	std::cout << "Total ops     : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n";
 	#ifndef NO_STATS
-		relaxed_list<Node>::stats_print(std::cout);
+		LIST_VARIANT<Node>::stats_print(std::cout);
 	#endif
 }
@@ -216,5 +223,5 @@
 	unsigned nslots,
 	local_stat_t & local,
-	relaxed_list<Node> & list
+	LIST_VARIANT<Node> & list
 ) {
 	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
@@ -254,5 +261,5 @@
 	std::cout << "Initializing ";
 	size_t npushed = 0;
-	relaxed_list<Node> list = { nthread * nqueues };
+	LIST_VARIANT<Node> list = { nthread, nqueues };
 	{
 		Node** all_nodes[nthread];
@@ -340,5 +347,5 @@
 	unsigned nnodes,
 	local_stat_t & local,
-	relaxed_list<Node> & list
+	LIST_VARIANT<Node> & list
 ) {
 	Node * nodes[nnodes];
@@ -384,5 +391,5 @@
 	std::cout << "Initializing ";
 	// List being tested
-	relaxed_list<Node> list = { nthread * nqueues };
+	LIST_VARIANT<Node> list = { nthread, nqueues };
 	{
 		enable_stats = true;
@@ -441,5 +448,5 @@
 	int nslots,
 	local_stat_t & local,
-	relaxed_list<Node> & list
+	LIST_VARIANT<Node> & list
 ) {
 	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
@@ -518,5 +525,5 @@
 
 	// List being tested
-	relaxed_list<Node> list = { nthread * nqueues };
+	LIST_VARIANT<Node> list = { nthread, nqueues };
 	{
 		Random rand(rdtscl());
@@ -593,5 +600,5 @@
 	unsigned nnodes,
 	local_stat_t & local,
-	relaxed_list<Node> & list
+	LIST_VARIANT<Node> & list
 ) {
 	Node * nodes[nnodes];
@@ -653,5 +660,5 @@
 
 	// List being tested
-	relaxed_list<Node> list = { nthread * nqueues };
+	LIST_VARIANT<Node> list = { nthread, nqueues };
 	{
 		enable_stats = true;
@@ -921,5 +928,5 @@
 
 	std::cout << "Running " << nthreads << " threads (" << (nthreads * nqueues) << " queues) for " << duration << " seconds" << std::endl;
-	std::cout << "Relaxed list variant: " << relaxed_list<Node>::name() << std::endl;
+	std::cout << "Relaxed list variant: " << LIST_VARIANT<Node>::name() << std::endl;
 	switch(benchmark) {
 		case Churn:
Index: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp	(revision a5873bd9568f82230e97ba50cf4e33da0f932488)
+++ doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp	(revision ffa48a8bcd17b272b20f1563ed18140df52053e1)
@@ -1,6 +1,4 @@
 #pragma once
-
-#define MACRO_XSTR(s) MACRO_STR(s)
-#define MACRO_STR(s) #s
+#define LIST_VARIANT relaxed_list
 
 #define VANILLA 0
@@ -9,4 +7,5 @@
 #define DISCOVER 3
 #define SNZM 4
+#define BIAS 5
 
 #ifndef VARIANT
@@ -25,10 +24,9 @@
 #include "assert.hpp"
 #include "utils.hpp"
+#include "links.hpp"
 #include "snzi.hpp"
 #include "snzm.hpp"
 
 using namespace std;
-
-extern bool enable_stats;
 
 struct pick_stat {
@@ -36,4 +34,5 @@
 		size_t attempt = 0;
 		size_t success = 0;
+		size_t local = 0;
 	} push;
 	struct {
@@ -42,4 +41,5 @@
 		size_t mask_attempt = 0;
 		size_t mask_reset = 0;
+		size_t local = 0;
 	} pop;
 };
@@ -57,11 +57,4 @@
 
 template<typename node_t>
-struct _LinksFields_t {
-	node_t * prev = nullptr;
-	node_t * next = nullptr;
-	unsigned long long ts = 0;
-};
-
-template<typename node_t>
 class __attribute__((aligned(128))) relaxed_list {
 	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
@@ -70,18 +63,19 @@
 	static const char * name() {
 		const char * names[] = {
-			"VANILLA",
-			"SNZI",
-			"BITMASK",
-			"SNZI + DISCOVERED MASK",
-			"SNZI + MASK"
+			"RELAXED: VANILLA",
+			"RELAXED: SNZI",
+			"RELAXED: BITMASK",
+			"RELAXED: SNZI + DISCOVERED MASK",
+			"RELAXED: SNZI + MASK",
+			"RELAXED: SNZI + LOCAL BIAS"
 		};
 		return names[VARIANT];
 	}
 
-	relaxed_list(unsigned numLists)
-	  	: lists(new intrusive_queue_t[numLists])
-		, numLists(numLists)
-		#if VARIANT == SNZI
-			, snzi( std::log2( numLists / 8 ), 2 )
+	relaxed_list(unsigned numThreads, unsigned numQueues)
+		: numLists(numThreads * numQueues)
+	  	, lists(new intrusive_queue_t<node_t>[numLists])
+		#if VARIANT == SNZI || VARIANT == BIAS
+			, snzi( std::log2( numLists / (2 * numQueues) ), 2 )
 		#elif VARIANT == SNZM || VARIANT == DISCOVER
 			, snzm( numLists )
@@ -89,11 +83,5 @@
 	{
 		assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
-		// assert(sizeof(*this) == 128);
 		std::cout << "Constructing Relaxed List with " << numLists << std::endl;
-
-		#ifndef NO_STATS
-			if(head) this->next = head;
-			head = this;
-		#endif
 	}
 
@@ -108,5 +96,17 @@
 		while(true) {
 			// Pick a random list
+			#if VARIANT == BIAS
+			unsigned r = tls.rng.next();
+			unsigned i;
+			if(0 == (r & 0xF)) {
+				i = r >> 4;
+			} else {
+				i = tls.my_queue + ((r >> 4) % 4);
+				tls.pick.push.local++;
+			}
+			i %= numLists;
+			#else
 			unsigned i = tls.rng.next() % numLists;
+			#endif
 
 			#ifndef NO_STATS
@@ -117,5 +117,5 @@
 			if( !lists[i].lock.try_lock() ) continue;
 
-			#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
+			#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
 				__attribute__((unused)) int num = numNonEmpty;
 			#endif
@@ -129,5 +129,5 @@
 					bts(tls.mask, bit);
 					snzm.arrive(i);
-				#elif VARIANT == SNZI
+				#elif VARIANT == SNZI || VARIANT == BIAS
 					snzi.arrive(i);
 				#elif VARIANT == SNZM
@@ -145,5 +145,5 @@
 				#endif
 			}
-			#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
+			#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
 				assert(numNonEmpty <= (int)numLists);
 			#endif
@@ -154,5 +154,5 @@
 			#ifndef NO_STATS
 				tls.pick.push.success++;
-				#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
+				#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
 					tls.empty.push.value += num;
 					tls.empty.push.count += 1;
@@ -198,5 +198,24 @@
 				// Pick two lists at random
 				int i = tls.rng.next() % numLists;
-				int j = tls.rng.next() % numLists;
+				// int j = tls.rng.next() % numLists;
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+
+		#elif VARIANT == BIAS
+			while(snzi.query()) {
+				// Pick two lists at random
+				unsigned ri = tls.rng.next();
+				unsigned i;
+				unsigned j = tls.rng.next();
+				if(0 == (ri & 0xF)) {
+					i = (ri >> 4) % numLists;
+				} else {
+					i = tls.my_queue + ((ri >> 4) % 4);
+					j = tls.my_queue + ((j >> 4) % 4);
+					tls.pick.pop.local++;
+				}
+				i %= numLists;
+				j %= numLists;
 
 				if(auto node = try_pop(i, j)) return node;
@@ -214,8 +233,8 @@
 					// Pick two nodes from it
 					unsigned wdxi = ri & snzm.mask;
-					unsigned wdxj = rj & snzm.mask;
+					// unsigned wdxj = rj & snzm.mask;
 
 					// Get the masks from the nodes
-					size_t maski = snzm.masks(wdxi);
+					// size_t maski = snzm.masks(wdxi);
 					size_t maskj = snzm.masks(wdxj);
 
@@ -224,8 +243,8 @@
 					#if defined(__BMI2__)
 						uint64_t idxsi = _pext_u64(snzm.indexes, maski);
-						uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
+						// uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
 
 						auto pi = __builtin_popcountll(maski);
-						auto pj = __builtin_popcountll(maskj);
+						// auto pj = __builtin_popcountll(maskj);
 
 						ri = pi ? ri & ((pi >> 3) - 1) : 0;
@@ -329,5 +348,5 @@
 		if( !list.lock.try_lock() ) return nullptr;
 
-		#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
+		#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER  && VARIANT != BIAS
 			__attribute__((unused)) int num = numNonEmpty;
 		#endif
@@ -352,5 +371,5 @@
 				__attribute__((unused)) bool ret = btr(tls.mask, bit);
 				snzm.depart(w);
-			#elif VARIANT == SNZI
+			#elif VARIANT == SNZI || VARIANT == BIAS
 				snzi.depart(w);
 			#elif VARIANT == SNZM
@@ -371,10 +390,10 @@
 		// Unlock and return
 		list.lock.unlock();
-		#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
+		#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
 			assert(numNonEmpty >= 0);
 		#endif
 		#ifndef NO_STATS
 			tls.pick.pop.success++;
-			#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER
+			#if VARIANT != SNZM && VARIANT != SNZI && VARIANT != DISCOVER && VARIANT != BIAS
 				tls.empty.pop.value += num;
 				tls.empty.pop.count += 1;
@@ -384,128 +403,4 @@
 	}
 
-private:
-
-	class __attribute__((aligned(128))) intrusive_queue_t {
-	public:
-		typedef spinlock_t lock_t;
-
-		friend class relaxed_list<node_t>;
-
-		struct stat {
-			ssize_t diff = 0;
-			size_t  push = 0;
-			size_t  pop  = 0;
-		};
-
-	private:
-		struct sentinel_t {
-			_LinksFields_t<node_t> _links;
-		};
-
-		lock_t lock;
-		sentinel_t before;
-		sentinel_t after;
-		#ifndef NO_STATS
-			stat s;
-		#endif
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Winvalid-offsetof"
-		static constexpr auto fields_offset = offsetof( node_t, _links );
-#pragma GCC diagnostic pop
-	public:
-		intrusive_queue_t()
-			: before{{ nullptr, tail() }}
-			, after {{ head(), nullptr }}
-		{
-			/* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
-			/* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
-			/* paranoid */ assert(head()->_links.prev == nullptr);
-			/* paranoid */ assert(head()->_links.next == tail() );
-			/* paranoid */ assert(tail()->_links.next == nullptr);
-			/* paranoid */ assert(tail()->_links.prev == head() );
-			/* paranoid */ assert(sizeof(*this) == 128);
-			/* paranoid */ assert((intptr_t(this) % 128) == 0);
-		}
-
-		~intrusive_queue_t() = default;
-
-		inline node_t * head() const {
-			node_t * rhead = reinterpret_cast<node_t *>(
-				reinterpret_cast<uintptr_t>( &before ) - fields_offset
-			);
-			assert(rhead);
-			return rhead;
-		}
-
-		inline node_t * tail() const {
-			node_t * rtail = reinterpret_cast<node_t *>(
-				reinterpret_cast<uintptr_t>( &after ) - fields_offset
-			);
-			assert(rtail);
-			return rtail;
-		}
-
-		inline bool push(node_t * node) {
-			assert(lock);
-			assert(node->_links.ts != 0);
-			node_t * tail = this->tail();
-
-			node_t * prev = tail->_links.prev;
-			// assertf(node->_links.ts >= prev->_links.ts,
-			// 	"New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
-			node->_links.next = tail;
-			node->_links.prev = prev;
-			prev->_links.next = node;
-			tail->_links.prev = node;
-			#ifndef NO_STATS
-				if(enable_stats) {
-					s.diff++;
-					s.push++;
-				}
-			#endif
-			if(before._links.ts == 0l) {
-				before._links.ts = node->_links.ts;
-				assert(node->_links.prev == this->head());
-				return true;
-			}
-			return false;
-		}
-
-		inline std::pair<node_t *, bool> pop() {
-			assert(lock);
-			node_t * head = this->head();
-			node_t * tail = this->tail();
-
-			node_t * node = head->_links.next;
-			node_t * next = node->_links.next;
-			if(node == tail) return {nullptr, false};
-
-			head->_links.next = next;
-			next->_links.prev = head;
-
-			#ifndef NO_STATS
-				if(enable_stats) {
-					s.diff--;
-					s.pop ++;
-				}
-			#endif
-			if(next == tail) {
-				before._links.ts = 0l;
-				return {node, true};
-			}
-			else {
-				assert(next->_links.ts != 0);
-				before._links.ts = next->_links.ts;
-				assert(before._links.ts != 0);
-				return {node, false};
-			}
-		}
-
-		long long ts() const {
-			return before._links.ts;
-		}
-	};
-
 
 public:
@@ -513,4 +408,5 @@
 	static __attribute__((aligned(128))) thread_local struct TLS {
 		Random     rng = { int(rdtscl()) };
+		unsigned   my_queue = (ticket++) * 4;
 		pick_stat  pick;
 		empty_stat empty;
@@ -519,8 +415,8 @@
 
 private:
-    	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
 	const unsigned numLists;
+    	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t<node_t> []> lists;
 private:
-	#if VARIANT == SNZI
+	#if VARIANT == SNZI || VARIANT == BIAS
 		snzi_t snzi;
 	#elif VARIANT == SNZM || VARIANT == DISCOVER
@@ -534,22 +430,17 @@
 
 public:
-	static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t);
+	static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t<node_t>);
+	static std::atomic_uint32_t ticket;
 
 #ifndef NO_STATS
-	static void stats_print(std::ostream & os) {
-		auto it = head;
-		while(it) {
-			it->stats_print_local(os);
-			it = it->next;
-		}
-	}
-
 	static void stats_tls_tally() {
 		global_stats.pick.push.attempt += tls.pick.push.attempt;
 		global_stats.pick.push.success += tls.pick.push.success;
+		global_stats.pick.push.local += tls.pick.push.local;
 		global_stats.pick.pop .attempt += tls.pick.pop.attempt;
 		global_stats.pick.pop .success += tls.pick.pop.success;
 		global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
 		global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
+		global_stats.pick.pop .local += tls.pick.pop.local;
 
 		global_stats.qstat.push.value += tls.empty.push.value;
@@ -565,4 +456,5 @@
 				std::atomic_size_t attempt = { 0 };
 				std::atomic_size_t success = { 0 };
+				std::atomic_size_t local = { 0 };
 			} push;
 			struct {
@@ -571,4 +463,5 @@
 				std::atomic_size_t mask_attempt = { 0 };
 				std::atomic_size_t mask_reset = { 0 };
+				std::atomic_size_t local = { 0 };
 			} pop;
 		} pick;
@@ -585,26 +478,7 @@
 	} global_stats;
 
-	// Link list of all lists for stats
-	__attribute__((aligned(64))) relaxed_list<node_t> * next = nullptr;
-
-	static relaxed_list<node_t> * head;
-
-	void stats_print_local(std::ostream & os ) {
+public:
+	static void stats_print(std::ostream & os ) {
 		std::cout << "----- Relaxed List Stats -----" << std::endl;
-		// {
-		// 	ssize_t diff = 0;
-		// 	size_t  num  = 0;
-		// 	ssize_t max  = 0;
-
-		// 	for(size_t i = 0; i < numLists; i++) {
-		// 		const auto & list = lists[i];
-		// 		diff+= list.s.diff;
-		// 		num ++;
-		// 		max  = std::abs(max) > std::abs(list.s.diff) ? max : list.s.diff;
-		// 		os << "Local Q ops   : " << (list.s.push + list.s.pop) << "(" << list.s.push << "i, " << list.s.pop << "o)\n";
-		// 	}
-
-		// 	os << "Difference   : " << ssize_t(double(diff) / num  ) << " avg\t" << max << "max" << std::endl;
-		// }
 
 		const auto & global = global_stats;
@@ -631,4 +505,7 @@
 		os << "Pop    Avg Qs : " << avgQ_pop  << " (" << global.qstat.pop .count << "ops)\n";
 		os << "Global Avg Qs : " << avgQ      << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
+
+		os << "Local Push    : " << global.pick.push.local << "\n";
+		os << "Local Pop     : " << global.pick.pop .local << "\n";
 	}
 #endif
Index: doc/theses/thierry_delisle_PhD/code/work_stealing.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/work_stealing.hpp	(revision ffa48a8bcd17b272b20f1563ed18140df52053e1)
+++ doc/theses/thierry_delisle_PhD/code/work_stealing.hpp	(revision ffa48a8bcd17b272b20f1563ed18140df52053e1)
@@ -0,0 +1,222 @@
+#pragma once
+#define LIST_VARIANT work_stealing
+
+#include <cmath>
+#include <iomanip>
+#include <memory>
+#include <mutex>
+#include <type_traits>
+
+#include "assert.hpp"
+#include "utils.hpp"
+#include "links.hpp"
+#include "snzi.hpp"
+
+using namespace std;
+
+template<typename node_t>
+class __attribute__((aligned(128))) work_stealing {
+	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
+
+public:
+	static const char * name() {
+		return "Work Stealing";
+	}
+
+	work_stealing(unsigned _numThreads, unsigned)
+		: numThreads(_numThreads)
+		, lists(new intrusive_queue_t<node_t>[numThreads])
+		, snzi( std::log2( numThreads / 2 ), 2 )
+
+	{
+		std::cout << "Constructing Work Stealer with " << numThreads << std::endl;
+	}
+
+	~work_stealing() {
+		std::cout << "Destroying Work Stealer" << std::endl;
+		lists.reset();
+	}
+
+	__attribute__((noinline, hot)) void push(node_t * node) {
+		node->_links.ts = rdtscl();
+		if( node->_links.hint > numThreads ) {
+			node->_links.hint = tls.rng.next() % numThreads;
+			tls.stat.push.nhint++;
+		}
+
+		unsigned i = node->_links.hint;
+		auto & list = lists[i];
+		list.lock.lock();
+
+		if(list.push( node )) {
+			snzi.arrive(i);
+		}
+
+		list.lock.unlock();
+	}
+
+	__attribute__((noinline, hot)) node_t * pop() {
+		node_t * node;
+		while(true) {
+			if(!snzi.query()) {
+				return nullptr;
+			}
+
+			{
+				unsigned i = tls.my_queue;
+				auto & list = lists[i];
+				if( list.ts() != 0 ) {
+					list.lock.lock();
+					if((node = try_pop(i))) {
+						tls.stat.pop.local.success++;
+						break;
+					}
+					else {
+						tls.stat.pop.local.elock++;
+					}
+				}
+				else {
+					tls.stat.pop.local.espec++;
+				}
+			}
+
+			tls.stat.pop.steal.tried++;
+
+			int i = tls.rng.next() % numThreads;
+			auto & list = lists[i];
+			if( list.ts() == 0 ) {
+				tls.stat.pop.steal.empty++;
+				continue;
+			}
+
+			if( !list.lock.try_lock() ) {
+				tls.stat.pop.steal.locked++;
+				continue;
+			}
+
+			if((node = try_pop(i))) {
+				tls.stat.pop.steal.success++;
+				break;
+			}
+		}
+
+		#if defined(READ)
+			const unsigned f = READ;
+			if(0 == (tls.it % f)) {
+				unsigned i = tls.it / f;
+				lists[i % numThreads].ts();
+			}
+			// lists[tls.it].ts();
+			tls.it++;
+		#endif
+
+
+		return node;
+	}
+
+private:
+	node_t * try_pop(unsigned i) {
+		auto & list = lists[i];
+
+		// If list is empty, unlock and retry
+		if( list.ts() == 0 ) {
+			list.lock.unlock();
+			return nullptr;
+		}
+
+			// Actually pop the list
+		node_t * node;
+		bool emptied;
+		std::tie(node, emptied) = list.pop();
+		assert(node);
+
+		if(emptied) {
+			snzi.depart(i);
+		}
+
+		// Unlock and return
+		list.lock.unlock();
+		return node;
+	}
+
+
+public:
+
+	static std::atomic_uint32_t ticket;
+	static __attribute__((aligned(128))) thread_local struct TLS {
+		Random     rng = { int(rdtscl()) };
+		unsigned   my_queue = ticket++;
+		#if defined(READ)
+			unsigned it = 0;
+		#endif
+		struct {
+			struct {
+				std::size_t nhint = { 0 };
+			} push;
+			struct {
+				struct {
+					std::size_t success = { 0 };
+					std::size_t espec = { 0 };
+					std::size_t elock = { 0 };
+				} local;
+				struct {
+					std::size_t tried   = { 0 };
+					std::size_t locked  = { 0 };
+					std::size_t empty   = { 0 };
+					std::size_t success = { 0 };
+				} steal;
+			} pop;
+		} stat;
+	} tls;
+
+private:
+	const unsigned numThreads;
+    	std::unique_ptr<intrusive_queue_t<node_t> []> lists;
+	__attribute__((aligned(64))) snzi_t snzi;
+
+#ifndef NO_STATS
+private:
+	static struct GlobalStats {
+		struct {
+			std::atomic_size_t nhint = { 0 };
+		} push;
+		struct {
+			struct {
+				std::atomic_size_t success = { 0 };
+				std::atomic_size_t espec = { 0 };
+				std::atomic_size_t elock = { 0 };
+			} local;
+			struct {
+				std::atomic_size_t tried   = { 0 };
+				std::atomic_size_t locked  = { 0 };
+				std::atomic_size_t empty   = { 0 };
+				std::atomic_size_t success = { 0 };
+			} steal;
+		} pop;
+	} global_stats;
+
+public:
+	static void stats_tls_tally() {
+		global_stats.push.nhint += tls.stat.push.nhint;
+		global_stats.pop.local.success += tls.stat.pop.local.success;
+		global_stats.pop.local.espec   += tls.stat.pop.local.espec  ;
+		global_stats.pop.local.elock   += tls.stat.pop.local.elock  ;
+		global_stats.pop.steal.tried   += tls.stat.pop.steal.tried  ;
+		global_stats.pop.steal.locked  += tls.stat.pop.steal.locked ;
+		global_stats.pop.steal.empty   += tls.stat.pop.steal.empty  ;
+		global_stats.pop.steal.success += tls.stat.pop.steal.success;
+	}
+
+	static void stats_print(std::ostream & os ) {
+		std::cout << "----- Work Stealing Stats -----" << std::endl;
+
+		double stealSucc = double(global_stats.pop.steal.success) / global_stats.pop.steal.tried;
+		os << "Push to new Q : " << std::setw(15) << global_stats.push.nhint << "\n";
+		os << "Local Pop     : " << std::setw(15) << global_stats.pop.local.success << "\n";
+		os << "Steal Pop     : " << std::setw(15) << global_stats.pop.steal.success << "(" << global_stats.pop.local.espec << "s, " << global_stats.pop.local.elock << "l)\n";
+		os << "Steal Success : " << std::setw(15) << stealSucc << "(" << global_stats.pop.steal.tried << " tries)\n";
+		os << "Steal Fails   : " << std::setw(15) << global_stats.pop.steal.empty << "e, " << global_stats.pop.steal.locked << "l\n";
+	}
+private:
+#endif
+};
