Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/dynamic_entropy.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/dynamic_entropy.hpp	(revision 56c8b86a906726495fce9a11b079a122045c7ffd)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/dynamic_entropy.hpp	(revision a1b9bc32a15e98b204ca26969764c493704517ea)
@@ -33,5 +33,5 @@
 		do {
 			// Pick the index of a lane
-			i = idx_from_r(tls.rng1.next(), tls.my_queue);
+			i = idx_from_r(tls.rng1.next(), tls.my_queue).first;
 			// i = ret.first; //local = ret.second;
 			tls.stats.push.attempt++;
@@ -48,10 +48,10 @@
 			// Pick two lists at random
 			unsigned i, j;
-			// bool locali, localj;
-			i = idx_from_r(tls.rng2.prev(), tls.my_queue);
-			j = idx_from_r(tls.rng2.prev(), tls.my_queue);
-
-			// i = reti.first; //local = reti.second;
-			// j = retj.first; //local = retj.second;
+			bool locali, localj;
+			auto reti = idx_from_r(tls.rng2.prev(), tls.my_queue);
+			auto retj = idx_from_r(tls.rng2.prev(), tls.my_queue);
+
+			i = reti.first; locali = reti.second;
+			j = retj.first; localj = retj.second;
 			tls.stats.pop.attempt++;
 
@@ -108,7 +108,7 @@
 	}
 
-	inline unsigned idx_from_r(unsigned r, unsigned preferred) {
+	inline std::pair<unsigned, bool> idx_from_r(unsigned r, unsigned preferred) {
 		unsigned i;
-		// bool local;
+		bool local;
 		unsigned rlow  = r % 4;
 		unsigned rhigh = r / 4;
@@ -117,5 +117,5 @@
 			// Use perferred queues
 			i = preferred + (rhigh % 4);
-			// local = true;
+			local = true;
 		}
 		else {
@@ -123,7 +123,7 @@
 			// Use all queues
 			i = rhigh;
-			// local = false;
-		}
-		return i % numLists;
+			local = false;
+		}
+		return {i % numLists, local};
 	}
 private:
@@ -141,4 +141,5 @@
 		Random     rng1 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
 		Random     rng2 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
+		Random     rng3 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
 		unsigned   my_queue = calc_preferred();
 		struct {
@@ -185,5 +186,5 @@
 	}
 
-	static void stats_print(std::ostream & os) {
+	static void stats_print(std::ostream & os, double) {
 			const auto & global = global_stats;
 
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/links2.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/links2.hpp	(revision a1b9bc32a15e98b204ca26969764c493704517ea)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/links2.hpp	(revision a1b9bc32a15e98b204ca26969764c493704517ea)
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <assert.h>
+
+//------------------------------------------------------------
+// Queue based on the MCS lock.
+// It is a Multi-Producer/Single-Consumer queue: threads pushing
+// elements must hold on to the elements they push.
+// Not appropriate for an async message queue, for example.
+template<typename node_t>
+class mcs_queue {
+	node_t * volatile tail; // last element pushed, nullptr when the queue is empty
+
+public:
+	mcs_queue(): tail(nullptr) {}
+
+	inline bool empty() const { return !tail; } // volatile read of tail, no atomic builtin
+
+	node_t * push( node_t * elem ) { // returns the previous tail, nullptr if there was none
+		/* paranoid */ assert(!elem->_links.next);
+		// Race to add to the tail
+		node_t * prev = __atomic_exchange_n(&tail, elem, __ATOMIC_SEQ_CST);
+		// If we aren't the first, link the previous tail to us so the
+		// consumer can find us (advance spins until this link is set)
+		if (prev) prev->_links.next = elem;
+		return prev;
+	}
+
+	// Advances the head of the list, dropping the element given.
+	// Passing an element that is not the head is undefined behavior.
+	// NOT Multi-Thread Safe, but concurrent pushes are safe.
+	node_t * advance(node_t * elem) { // returns the element after elem, nullptr if elem was the tail
+		node_t * expected = elem;
+		// Check if this is already the last item; if so the tail is reset to nullptr
+		if (__atomic_compare_exchange_n(&tail, &expected, nullptr, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return nullptr;
+
+		// If not, wait for the next item to show up; the link is filled in by push
+		while (!elem->_links.next) asm volatile("pause");
+
+		// the link is set now: this is the element that follows elem
+		node_t * ret = elem->_links.next;
+
+		// invalidate link to reset to initial state
+		elem->_links.next = nullptr;
+		return ret;
+	}
+};
+
+//------------------------------------------------------------
+// Queue based on the MCS lock.
+// Extension of mcs_queue which supports 'blind' pops,
+// i.e., popping a value from the head without knowing what the head is.
+// It has no extra guarantees beyond those of mcs_queue.
+template<typename node_t>
+class mpsc_queue : private mcs_queue<node_t> {
+	node_t * volatile head; // first element of the queue, nullptr when none is known
+public:
+	mpsc_queue(): mcs_queue<node_t>(), head(nullptr) {}
+
+	inline bool empty() const { return mcs_queue<node_t>::empty(); }
+
+	// Add a new element to the queue, returns the previous tail (nullptr if there was none)
+	// Multi-Thread Safe, Lock-Free
+	inline node_t * push(node_t * elem) {
+		node_t * prev = mcs_queue<node_t>::push(elem);
+		if (!prev) head = elem; // no previous tail: elem is also the new head
+		return prev;
+	}
+
+	// Pop an element from the queue.
+	// Returns the element that was removed, or nullptr if head is not set.
+	// next is set to the new head of the queue (left untouched when nullptr is returned).
+	// NOT Multi-Thread Safe
+	inline node_t * pop(node_t *& next) {
+		node_t * elem = head;
+		// If head is empty just return
+		if (!elem) return nullptr;
+
+		// If there is already someone in the list, then it's easy
+		if (elem->_links.next) {
+			head = next = elem->_links.next;
+			// force memory sync
+			__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+			// invalidate link to reset to initial state
+			elem->_links.next = nullptr;
+		}
+		// Otherwise elem only looks like the last element: a push may be in the middle of enqueuing
+		else {
+			// Null out head here: we linearize with push at the CAS in
+			// advance, so a write to head made after that point could
+			// overwrite the write made in push
+			head = nullptr;
+			next = mcs_queue<node_t>::advance(elem);
+
+			// Only write to the head if there is a next element,
+			// it is the only way we can guarantee we are not overwriting
+			// a write made in push
+			if (next) head = next;
+		}
+
+		// return removed element
+		return elem;
+	}
+
+	// Same as pop(node_t *&), but the new head is discarded
+	inline node_t * pop() {
+		node_t * _ = nullptr;
+		return pop(_);
+	}
+};
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/ntmove.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/ntmove.cpp	(revision a1b9bc32a15e98b204ca26969764c493704517ea)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/ntmove.cpp	(revision a1b9bc32a15e98b204ca26969764c493704517ea)
@@ -0,0 +1,87 @@
+#include <atomic>
+#include <iostream>
+#include <locale>
+#include <thread>
+
+#include <x86intrin.h>
+
+struct __attribute__((aligned(128))) Global_t { // 128-byte aligned wrapper around the one shared word
+	volatile size_t value; // stored by write(), loaded by read()
+} global;
+
+static const size_t iterations = 1'000'000'000; // number of loads performed by reader()'s loop
+
+size_t read() { // returns the current global.value; the commented-out code is alternative load sequences
+	// size_t r = __atomic_load_n(&global.value, __ATOMIC_RELAXED);
+	// _mm_stream_si64((long long int*)&global.value, r);
+	// // _mm_clflush( (void*)&global.value );
+	// // __builtin_prefetch((void*)&global.value);
+	// asm volatile(
+	// 	"PREFETCHNTA %[target]"
+	// 	:
+	// 	: [target] "m" (global.value)
+	// );
+	// return r;
+	return __atomic_load_n(&global.value, __ATOMIC_SEQ_CST); // active variant: sequentially-consistent atomic load
+
+	// __m128i r = _mm_stream_load_si128((__m128i*)&global.value);
+	// asm volatile(
+	// 	"PREFETCHNTA %[target]"
+	// 	:
+	// 	: [target] "m" (global.value)
+	// );
+	// return ((Global_t*)&r)->value;
+	// size_t r;
+	// asm volatile(
+	// 	"MOVNTI %[target], %[r]\n\t"
+	// 	: [r] "=r" (r)
+	// 	: [target] "m" (global.value)
+	// );
+	// return r;
+}
+
+void write(size_t v) { // stores v into global.value; the commented-out code is alternative store sequences
+	// __atomic_store_n(&global.value, v, __ATOMIC_SEQ_CST);
+	// __atomic_store_n(&global.value, v, __ATOMIC_RELAXED);
+	// asm volatile(
+	// 	"MOVNTI %[v], %[target]\n\t"
+	// 	:
+	// 	: [target] "m" (global.value), [v] "r" (v)
+	// );
+	_mm_stream_si64((long long int*)&global.value, v); // active variant: non-temporal store intrinsic
+}
+
+void reader(size_t * reads, size_t * diffs, size_t * m) { // loads the shared word `iterations` times and updates the three counters
+	size_t last = read(); // initial sample, not counted in *reads
+	for(size_t i = 0; i < iterations; i++) {
+		size_t val = read();
+		if(last != val) (*diffs)++; // counts loads whose value differs from the previous load
+		last = val;
+		if(last > *m) *m = last; // *m tracks the largest value observed
+		(*reads)++; // counts loads performed inside the loop
+	}
+}
+
+std::atomic<bool> done = { false }; // set to true by main() once the reader thread has been joined
+
+void writer() { // keeps storing an increasing counter into the shared word until `done` is set
+	size_t v = 0;
+	while(!done) {
+		v++; // first value written is 1
+		write(v);
+		__atomic_thread_fence(__ATOMIC_SEQ_CST); // full fence after every store
+	}
+}
+
+int main() { // runs one writer thread against one reader thread, then prints "reads diffs max"
+	std::cout.imbue(std::locale("")); // format the printed numbers with the user's preferred locale
+	size_t reads = 0; // the three counters are filled in by reader() through the pointers below
+	size_t diffs = 0;
+	size_t max   = 0;
+	auto w = std::thread(writer);
+	auto r = std::thread(reader, &reads, &diffs, &max);
+	r.join(); // the reader returns by itself after `iterations` loads
+	done = true; // only then is the writer told to stop
+	w.join();
+	std::cout << reads << " " << diffs << " " << max << std::endl;
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.cpp	(revision 56c8b86a906726495fce9a11b079a122045c7ffd)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.cpp	(revision a1b9bc32a15e98b204ca26969764c493704517ea)
@@ -206,5 +206,5 @@
 	std::cout << "Total ops     : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n";
 	#ifndef NO_STATS
-		LIST_VARIANT<Node>::stats_print(std::cout);
+		LIST_VARIANT<Node>::stats_print(std::cout, duration);
 	#endif
 }
@@ -368,6 +368,8 @@
 
 		for(Node * & node : nodes) {
-			node = list.pop();
-			assert(node);
+			node = nullptr;
+			while(!node) {
+				node = list.pop();
+			}
 			local.crc_out += node->value;
 			local.out++;
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp	(revision 56c8b86a906726495fce9a11b079a122045c7ffd)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp	(revision a1b9bc32a15e98b204ca26969764c493704517ea)
@@ -6,4 +6,5 @@
 #include <memory>
 #include <mutex>
+#include <thread>
 #include <type_traits>
 
@@ -11,7 +12,24 @@
 #include "utils.hpp"
 #include "links.hpp"
+#include "links2.hpp"
 #include "snzi.hpp"
 
+#include <x86intrin.h>
+
 using namespace std;
+
+static const long long lim = 2000; // NOTE(review): not referenced by any other line visible in this patch
+static const unsigned nqueues = 2; // queues per thread: the constructor sizes the arrays as _numThreads * nqueues
+
+struct __attribute__((aligned(128))) timestamp_t { // 128-byte aligned slot; work_stealing allocates one per list (times[numThreads])
+	volatile unsigned long long val = 0; // pop() resets it to 0 when picking a friend queue, try_pop() sets it to 1 after a successful pop
+};
+
+template<typename node_t>
+struct __attribute__((aligned(128))) localQ_t { // NOTE(review): not referenced by any other line visible in this patch
+	mpsc_queue<node_t> queue = {}; // mpsc_queue comes from links2.hpp
+	spinlock_t lock = {}; // spinlock_t is declared outside this patch — presumably utils.hpp, TODO confirm
+	bool needs_help = true;
+};
 
 template<typename node_t>
@@ -25,7 +43,8 @@
 
 	work_stealing(unsigned _numThreads, unsigned)
-		: numThreads(_numThreads)
+		: numThreads(_numThreads * nqueues)
 		, lists(new intrusive_queue_t<node_t>[numThreads])
-		, snzi( std::log2( numThreads / 2 ), 2 )
+		, times(new timestamp_t[numThreads])
+		// , snzi( std::log2( numThreads / 2 ), 2 )
 
 	{
@@ -39,103 +58,116 @@
 
 	__attribute__((noinline, hot)) void push(node_t * node) {
-		node->_links.ts = rdtscl();
-		if( node->_links.hint > numThreads ) {
-			node->_links.hint = tls.rng.next() % numThreads;
-			tls.stat.push.nhint++;
-		}
-
-		unsigned i = node->_links.hint;
+		// node->_links.ts = rdtscl();
+		node->_links.ts = 1;
+
+		auto & list = *({
+			unsigned i;
+			do {
+				tls.stats.push.attempt++;
+				// unsigned r = tls.rng1.next();
+				unsigned r = tls.it++;
+				if(tls.my_queue == outside) {
+					i = r % numThreads;
+				} else {
+					i = tls.my_queue + (r % nqueues);
+				}
+			} while(!lists[i].lock.try_lock());
+		 	&lists[i];
+		});
+
+		list.push( node );
+		list.lock.unlock();
+		// tls.rng2.set_raw_state( tls.rng1.get_raw_state());
+		// count++;
+		tls.stats.push.success++;
+	}
+
+	__attribute__((noinline, hot)) node_t * pop() { // tries in order: friend queue, local queues, 25 random steals, full scan; nullptr if all fail
+		if( tls.myfriend == outside ) { // no friend queue selected yet
+			auto r  = tls.rng1.next();
+			tls.myfriend = r % numThreads; // numThreads is the number of lists (_numThreads * nqueues)
+			times[tls.myfriend].val = 0; // try_pop() writes 1 here after a successful pop from that list
+		}
+		else if(times[tls.myfriend].val == 0) { // presumably: nobody popped from the friend queue since the reset — verify
+			node_t * n = try_pop(tls.myfriend, tls.stats.pop.help);
+			tls.stats.help++; // counted whether or not the help attempt returned a node
+			tls.myfriend = outside; // a new friend is picked on a later call
+			if(n) return n;
+		}
+
+		if(tls.my_queue != outside) { // threads without preferred queues skip the local attempt
+			node_t * n = local();
+			if(n) return n;
+		}
+
+		// try to steal from randomly chosen queues, a bounded number of times
+		for(int i = 0; i < 25; i++) {
+			node_t * n = steal();
+			if(n) return n;
+		}
+
+		return search(); // last resort: one pass over every queue
+	}
+
+private:
+	inline node_t * local() { // one pop attempt on one of this thread's nqueues preferred queues
+		// unsigned i = (tls.rng2.prev() % 4) + tls.my_queue;
+		unsigned i = (--tls.it % nqueues) + tls.my_queue; // steps tls.it backwards; push() steps the same counter forwards
+		return try_pop(i, tls.stats.pop.local);
+	}
+
+	inline node_t * steal() { // one pop attempt on a queue chosen with tls.rng2 among all numThreads queues
+		unsigned i = tls.rng2.prev() % numThreads;
+		return try_pop(i, tls.stats.pop.steal);
+	}
+
+	inline node_t * search() { // one pop attempt on every queue, starting from an offset drawn from tls.rng2
+		unsigned offset = tls.rng2.prev();
+		for(unsigned i = 0; i < numThreads; i++) {
+			unsigned idx = (offset + i) % numThreads; // wraps around so each index is visited once
+			node_t * thrd = try_pop(idx, tls.stats.pop.search);
+			if(thrd) {
+				return thrd; // first successful pop wins
+			}
+		}
+
+		return nullptr; // every attempt failed
+	}
+
+private:
+	struct attempt_stat_t { // counters updated by try_pop() for one kind of pop (help, local, steal or search)
+		std::size_t attempt = { 0 }; // calls to try_pop()
+		std::size_t elock   = { 0 }; // failures because try_lock() on the list failed
+		std::size_t eempty  = { 0 }; // failures because list.ts() was 0 after the lock was taken
+		std::size_t espec   = { 0 }; // failures because list.ts() was 0 before trying the lock
+		std::size_t success = { 0 }; // attempts that reached list.pop()
+	};
+
+	node_t * try_pop(unsigned i, attempt_stat_t & stat) {
+		assert(i < numThreads);
 		auto & list = lists[i];
-		list.lock.lock();
-
-		if(list.push( node )) {
-			snzi.arrive(i);
-		}
-
-		list.lock.unlock();
-	}
-
-	__attribute__((noinline, hot)) node_t * pop() {
-		node_t * node;
-		while(true) {
-			if(!snzi.query()) {
-				return nullptr;
-			}
-
-			{
-				unsigned i = tls.my_queue;
-				auto & list = lists[i];
-				if( list.ts() != 0 ) {
-					list.lock.lock();
-					if((node = try_pop(i))) {
-						tls.stat.pop.local.success++;
-						break;
-					}
-					else {
-						tls.stat.pop.local.elock++;
-					}
-				}
-				else {
-					tls.stat.pop.local.espec++;
-				}
-			}
-
-			tls.stat.pop.steal.tried++;
-
-			int i = tls.rng.next() % numThreads;
-			auto & list = lists[i];
-			if( list.ts() == 0 ) {
-				tls.stat.pop.steal.empty++;
-				continue;
-			}
-
-			if( !list.lock.try_lock() ) {
-				tls.stat.pop.steal.locked++;
-				continue;
-			}
-
-			if((node = try_pop(i))) {
-				tls.stat.pop.steal.success++;
-				break;
-			}
-		}
-
-		#if defined(READ)
-			const unsigned f = READ;
-			if(0 == (tls.it % f)) {
-				unsigned i = tls.it / f;
-				lists[i % numThreads].ts();
-			}
-			// lists[tls.it].ts();
-			tls.it++;
-		#endif
-
-
-		return node;
-	}
-
-private:
-	node_t * try_pop(unsigned i) {
-		auto & list = lists[i];
+		stat.attempt++;
+
+		// If the list is empty, don't try
+		if(list.ts() == 0) { stat.espec++; return nullptr; }
+
+		// If we can't get the lock, move on
+		if( !list.lock.try_lock() ) { stat.elock++; return nullptr; }
+
 
 		// If list is empty, unlock and retry
 		if( list.ts() == 0 ) {
 			list.lock.unlock();
+			stat.eempty++;
 			return nullptr;
 		}
 
-			// Actually pop the list
-		node_t * node;
-		bool emptied;
-		std::tie(node, emptied) = list.pop();
-		assert(node);
-
-		if(emptied) {
-			snzi.depart(i);
-		}
-
-		// Unlock and return
+		auto node = list.pop();
 		list.lock.unlock();
-		return node;
+		stat.success++;
+		times[i].val = 1; //node.first->_links.ts;
+		// count--;
+		// _mm_stream_si64((long long int*)&times[i].val, node.first->_links.ts);
+		return node.first;
 	}
 
@@ -144,7 +176,19 @@
 
 	static std::atomic_uint32_t ticket;
+	static const unsigned outside = 0xFFFFFFFF; // sentinel meaning "no queue" for tls.my_queue and tls.myfriend
+
+	static inline unsigned calc_preferred() { // initializer for tls.my_queue: index of the thread's first preferred queue
+		unsigned t = ticket++; // tickets are handed out in call order
+		if(t == 0) return outside; // the first caller gets no preferred queues
+		unsigned i = (t - 1) * nqueues; // later callers get consecutive groups of nqueues queues
+		return i;
+	}
+
 	static __attribute__((aligned(128))) thread_local struct TLS {
-		Random     rng = { int(rdtscl()) };
-		unsigned   my_queue = ticket++;
+		Random     rng1 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
+		Random     rng2 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
+		unsigned   it   = 0;
+		unsigned   my_queue = calc_preferred();
+		unsigned   myfriend = outside;
 		#if defined(READ)
 			unsigned it = 0;
@@ -152,20 +196,15 @@
 		struct {
 			struct {
-				std::size_t nhint = { 0 };
+				std::size_t attempt = { 0 };
+				std::size_t success = { 0 };
 			} push;
 			struct {
-				struct {
-					std::size_t success = { 0 };
-					std::size_t espec = { 0 };
-					std::size_t elock = { 0 };
-				} local;
-				struct {
-					std::size_t tried   = { 0 };
-					std::size_t locked  = { 0 };
-					std::size_t empty   = { 0 };
-					std::size_t success = { 0 };
-				} steal;
+				attempt_stat_t help;
+				attempt_stat_t local;
+				attempt_stat_t steal;
+				attempt_stat_t search;
 			} pop;
-		} stat;
+			std::size_t help = { 0 };
+		} stats;
 	} tls;
 
@@ -173,5 +212,6 @@
 	const unsigned numThreads;
     	std::unique_ptr<intrusive_queue_t<node_t> []> lists;
-	__attribute__((aligned(64))) snzi_t snzi;
+    	std::unique_ptr<timestamp_t []> times;
+	__attribute__((aligned(128))) std::atomic_size_t count;
 
 #ifndef NO_STATS
@@ -179,42 +219,94 @@
 	static struct GlobalStats {
 		struct {
-			std::atomic_size_t nhint = { 0 };
+			std::atomic_size_t attempt = { 0 };
+			std::atomic_size_t success = { 0 };
 		} push;
 		struct {
 			struct {
+				std::atomic_size_t attempt = { 0 };
+				std::atomic_size_t elock   = { 0 };
+				std::atomic_size_t eempty  = { 0 };
+				std::atomic_size_t espec   = { 0 };
 				std::atomic_size_t success = { 0 };
-				std::atomic_size_t espec = { 0 };
-				std::atomic_size_t elock = { 0 };
+			} help;
+			struct {
+				std::atomic_size_t attempt = { 0 };
+				std::atomic_size_t elock   = { 0 };
+				std::atomic_size_t eempty  = { 0 };
+				std::atomic_size_t espec   = { 0 };
+				std::atomic_size_t success = { 0 };
 			} local;
 			struct {
-				std::atomic_size_t tried   = { 0 };
-				std::atomic_size_t locked  = { 0 };
-				std::atomic_size_t empty   = { 0 };
+				std::atomic_size_t attempt = { 0 };
+				std::atomic_size_t elock   = { 0 };
+				std::atomic_size_t eempty  = { 0 };
+				std::atomic_size_t espec   = { 0 };
 				std::atomic_size_t success = { 0 };
 			} steal;
+			struct {
+				std::atomic_size_t attempt = { 0 };
+				std::atomic_size_t elock   = { 0 };
+				std::atomic_size_t eempty  = { 0 };
+				std::atomic_size_t espec   = { 0 };
+				std::atomic_size_t success = { 0 };
+			} search;
 		} pop;
+		std::atomic_size_t help = { 0 };
 	} global_stats;
 
 public:
 	static void stats_tls_tally() {
-		global_stats.push.nhint += tls.stat.push.nhint;
-		global_stats.pop.local.success += tls.stat.pop.local.success;
-		global_stats.pop.local.espec   += tls.stat.pop.local.espec  ;
-		global_stats.pop.local.elock   += tls.stat.pop.local.elock  ;
-		global_stats.pop.steal.tried   += tls.stat.pop.steal.tried  ;
-		global_stats.pop.steal.locked  += tls.stat.pop.steal.locked ;
-		global_stats.pop.steal.empty   += tls.stat.pop.steal.empty  ;
-		global_stats.pop.steal.success += tls.stat.pop.steal.success;
-	}
-
-	static void stats_print(std::ostream & os ) {
+		global_stats.push.attempt += tls.stats.push.attempt;
+		global_stats.push.success += tls.stats.push.success;
+		global_stats.pop.help  .attempt += tls.stats.pop.help  .attempt;
+		global_stats.pop.help  .elock   += tls.stats.pop.help  .elock  ;
+		global_stats.pop.help  .eempty  += tls.stats.pop.help  .eempty ;
+		global_stats.pop.help  .espec   += tls.stats.pop.help  .espec  ;
+		global_stats.pop.help  .success += tls.stats.pop.help  .success;
+		global_stats.pop.local .attempt += tls.stats.pop.local .attempt;
+		global_stats.pop.local .elock   += tls.stats.pop.local .elock  ;
+		global_stats.pop.local .eempty  += tls.stats.pop.local .eempty ;
+		global_stats.pop.local .espec   += tls.stats.pop.local .espec  ;
+		global_stats.pop.local .success += tls.stats.pop.local .success;
+		global_stats.pop.steal .attempt += tls.stats.pop.steal .attempt;
+		global_stats.pop.steal .elock   += tls.stats.pop.steal .elock  ;
+		global_stats.pop.steal .eempty  += tls.stats.pop.steal .eempty ;
+		global_stats.pop.steal .espec   += tls.stats.pop.steal .espec  ;
+		global_stats.pop.steal .success += tls.stats.pop.steal .success;
+		global_stats.pop.search.attempt += tls.stats.pop.search.attempt;
+		global_stats.pop.search.elock   += tls.stats.pop.search.elock  ;
+		global_stats.pop.search.eempty  += tls.stats.pop.search.eempty ;
+		global_stats.pop.search.espec   += tls.stats.pop.search.espec  ;
+		global_stats.pop.search.success += tls.stats.pop.search.success;
+		global_stats.help += tls.stats.help;
+	}
+
+	static void stats_print(std::ostream & os, double duration ) {
 		std::cout << "----- Work Stealing Stats -----" << std::endl;
 
-		double stealSucc = double(global_stats.pop.steal.success) / global_stats.pop.steal.tried;
-		os << "Push to new Q : " << std::setw(15) << global_stats.push.nhint << "\n";
-		os << "Local Pop     : " << std::setw(15) << global_stats.pop.local.success << "\n";
-		os << "Steal Pop     : " << std::setw(15) << global_stats.pop.steal.success << "(" << global_stats.pop.local.espec << "s, " << global_stats.pop.local.elock << "l)\n";
-		os << "Steal Success : " << std::setw(15) << stealSucc << "(" << global_stats.pop.steal.tried << " tries)\n";
-		os << "Steal Fails   : " << std::setw(15) << global_stats.pop.steal.empty << "e, " << global_stats.pop.steal.locked << "l\n";
+		double push_suc = (100.0 * double(global_stats.push.success) / global_stats.push.attempt);
+		double push_len = double(global_stats.push.attempt     ) / global_stats.push.success;
+		os << "Push   Pick : " << push_suc << " %, len " << push_len << " (" << global_stats.push.attempt      << " / " << global_stats.push.success << ")\n";
+
+		double hlp_suc = (100.0 * double(global_stats.pop.help.success) / global_stats.pop.help.attempt);
+		double hlp_len = double(global_stats.pop.help.attempt     ) / global_stats.pop.help.success;
+		os << "Help        : " << hlp_suc << " %, len " << hlp_len << " (" << global_stats.pop.help.attempt      << " / " << global_stats.pop.help.success << ")\n";
+		os << "Help Fail   : " << global_stats.pop.help.espec << "s, " << global_stats.pop.help.eempty << "e, " << global_stats.pop.help.elock << "l\n";
+
+		double pop_suc = (100.0 * double(global_stats.pop.local.success) / global_stats.pop.local.attempt);
+		double pop_len = double(global_stats.pop.local.attempt     ) / global_stats.pop.local.success;
+		os << "Local       : " << pop_suc << " %, len " << pop_len << " (" << global_stats.pop.local.attempt      << " / " << global_stats.pop.local.success << ")\n";
+		os << "Local Fail  : " << global_stats.pop.local.espec << "s, " << global_stats.pop.local.eempty << "e, " << global_stats.pop.local.elock << "l\n";
+
+		double stl_suc = (100.0 * double(global_stats.pop.steal.success) / global_stats.pop.steal.attempt);
+		double stl_len = double(global_stats.pop.steal.attempt     ) / global_stats.pop.steal.success;
+		os << "Steal       : " << stl_suc << " %, len " << stl_len << " (" << global_stats.pop.steal.attempt      << " / " << global_stats.pop.steal.success << ")\n";
+		os << "Steal Fail  : " << global_stats.pop.steal.espec << "s, " << global_stats.pop.steal.eempty << "e, " << global_stats.pop.steal.elock << "l\n";
+
+		double srh_suc = (100.0 * double(global_stats.pop.search.success) / global_stats.pop.search.attempt);
+		double srh_len = double(global_stats.pop.search.attempt     ) / global_stats.pop.search.success;
+		os << "Search      : " << srh_suc << " %, len " << srh_len << " (" << global_stats.pop.search.attempt      << " / " << global_stats.pop.search.success << ")\n";
+		os << "Search Fail : " << global_stats.pop.search.espec << "s, " << global_stats.pop.search.eempty << "e, " << global_stats.pop.search.elock << "l\n";
+		os << "Helps       : " << std::setw(15) << std::scientific << global_stats.help / duration << "/sec (" << global_stats.help  << ")\n";
 	}
 private:
