Index: doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp	(revision 9da5a50e6ddff0e029fb63b11d9539f8432afa31)
+++ doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp	(revision 33e62f1b67260ca777fa767e6ee88452df8e052c)
@@ -11,61 +11,7 @@
 #include "assert.hpp"
 #include "utils.hpp"
+#include "snzi.hpp"
 
 using namespace std;
-
-struct spinlock_t {
-	std::atomic_bool ll = { false };
-
-	inline void lock() {
-		while( __builtin_expect(ll.exchange(true),false) ) {
-			while(ll.load(std::memory_order_relaxed))
-				asm volatile("pause");
-		}
-	}
-
-	inline bool try_lock() {
-		return false == ll.exchange(true);
-	}
-
-	inline void unlock() {
-		ll.store(false, std::memory_order_release);
-	}
-
-	inline explicit operator bool() {
-		return ll.load(std::memory_order_relaxed);
-	}
-};
-
-static inline bool bts(std::atomic_size_t & target, size_t bit ) {
-	//*
-	int result = 0;
-	asm volatile(
-		"LOCK btsq %[bit], %[target]\n\t"
-		:"=@ccc" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
- 	return result != 0;
-	/*/
-	size_t mask = 1ul << bit;
-	size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
-	return (ret & mask) != 0;
-	//*/
-}
-
-static inline bool btr(std::atomic_size_t & target, size_t bit ) {
-	//*
-	int result = 0;
-	asm volatile(
-		"LOCK btrq %[bit], %[target]\n\t"
-		:"=@ccc" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
- 	return result != 0;
-	/*/
-	size_t mask = 1ul << bit;
-	size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
-	return (ret & mask) != 0;
-	//*/
-}
 
 extern bool enable_stats;
@@ -110,4 +56,7 @@
 	  	: lists(new intrusive_queue_t[numLists])
 		, numLists(numLists)
+		#if defined(SIMPLE_SNZI)
+			, snzi(4)
+		#endif
 	{
 		assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
@@ -140,5 +89,7 @@
 			if( !lists[i].lock.try_lock() ) continue;
 
-			__attribute__((unused)) int num = numNonEmpty;
+			#if !defined(SIMPLE_SNZI)
+				__attribute__((unused)) int num = numNonEmpty;
+			#endif
 
 			// Actually push it
@@ -149,4 +100,6 @@
 					assert(qword == 0);
 					bts(tls.mask, bit);
+				#elif defined(SIMPLE_SNZI)
+					snzi.arrive(i);
 				#elif !defined(NO_BITMASK)
 					numNonEmpty++;
@@ -161,5 +114,7 @@
 				#endif
 			}
-			assert(numNonEmpty <= (int)numLists);
+			#if !defined(SIMPLE_SNZI)
+				assert(numNonEmpty <= (int)numLists);
+			#endif
 
 			// Unlock and return
@@ -168,6 +123,8 @@
 			#ifndef NO_STATS
 				tls.pick.push.success++;
-				tls.empty.push.value += num;
-				tls.empty.push.count += 1;
+				#if !defined(SIMPLE_SNZI)
+					tls.empty.push.value += num;
+					tls.empty.push.count += 1;
+				#endif
 			#endif
 			return;
@@ -208,4 +165,12 @@
 				if(auto node = try_pop(i, j)) return node;
 			}
+		#elif defined(SIMPLE_SNZI)
+			while(snzi.query()) {
+				// Pick two lists at random
+				int i = tls.rng.next() % numLists;
+				int j = tls.rng.next() % numLists;
+
+				if(auto node = try_pop(i, j)) return node;
+			}
 		#elif !defined(NO_BITMASK)
 			int nnempty;
@@ -280,5 +245,7 @@
 		if( !list.lock.try_lock() ) return nullptr;
 
-		__attribute__((unused)) int num = numNonEmpty;
+		#if !defined(SIMPLE_SNZI)
+			__attribute__((unused)) int num = numNonEmpty;
+		#endif
 
 		// If list is empty, unlock and retry
@@ -300,4 +267,6 @@
 				assert(qword == 0);
 				__attribute__((unused)) bool ret = btr(tls.mask, bit);
+			#elif defined(SIMPLE_SNZI)
+				snzi.depart(w);
 			#elif !defined(NO_BITMASK)
 				numNonEmpty--;
@@ -315,9 +284,13 @@
 		// Unlock and return
 		list.lock.unlock();
-		assert(numNonEmpty >= 0);
+		#if !defined(SIMPLE_SNZI)
+			assert(numNonEmpty >= 0);
+		#endif
 		#ifndef NO_STATS
 			tls.pick.pop.success++;
-			tls.empty.pop.value += num;
-			tls.empty.pop.count += 1;
+			#if !defined(SIMPLE_SNZI)
+				tls.empty.pop.value += num;
+				tls.empty.pop.count += 1;
+			#endif
 		#endif
 		return node;
@@ -336,6 +309,4 @@
 			size_t  push = 0;
 			size_t  pop  = 0;
-			// size_t value = 0;
-			// size_t count = 0;
 		};
 
@@ -460,10 +431,14 @@
 	} tls;
 
-public:
-	std::atomic_int numNonEmpty  = { 0 };  // number of non-empty lists
-	std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
 private:
     	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t []> lists;
 	const unsigned numLists;
+private:
+	#if defined(SIMPLE_SNZI)
+		snzi_t snzi;
+	#else
+		std::atomic_int numNonEmpty  = { 0 };  // number of non-empty lists
+		std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
+	#endif
 
 public:
Index: doc/theses/thierry_delisle_PhD/code/snzi.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/snzi.hpp	(revision 33e62f1b67260ca777fa767e6ee88452df8e052c)
+++ doc/theses/thierry_delisle_PhD/code/snzi.hpp	(revision 33e62f1b67260ca777fa767e6ee88452df8e052c)
@@ -0,0 +1,156 @@
+#pragma once
+
+#include "utils.hpp"
+
+
+class snzi_t {
+	class node;
+public:
+	const unsigned mask;
+	const int root;
+	std::unique_ptr<snzi_t::node[]> nodes;
+
+	snzi_t(unsigned depth);
+
+	void arrive(int idx) {
+		idx &= mask;
+		nodes[idx].arrive();
+	}
+
+	void depart(int idx) {
+		idx &= mask;
+		nodes[idx].depart();
+	}
+
+	bool query() const {
+		return nodes[root].query();
+	}
+
+
+private:
+	class __attribute__((aligned(64))) node {
+		friend class snzi_t;
+	private:
+
+		union val_t {
+			static constexpr char Half = -1;
+
+			uint64_t _all;
+			struct __attribute__((packed)) {
+				char cnt;
+				uint64_t ver:56;
+			};
+
+			bool cas(val_t & exp, char _cnt, uint64_t _ver) volatile {
+				val_t t;
+				t.ver = _ver;
+				t.cnt = _cnt;
+				/* paranoid */ assert(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			bool cas(val_t & exp, const val_t & tar) volatile {
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			val_t() : _all(0) {}
+			val_t(const volatile val_t & o) : _all(o._all) {}
+		};
+
+		//--------------------------------------------------
+		// Hierarchical node
+		void arrive_h() {
+			int undoArr = 0;
+			bool success = false;
+			while(!success) {
+				auto x{ value };
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt >= 1 ) {
+					if( value.cas(x, x.cnt + 1, x.ver ) ) {
+						success = true;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == 0 ) {
+					if( value.cas(x, val_t::Half, x.ver + 1) ) {
+						success = true;
+						x.cnt = val_t::Half;
+						x.ver = x.ver + 1;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == val_t::Half ) {
+					/* paranoid */ assert(parent);
+					parent->arrive();
+					if( !value.cas(x, 1, x.ver) ) {
+						undoArr = undoArr + 1;
+					}
+				}
+			}
+
+			for(int i = 0; i < undoArr; i++) {
+				/* paranoid */ assert(parent);
+				parent->depart();
+			}
+		}
+
+		void depart_h() {
+			while(true) {
+				auto x = (const val_t)value;
+				/* paranoid */ assertf(x.cnt >= 1, "%d", x.cnt);
+				if( value.cas( x, x.cnt - 1, x.ver ) ) {
+					if( x.cnt == 1 ) {
+						/* paranoid */ assert(parent);
+						parent->depart();
+					}
+					return;
+				}
+			}
+		}
+
+		//--------------------------------------------------
+		// Root node
+		void arrive_r() {
+			__atomic_fetch_add(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+		void depart_r() {
+			__atomic_fetch_sub(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+	private:
+		volatile val_t value;
+		class node * parent = nullptr;
+
+		bool is_root() {
+			return parent == nullptr;
+		}
+
+	public:
+		void arrive() {
+			if(is_root()) arrive_r();
+			else arrive_h();
+		}
+
+		void depart() {
+			if(is_root()) depart_r();
+			else depart_h();
+		}
+
+		bool query() {
+			/* paranoid */ assert(is_root());
+			return value._all > 0;
+		}
+	};
+};
+
+inline snzi_t::snzi_t(unsigned depth)
+	: mask((1 << depth) - 1)
+	, root( ((1 << depth) * 2) - 2)
+	, nodes(new node[ root + 1 ]())
+{
+	int width = (1 << depth);
+	for(int i = 0; i < root; i++) {
+		nodes[i].parent = &nodes[(i / 2) + width ];
+	}
+}
Index: doc/theses/thierry_delisle_PhD/code/utils.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/utils.hpp	(revision 9da5a50e6ddff0e029fb63b11d9539f8432afa31)
+++ doc/theses/thierry_delisle_PhD/code/utils.hpp	(revision 33e62f1b67260ca777fa767e6ee88452df8e052c)
@@ -143,2 +143,57 @@
 #endif
 }
+
+struct spinlock_t {
+	std::atomic_bool ll = { false };
+
+	inline void lock() {
+		while( __builtin_expect(ll.exchange(true),false) ) {
+			while(ll.load(std::memory_order_relaxed))
+				asm volatile("pause");
+		}
+	}
+
+	inline bool try_lock() {
+		return false == ll.exchange(true);
+	}
+
+	inline void unlock() {
+		ll.store(false, std::memory_order_release);
+	}
+
+	inline explicit operator bool() {
+		return ll.load(std::memory_order_relaxed);
+	}
+};
+
+static inline bool bts(std::atomic_size_t & target, size_t bit ) {
+	//*
+	int result = 0;
+	asm volatile(
+		"LOCK btsq %[bit], %[target]\n\t"
+		:"=@ccc" (result), [target] "+m" (target)
+		: [bit] "r" (bit)
+	);
+	return result != 0;
+	/*/
+	size_t mask = 1ul << bit;
+	size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
+	return (ret & mask) != 0;
+	//*/
+}
+
+static inline bool btr(std::atomic_size_t & target, size_t bit ) {
+	//*
+	int result = 0;
+	asm volatile(
+		"LOCK btrq %[bit], %[target]\n\t"
+		:"=@ccc" (result), [target] "+m" (target)
+		: [bit] "r" (bit)
+	);
+	return result != 0;
+	/*/
+	size_t mask = 1ul << bit;
+	size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
+	return (ret & mask) != 0;
+	//*/
+}
