Index: doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp	(revision 0e4df2ee2dfcfc551fd46578d5fe06b0fe7ff0b0)
+++ doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp	(revision 95cb63bcaff9c78188bff94b95ea03df8fccfd9c)
@@ -57,4 +57,12 @@
 	size_t valmax = 0;
 	size_t valmin = 100000000ul;
+	struct { // per-thread "completion" statistics, updated by runProducer_body each time node id 0 is handled
+		size_t val = 0; // total number of nodes taken out of the slots over all sweeps
+		size_t cnt = 0; // number of sweeps performed
+	} comp;
+	struct { // per-thread "submission" statistics, updated by runProducer_body each time a node is parked in a slot
+		size_t val = 0; // total number of failed slot probes that preceded the successful placements
+		size_t cnt = 0; // number of successful placements
+	} subm;
 };
 
@@ -67,4 +75,12 @@
 	std::atomic_size_t valmax = { 0 };
 	std::atomic_size_t valmin = { 100000000ul };
+	struct { // aggregated "completion" statistics; each thread's local comp counters are added in when stats are tallied
+		std::atomic_size_t val = { 0 }; // sum of the per-thread comp.val totals
+		std::atomic_size_t cnt = { 0 }; // sum of the per-thread comp.cnt totals; print_stats reports the section only when non-zero
+	} comp;
+	struct { // aggregated "submission" statistics; each thread's local subm counters are added in when stats are tallied
+		std::atomic_size_t val = { 0 }; // sum of the per-thread subm.val totals
+		std::atomic_size_t cnt = { 0 }; // sum of the per-thread subm.cnt totals
+	} subm;
 };
 
@@ -95,4 +111,9 @@
 	global.crc_in  += local.crc_in;
 	global.crc_out += local.crc_out;
+
+	global.comp.val += local.comp.val;
+	global.comp.cnt += local.comp.cnt;
+	global.subm.val += local.subm.val;
+	global.subm.cnt += local.subm.cnt;
 
 	atomic_max(global.valmax, local.valmax);
@@ -159,4 +180,14 @@
 	auto dur_nano = duration_cast<std::nano>(1.0);
 
+	if(global.valmax != 0) {
+		std::cout << "Max runs      : " << global.valmax << "\n";
+		std::cout << "Min runs      : " << global.valmin << "\n";
+	}
+	if(global.comp.cnt != 0) { // submit/complete section is skipped entirely when no completion sweep was recorded
+		std::cout << "Submit count  : " << global.subm.cnt << "\n";
+		std::cout << "Submit average: " << (global.subm.cnt != 0 ? (double(global.subm.val)) / global.subm.cnt : 0.0) << "\n"; // subm.cnt can be 0 while comp.cnt is not (e.g. a single node, id 0, is never submitted): report 0 instead of NaN
+		std::cout << "Complete count: " << global.comp.cnt << "\n";
+		std::cout << "Complete avg  : " << ((double(global.comp.val)) / global.comp.cnt) << "\n"; // safe: comp.cnt is known to be non-zero here
+	}
 	std::cout << "Duration      : " << duration << "s\n";
 	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
@@ -164,8 +195,4 @@
 	std::cout << "Ops/sec       : " << ops_sec << "\n";
 	std::cout << "Total ops     : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n";
-	if(global.valmax != 0) {
-		std::cout << "Max runs      : " << global.valmax << "\n";
-		std::cout << "Min runs      : " << global.valmin << "\n";
-	}
 	#ifndef NO_STATS
 		relaxed_list<Node>::stats_print(std::cout);
@@ -395,4 +422,157 @@
 
 		enable_stats = false;
+	}
+
+	print_stats(duration, nthread, global);
+}
+
+// ================================================================================================
+struct __attribute__((aligned(64))) Slot { // one hand-off cell of the producer benchmark; 64-byte aligned, presumably so each slot sits on its own cache line - TODO confirm
+	Node * volatile node; // node currently parked in the slot, or nullptr when empty; written with __atomic exchange/compare-exchange by the workers
+};
+
+__attribute__((noinline)) void runProducer_body( // worker loop of the producer benchmark: pops nodes from the list and either parks them in a slot or, for node id 0, sweeps the slots back into the list
+	std::atomic<bool>& done, // termination flag, polled once per outer iteration
+	Random & rand, // per-thread generator used to pick the starting slot of each probe pass
+	Slot * slots, // shared slot array of nslots entries
+	int nslots, // number of entries in slots
+	local_stat_t & local, // per-thread counters updated by this loop
+	relaxed_list<Node> & list // list under test
+) {
+	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) { // hint: the loop is expected to keep running
+
+		Node * node = list.pop();
+		if(!node) { // nothing could be popped: count the miss and retry
+			local.empty ++;
+			continue;
+		}
+
+		local.crc_out += node->value; // every successful pop is accounted on the "out" side
+		local.out++;
+
+		if(node->id == 0) { // node 0 triggers a sweep: empty every slot back into the list
+			unsigned cnt = 0; // nodes recovered during this sweep
+			for(int i = 0; i < nslots; i++) {
+				Node * found = __atomic_exchange_n( &slots[i].node, nullptr, __ATOMIC_SEQ_CST ); // atomically take whatever is parked and leave the slot empty
+				if( found ) {
+					local.crc_in += found->value; // re-inserted nodes are accounted on the "in" side
+					local.in++;
+					cnt++;
+					list.push( found );
+				}
+			}
+
+			local.crc_in += node->value; // node 0 itself always goes straight back into the list
+			local.in++;
+			list.push( node );
+
+			local.comp.cnt++; // one more sweep
+			local.comp.val += cnt; // total nodes recovered by sweeps
+		}
+		else { // any other node is parked in the first free slot found
+			unsigned len = 0; // failed probes before the node was placed
+			while(true) { // note: does not re-check done; exits only once a slot has been claimed
+				auto off = rand.next(); // fresh random starting point for each pass over the slots
+				for(int i = 0; i < nslots; i++) {
+					Node * expected = nullptr;
+					int idx = (i + off) % nslots; // NOTE(review): assumes rand.next() yields an unsigned value so idx is non-negative - TODO confirm
+					Slot & slot = slots[ idx ];
+					if(
+						slot.node == nullptr && // plain read first, so the compare-exchange is only attempted on slots that look empty
+						__atomic_compare_exchange_n( &slot.node, &expected, node, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) // strong CAS: claim the slot only if it is still nullptr
+					) {
+						local.subm.cnt++; // one more successful placement
+						local.subm.val += len; // total failed probes over all placements
+						goto LOOP; // leave both nested loops and resume the outer loop
+					}
+					assert( expected != node ); // a failed attempt must never have observed this very node in the slot
+					len++;
+				}
+			}
+		}
+
+		LOOP:; // target of the goto above; empty statement just before the next outer iteration
+	}
+}
+
+void runProducer(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) { // producer benchmark driver: fills a list with nnodes nodes, runs nthread workers executing runProducer_body, then frees the nodes and slots
+	std::cout << "Producer Benchmark" << std::endl;
+
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1); // presumably one participant per worker plus this thread - TODO confirm against barrier_t
+
+	// Data to check everything is OK
+	global_stat_t global;
+
+	// Flag to signal termination
+	std::atomic_bool done  = { false };
+
+	std::cout << "Initializing ";
+
+	int nslots = nnodes * 4; // four slots per node, so parked nodes can never fill every slot; NOTE(review): unsigned-to-int conversion, very large nnodes would not fit
+	Slot * slots = new Slot[nslots]; // released with delete [] once the run is over
+	std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl;
+
+	// List being tested
+	relaxed_list<Node> list = { nthread * nqueues };
+	{
+		Random rand(rdtscl());
+		for(unsigned i = 0; i < nnodes; i++) {
+			Node * node = new Node(rand.next() % 100); // node values are in [0, 100)
+			node->id = i; // ids are 0..nnodes-1; id 0 is the node that makes runProducer_body sweep the slots
+			global.crc_in += node->value; // initial pushes are accounted on the "in" side
+			list.push(node);
+		}
+
+		for(int i = 0; i < nslots; i++) {
+			slots[i].node = nullptr; // every slot starts empty
+		}
+	}
+
+	{
+		enable_stats = true;
+
+		std::thread * threads[nthread]; // NOTE(review): variable-length array, a compiler extension rather than standard C++
+		unsigned i = 1; // worker ids start at 1
+		for(auto & t : threads) {
+			t = new std::thread([&done, &list, &barrier, &global, slots, nslots](unsigned tid) { // locals are captured by reference; every thread is joined below before they go out of scope
+				Random rand(tid + rdtscl());
+
+				local_stat_t local;
+				barrier.wait(tid);
+
+				// EXPERIMENT START
+
+				runProducer_body(done, rand, slots, nslots, local, list);
+
+				// EXPERIMENT END
+
+				barrier.wait(tid);
+
+				tally_stats(global, local); // fold this worker's counters into the shared totals
+			}, i++);
+		}
+
+		waitfor(duration, barrier, done); // presumably runs the experiment for duration seconds and then sets done - TODO confirm against waitfor
+
+		for(auto t : threads) {
+			t->join();
+			delete t;
+		}
+
+		enable_stats = false;
+	}
+
+	{
+		while(Node * node = list.pop()) { // drain whatever is still in the list, accounting it on the "out" side
+			global.crc_out += node->value;
+			delete node;
+		}
+
+		for(int i = 0; i < nslots; i++) {
+			delete slots[i].node; // nodes still parked were already counted as "out" when popped; deleting nullptr (empty slot) is a no-op
+		}
+
+		delete [] slots;
 	}
 
@@ -521,5 +701,5 @@
 	print_stats(duration, nthread, global);
 
-	save_fairness(data_out.get(), 100, nthread, width, length, output);
+	// save_fairness(data_out.get(), 100, nthread, width, length, output);
 }
 
@@ -547,4 +727,5 @@
 		Churn,
 		PingPong,
+		Producer,
 		Fairness,
 		NONE
@@ -577,5 +758,4 @@
 				case PingPong:
 					nnodes = 1;
-					nslots = 1;
 					switch(argc - optind) {
 					case 0: break;
@@ -591,5 +771,24 @@
 						break;
 					default:
-						std::cerr << "'PingPong' benchmark doesn't accept more than 2 extra arguments" << std::endl;
+						std::cerr << "'PingPong' benchmark doesn't accept more than 1 extra arguments" << std::endl;
+						goto usage;
+					}
+					break;
+				case Producer:
+					nnodes = 32;
+					switch(argc - optind) {
+					case 0: break;
+					case 1:
+						try {
+							arg = optarg = argv[optind];
+							nnodes = stoul(optarg, &len);
+							if(len != arg.size()) { throw std::invalid_argument(""); }
+						} catch(std::invalid_argument &) {
+							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
+							goto usage;
+						}
+						break;
+					default:
+						std::cerr << "'Producer' benchmark doesn't accept more than 1 extra arguments" << std::endl;
 						goto usage;
 					}
@@ -662,4 +861,8 @@
 					break;
 				}
+				if(iequals(arg, "producer")) {
+					benchmark = Producer;
+					break;
+				}
 				if(iequals(arg, "fairness")) {
 					benchmark = Fairness;
@@ -702,4 +905,5 @@
 				std::cerr << "Usage: " << argv[0] << ": [options] -b churn [NNODES] [NSLOTS = NNODES]" << std::endl;
 				std::cerr << "  or:  " << argv[0] << ": [options] -b pingpong [NNODES]" << std::endl;
+				std::cerr << "  or:  " << argv[0] << ": [options] -b producer [NNODES]" << std::endl;
 				std::cerr << std::endl;
 				std::cerr << "  -d, --duration=DURATION  Duration of the experiment, in seconds" << std::endl;
@@ -720,4 +924,7 @@
 		case PingPong:
 			runPingPong(nthreads, nqueues, duration, nnodes);
+			break;
+		case Producer:
+			runProducer(nthreads, nqueues, duration, nnodes);
 			break;
 		case Fairness:
@@ -801,32 +1008,32 @@
 }
 
-void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
-	std::ofstream os(output);
-	os << "<html>\n";
-	os << "<head>\n";
-	os << "<style>\n";
-	os << "</style>\n";
-	os << "</head>\n";
-	os << "<body>\n";
-	os << "<table style=\"width=100%\">\n";
-
-	size_t idx = 0;
-	for(size_t r = 0ul; r < rows; r++) {
-		os << "<tr>\n";
-		for(size_t c = 0ul; c < columns; c++) {
-			os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";
-			idx++;
-		}
-		os << "</tr>\n";
-	}
-
-	os << "</table>\n";
-	os << "</body>\n";
-	os << "</html>\n";
-	os << std::endl;
-}
-
-#include <png.h>
-#include <setjmp.h>
+// void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
+// 	std::ofstream os(output);
+// 	os << "<html>\n";
+// 	os << "<head>\n";
+// 	os << "<style>\n";
+// 	os << "</style>\n";
+// 	os << "</head>\n";
+// 	os << "<body>\n";
+// 	os << "<table style=\"width=100%\">\n";
+
+// 	size_t idx = 0;
+// 	for(size_t r = 0ul; r < rows; r++) {
+// 		os << "<tr>\n";
+// 		for(size_t c = 0ul; c < columns; c++) {
+// 			os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";
+// 			idx++;
+// 		}
+// 		os << "</tr>\n";
+// 	}
+
+// 	os << "</table>\n";
+// 	os << "</body>\n";
+// 	os << "</html>\n";
+// 	os << std::endl;
+// }
+
+// #include <png.h>
+// #include <setjmp.h>
 
 /*
