#pragma once

#define LIST_VARIANT dyn_ent_list

#include <memory>
#include <mutex>
#include <thread>

#include "assert.hpp"
#include "utils.hpp"
#include "links.hpp"

std::mutex mtx_;

template<typename node_t>
class __attribute__((aligned(128))) dyn_ent_list {
	const unsigned numLists;
    	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t<node_t> []> lists;

public:
	dyn_ent_list(unsigned numThreads, unsigned)
		: numLists(numThreads * 4)
		, lists(new intrusive_queue_t<node_t>[numLists])
	{
		std::cout << "Constructing Relaxed List with " << numLists << std::endl;
	}

	__attribute__((noinline, hot)) void push(node_t * node) {
		node->_links.ts = rdtscl();

		// Try to pick a lane and lock it
		unsigned i;
		do {
			// Pick the index of a lane
			i = idx_from_r(tls.rng1.next(), tls.my_queue).first;
			// i = ret.first; //local = ret.second;
			tls.stats.push.attempt++;
		} while( !lists[i].lock.try_lock() );

		lists[i].push(node);
		lists[i].lock.unlock();
		tls.rng2.set_raw_state( tls.rng1.get_raw_state());
		tls.stats.push.success++;
	}

	__attribute__((noinline, hot)) node_t * pop() {
		for(int n = 0; n < 25; n++) {
			// Pick two lists at random
			unsigned i, j;
			bool locali, localj;
			auto reti = idx_from_r(tls.rng2.prev(), tls.my_queue);
			auto retj = idx_from_r(tls.rng2.prev(), tls.my_queue);

			i = reti.first; locali = reti.second;
			j = retj.first; localj = retj.second;
			tls.stats.pop.attempt++;

			// try popping from the 2 picked lists
			node_t * thrd = try_pop(i, j);
			if(thrd) {
				tls.stats.pop.success++;
				return thrd;
			}
		}

		unsigned offset = tls.rng2.next();
		for(unsigned i = 0; i < numLists; i++) {
			unsigned idx = (offset + i) % numLists;
			node_t * thrd = try_pop(idx);
			if(thrd) {
				return thrd;
			}
		}

		// All lanes where empty return 0p
		return nullptr;
	}

private:
	inline node_t * try_pop(unsigned i, unsigned j) {
		// Pick the bet list
		int w = i;
		if( __builtin_expect(!(lists[j].ts() > 0), true) ) {
			w = (lists[i].ts() < lists[j].ts()) ? i : j;
		}

		return try_pop(w);
	}

	inline node_t * try_pop(unsigned w) {
		// If list looks empty retry
		if( lists[w].ts() == 0 ) { tls.stats.pop.empty++; return nullptr; }

		// If we can't get the lock retry
		if( !lists[w].lock.try_lock() ) { tls.stats.pop.locked++; return nullptr; }


		// If list is empty, unlock and retry
		if( lists[w].ts() == 0 ) {
			tls.stats.pop.both++;
			lists[w].lock.unlock();
			return nullptr;
		}

		auto node = lists[w].pop();
		lists[w].lock.unlock();
		return node.first;
	}

	inline std::pair<unsigned, bool> idx_from_r(unsigned r, unsigned preferred) {
		unsigned i;
		bool local;
		unsigned rlow  = r % 4;
		unsigned rhigh = r / 4;
		if((0 != rlow) && preferred != outside) {
			// (BIAS - 1) out of BIAS chances
			// Use perferred queues
			i = preferred + (rhigh % 4);
			local = true;
		}
		else {
			// 1 out of BIAS chances
			// Use all queues
			i = rhigh;
			local = false;
		}
		return {i % numLists, local};
	}
private:
	static std::atomic_uint32_t ticket;
	static const unsigned outside = 0xFFFFFFFF;

	static inline unsigned calc_preferred() {
		unsigned t = ticket++;
		if(t == 0) return outside;
		unsigned i = 4 * (t - 1);
		return i;
	}

	static __attribute__((aligned(128))) thread_local struct TLS {
		Random     rng1 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
		Random     rng2 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
		Random     rng3 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
		unsigned   my_queue = calc_preferred();
		struct {
			struct {
				size_t attempt = 0;
				size_t success = 0;
			} push;
			struct {
				size_t attempt = 0;
				size_t success = 0;
				size_t empty   = 0;
				size_t locked  = 0;
				size_t both    = 0;
			} pop;
		} stats;
	} tls;
public:
	static const char * name() {
		return "Dynamic Entropy List";
	}

public:
	static struct GlobalStats {
		struct {
			std::atomic_size_t attempt = 0;
			std::atomic_size_t success = 0;
		} push;
		struct {
			std::atomic_size_t attempt = 0;
			std::atomic_size_t success = 0;
			std::atomic_size_t empty   = 0;
			std::atomic_size_t locked  = 0;
			std::atomic_size_t both    = 0;
		} pop;
	} global_stats;
	static void stats_tls_tally() {
		global_stats.push.attempt += tls.stats.push.attempt;
		global_stats.push.success += tls.stats.push.success;
		global_stats.pop .attempt += tls.stats.pop.attempt;
		global_stats.pop .success += tls.stats.pop.success;
		global_stats.pop .empty   += tls.stats.pop.empty;
		global_stats.pop .locked  += tls.stats.pop.locked;
		global_stats.pop .both    += tls.stats.pop.both;
	}

	static void stats_print(std::ostream & os, double) {
			const auto & global = global_stats;

		double push_sur = (100.0 * double(global.push.success) / global.push.attempt);
		double pop_sur  = (100.0 * double(global.pop .success) / global.pop .attempt);

		double push_len = double(global.push.attempt     ) / global.push.success;
		double pop_len  = double(global.pop .attempt     ) / global.pop .success;

		os << "Push   Pick   : " << push_sur << " %, len " << push_len << " (" << global.push.attempt      << " / " << global.push.success << ")\n";
		os << "Pop    Pick   : " << pop_sur  << " %, len " << pop_len  << " (" << global.pop .attempt      << " / " << global.pop .success << ")\n";
		os << "Pop    Fails  : " << global_stats.pop .empty << "e, " << global_stats.pop .locked << "l, " << global_stats.pop .both << "\n";
	}
};