#include "processor_list.hpp"

#include <array>
#include <exception>
#include <iomanip>
#include <iostream>
#include <locale>
#include <string>
#include <thread>
#include <vector>

#include "utils.hpp"

// Returns the fixed count 2^24 (0x1000000).
unsigned num() {
	constexpr unsigned count = 1u << 24;
	return count;
}

//-------------------

// Per-thread record whose address is handed to processor_list::doregister.
struct processor {
	// Value returned by doregister; later passed to read_lock, read_unlock
	// and unregister.
	unsigned int id;
};
/**
 * Stress-tests a processor_list with `nthread` worker threads and prints
 * throughput statistics to std::cout.
 *
 * Every worker registers a `processor` with the list, then loops until the
 * main thread raises the termination flag. On each iteration it takes the
 * write lock when a random draw in [0, 100) is below `writes`, and the read
 * lock otherwise.
 *
 * @param nthread  number of worker threads to spawn; must be > 0
 * @param duration target run time in seconds; the main thread polls about
 *                 every millisecond and stops the workers once the elapsed
 *                 time exceeds it (the reported duration is the measured one)
 * @param writes   percentage of iterations that take the write lock; must be < 100
 */
void run(unsigned nthread, double duration, unsigned writes) {
	assert(writes < 100);
	// The per-thread throughput below divides by nthread.
	assert(nthread > 0);

	// List being tested
	processor_list list = {};

	// Barrier for synchronization: all workers plus the main thread
	barrier_t barrier(nthread + 1);

	// Data to check everything is OK.
	// write_committed is deliberately a plain counter that is only touched
	// while the write lock is held; it is compared against the sum of the
	// per-thread write counts at the end.
	size_t write_committed = 0ul;
	std::atomic_size_t lock_cnt_write = { 0ul };
	std::atomic_size_t lock_cnt_read  = { 0ul };

	// Flag to signal termination
	std::atomic_bool done = { false };

	// Worker threads are owned by value: no variable-length array and no
	// manual new/delete.
	std::vector<std::thread> threads;
	threads.reserve(nthread);
	for(unsigned idx = 0; idx < nthread; idx++) {
		// Worker ids start at 1; id 0 is used by the main thread on the barrier.
		threads.emplace_back([&done, &list, &barrier, &write_committed, &lock_cnt_write, &lock_cnt_read, writes](unsigned tid) {
			Random rand(tid + rdtscl());
			processor proc;
			proc.id = list.doregister(&proc);
			size_t writes_cnt = 0;
			size_t reads_cnt = 0;

			affinity(tid);

			// Wait until every worker and the main thread are ready.
			barrier.wait(tid);

			while(__builtin_expect(!done, true)) {
				if ((rand.next() % 100) < writes) {
					auto n = list.write_lock();
					write_committed++;
					writes_cnt++;
					assert(writes_cnt < -2ul);
					list.write_unlock(n);
				}
				else {
					list.read_lock(proc.id);
					reads_cnt++;
					assert(reads_cnt < -2ul);
					list.read_unlock(proc.id);
				}
			}

			// Rendezvous with the main thread before tearing down.
			barrier.wait(tid);

			auto p = list.unregister(proc.id);
			assert(&proc == p);
			lock_cnt_write += writes_cnt;
			lock_cnt_read  += reads_cnt;
		}, idx + 1);
	}

	auto before = Clock::now();
	barrier.wait(0);

	// Poll the clock until the requested duration has elapsed.
	while(true) {
		usleep(1000);
		auto now = Clock::now();
		duration_t durr = now - before;
		if( durr.count() > duration ) {
			done = true;
			break;
		}
	}

	barrier.wait(0);
	auto after = Clock::now();
	duration_t durr = after - before;
	duration = durr.count();

	for(auto & t : threads) {
		t.join();
	}

	// Each worker adds its local write count once, so the totals must agree.
	assert(write_committed == lock_cnt_write);

	size_t ops_sec = size_t(double(lock_cnt_read + lock_cnt_write) / duration);
	size_t ops_thread = ops_sec / nthread;
	double dur_nano = duration_cast<std::nano>(1.0);

	std::cout << "Duration      : " << duration << "s\n";
	std::cout << "Total ops     : " << (lock_cnt_read + lock_cnt_write) << "(" << lock_cnt_read << "r, " << lock_cnt_write << "w)\n";
	std::cout << "Ops/sec       : " << ops_sec << "\n";
	std::cout << "Ops/sec/thread: " << ops_thread << "\n";
	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
}

// Writes the command-line synopsis, prefixed with the program name taken
// from argv[0], to stderr and then terminates the process with status 1.
void usage(char * argv[]) {
	const char * const program = argv[0];
	std::cerr << program << ": [DURATION (FLOAT:SEC)] [NTHREADS] [%WRITES]" << std::endl;
	std::exit(1);
}

/**
 * Entry point. Parses up to three optional positional arguments,
 * [DURATION (seconds)] [NTHREADS] [%WRITES], validates them and runs the
 * benchmark. Missing arguments keep their defaults (5.0 s, 2 threads, 0%).
 *
 * Any invalid argument prints a diagnostic followed by the usage text and
 * exits with status 1 (through usage()).
 */
int main(int argc, char * argv[]) {

	double duration   = 5.0;
	unsigned nthreads = 2;
	unsigned writes   = 0;

	std::cout.imbue(std::locale(""));

	try {
		// Integers are parsed as unsigned long and range-checked before being
		// narrowed, so an oversized value cannot wrap into the valid range.
		unsigned long parsed = 0;

		switch (argc)
		{
		case 4:
			parsed = std::stoul(argv[3]);
			if( parsed >= 100 ) {
				std::cerr << "Writes must be valid percentage, was " << argv[3] << "(" << parsed << ")" << std::endl;
				usage(argv);
			}
			writes = static_cast<unsigned>(parsed);
			[[fallthrough]];
		case 3:
			parsed = std::stoul(argv[2]);
			nthreads = static_cast<unsigned>(parsed);
			// run() divides by the thread count, so zero is rejected, as is
			// any value that does not survive the narrowing conversion.
			if( nthreads == 0 || nthreads != parsed ) {
				std::cerr << "Number of threads must be positive and fit in an unsigned int, was " << argv[2] << "(" << parsed << ")" << std::endl;
				usage(argv);
			}
			[[fallthrough]];
		case 2:
			duration = std::stod(argv[1]);
			// Written as a negated '>' so that NaN is rejected as well.
			if( !(duration > 0.0) ) {
				std::cerr << "Duration must be positive, was " << argv[1] << "(" << duration << ")" << std::endl;
				usage(argv);
			}
			[[fallthrough]];
		case 1:
			break;
		default:
			usage(argv);
			break;
		}
	}
	catch(const std::exception & e) {
		// std::stoul / std::stod throw on non-numeric or out-of-range input.
		std::cerr << "Invalid argument: " << e.what() << std::endl;
		usage(argv);
	}

	check_cache_line_size();

	std::cout << "Running " << nthreads << " threads for " << duration << " seconds with " << writes << "% writes" << std::endl;
	run(nthreads, duration, writes);

	return 0;
}
