extern "C" {
	#include <errno.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <sys/uio.h>
	#include <fcntl.h>

	#include <linux/io_uring.h>
}

# ifndef __NR_io_uring_setup
#  define __NR_io_uring_setup		425
# endif
# ifndef __NR_io_uring_enter
#  define __NR_io_uring_enter		426
# endif
# ifndef __NR_io_uring_register
#  define __NR_io_uring_register	427
# endif

// User-space view of the submission queue (SQ) shared with the kernel.
// All pointer fields point into the mmaped SQ ring region and are
// resolved from the offsets the kernel returns in io_uring_params.
struct io_uring_sq {
	// Head and tail of the ring (associated with array).
	// Shared with the kernel, hence volatile: head is advanced by the
	// kernel as it consumes entries, tail by user space as it submits.
	volatile uint32_t * head;
	volatile uint32_t * tail;

	// The actual kernel ring which uses head/tail
	// indexes into the sqes arrays
	uint32_t * array;

	// number of entries and mask to go with it
	// (mask == num - 1; ring indexes are wrapped with `& *mask`)
	const uint32_t * num;
	const uint32_t * mask;

	// SQ ring flags set by the kernel, e.g. IORING_SQ_NEED_WAKEUP
	// when running with IORING_SETUP_SQPOLL (see io_uring_setup(2)).
	uint32_t * flags;

	// Count of sqes silently dropped by the kernel because their
	// ring index was invalid (per io_uring_enter(2)).
	uint32_t * dropped;

	// User-side allocation cursor: like head/tail but not seen by the
	// kernel; tracks the next free slot in `sqes`. Initialized from
	// *tail in init_uring.
	volatile uint32_t alloc;

	// A buffer of sqes (not the actual ring — the ring `array`
	// holds indexes into this buffer)
	struct io_uring_sqe * sqes;

	// The location and size of the mmaped SQ ring area
	// (IORING_OFF_SQ_RING), needed for munmap on teardown.
	void * ring_ptr;
	size_t ring_sz;
};

// User-space view of the completion queue (CQ) shared with the kernel.
// All pointer fields point into the mmaped CQ ring region and are
// resolved from the offsets the kernel returns in io_uring_params.
struct io_uring_cq {
	// Head and tail of the ring. Shared with the kernel, hence
	// volatile: the kernel advances tail as it posts completions,
	// user space advances head as it consumes them.
	volatile uint32_t * head;
	volatile uint32_t * tail;

	// number of entries and mask to go with it
	// (mask == num - 1; ring indexes are wrapped with `& *mask`)
	const uint32_t * mask;
	const uint32_t * num;

	// Count of completions dropped because the CQ ring was full
	// (per io_uring_setup(2)).
	uint32_t * overflow;

	// the kernel ring of completion entries; unlike the SQ there is
	// no indirection array — cqes ARE the ring
	struct io_uring_cqe * cqes;

	// The location and size of the mmaped CQ ring area
	// (IORING_OFF_CQ_RING), needed for munmap on teardown.
	void * ring_ptr;
	size_t ring_sz;
};

// One io_uring instance: both queues plus the ring file descriptor.
struct io_ring {
	struct io_uring_sq submit_q;
	struct io_uring_cq completion_q;
	// NOTE(review): presumably the io_uring_params.flags the ring was
	// created with — init_uring only zeroes it; confirm intended use.
	uint32_t flags;
	// fd returned by io_uring_setup; used for the ring mmaps and for
	// io_uring_enter/io_uring_register.
	int fd;
};

// Process-wide singleton holding the one io_uring instance this
// module manages; populated by init_uring.
struct IO_singleton {
	io_ring io;
};

// Global instance — zero-initialized at startup, filled by init_uring.
IO_singleton self;

// Create and map the process-wide io_uring instance (self.io).
//
//   Step 1: io_uring_setup(2) creates the ring fd and fills `params`
//           with the offsets of the kernel-shared ring fields.
//   Step 2: mmap the SQ ring, the CQ ring and the sqe buffer.
//   Step 3: resolve the kernel-provided offsets into pointers.
//
// nentries: requested number of submission entries (the kernel may
//           round it up; actual counts come back in params).
// On any kernel failure this prints the errno string to stderr and
// abort()s — there is no error return.
void init_uring(uint32_t nentries) {
	// Step 1 : call to setup
	struct io_uring_params params;
	memset(&params, 0, sizeof(params));
	// params.flags = IORING_SETUP_SQPOLL;

	int fd = syscall(__NR_io_uring_setup, nentries, &params);
	if (fd < 0) {
		fprintf(stderr, "KERNEL ERROR: IO_URING SETUP - %s\n", strerror(errno));
		abort();
	}

	// Step 2 : mmap the rings the kernel allocated for this fd
	memset(&self.io, 0, sizeof(struct io_ring));
	struct io_uring_sq & sq = self.io.submit_q;
	struct io_uring_cq & cq = self.io.completion_q;

	// Each ring region ends with its variable-length array, so the
	// size to map is that array's offset plus entries * element size.
	sq.ring_sz = params.sq_off.array + (params.sq_entries * sizeof(unsigned)           );
	cq.ring_sz = params.cq_off.cqes  + (params.cq_entries * sizeof(struct io_uring_cqe));

	// NOTE: with IORING_FEAT_SINGLE_MMAP (kernel 5.4+) both rings can
	// share one mapping of max(sq.ring_sz, cq.ring_sz); not used here
	// yet because the required feature check is not wired up.

	// mmap the Submit Queue into existence
	sq.ring_ptr = mmap(0, sq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (sq.ring_ptr == MAP_FAILED) {
		close(fd);
		fprintf(stderr, "KERNEL ERROR: IO_URING MMAP1 - %s\n", strerror(errno));
		abort();
	}

	// mmap the Completion Queue into existence (separate mapping;
	// see the single-mmap note above)
	cq.ring_ptr = mmap(0, cq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	if (cq.ring_ptr == MAP_FAILED) {
		munmap(sq.ring_ptr, sq.ring_sz);
		close(fd);
		fprintf(stderr, "KERNEL ERROR: IO_URING MMAP2 - %s\n", strerror(errno));
		abort();
	}

	// mmap the submit queue entries (the sqe buffer the SQ ring
	// indexes into)
	size_t size = params.sq_entries * sizeof(struct io_uring_sqe);
	sq.sqes = (struct io_uring_sqe *)mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
	if (sq.sqes == MAP_FAILED) {
		munmap(sq.ring_ptr, sq.ring_sz);
		if (cq.ring_ptr != sq.ring_ptr) munmap(cq.ring_ptr, cq.ring_sz);
		close(fd);
		fprintf(stderr, "KERNEL ERROR: IO_URING MMAP3 - %s\n", strerror(errno));
		abort();
	}

	// Step 3 : resolve the kernel offsets into pointers.
	// Byte-offset via char* rather than round-tripping through the
	// signed intptr_t (offsets are unsigned byte counts).
	char * sq_base = (char *)sq.ring_ptr;
	sq.head    = (volatile uint32_t *)(sq_base + params.sq_off.head);
	sq.tail    = (volatile uint32_t *)(sq_base + params.sq_off.tail);
	sq.mask    = (   const uint32_t *)(sq_base + params.sq_off.ring_mask);
	sq.num     = (   const uint32_t *)(sq_base + params.sq_off.ring_entries);
	sq.flags   = (         uint32_t *)(sq_base + params.sq_off.flags);
	sq.dropped = (         uint32_t *)(sq_base + params.sq_off.dropped);
	sq.array   = (         uint32_t *)(sq_base + params.sq_off.array);
	// Start the user-side allocation cursor where the kernel ring
	// currently ends.
	sq.alloc = *sq.tail;

	// completion queue
	char * cq_base = (char *)cq.ring_ptr;
	cq.head     = (volatile uint32_t *)(cq_base + params.cq_off.head);
	cq.tail     = (volatile uint32_t *)(cq_base + params.cq_off.tail);
	cq.mask     = (   const uint32_t *)(cq_base + params.cq_off.ring_mask);
	cq.num      = (   const uint32_t *)(cq_base + params.cq_off.ring_entries);
	cq.overflow = (         uint32_t *)(cq_base + params.cq_off.overflow);
	cq.cqes   = (struct io_uring_cqe *)(cq_base + params.cq_off.cqes);

	self.io.fd = fd;
}