Index: benchmark/io/sendfile/producer.c
===================================================================
--- benchmark/io/sendfile/producer.c	(revision 6dc17a3de19005dbdddd38ebb540cd6a31e09864)
+++ benchmark/io/sendfile/producer.c	(revision 3263e2a49d27f9e862a60b38065ba8538ddd3aae)
@@ -10,4 +10,5 @@
 
 #include <errno.h>
+#include <locale.h>
 #include <time.h>
 #include <unistd.h>
@@ -24,4 +25,5 @@
 #include <netdb.h>
 
+#include <liburing.h>
 
 enum {
@@ -34,5 +36,6 @@
 	SENDFILE_ERROR,
 	SPLICEIN_ERROR,
-	SPLICEOUT_ERROR
+	SPLICEOUT_ERROR,
+	URINGWAIT_ERROR
 };
 
@@ -43,4 +46,5 @@
 
 int pipefd[2];
+struct io_uring ring;
 
 struct stats {
@@ -56,4 +60,6 @@
 static void my_sendfile(int out, int in, size_t size, struct stats *);
 static void my_splice  (int out, int in, size_t size, struct stats *);
+static void my_iouring (int out, int in, size_t size, struct stats *);
+static void my_ringlink(int out, int in, size_t size, struct stats *);
 typedef void (*sender_t)(int out, int in, size_t size, struct stats *);
 
@@ -61,4 +67,5 @@
 
 int main(int argc, char * argv[]) {
+	setlocale(LC_ALL, "");
 	const char * file_path;
 	struct addrinfo * addr;
@@ -167,4 +174,6 @@
 	DONE:
 
+	io_uring_queue_init(16, &ring, 0);
+
 	{
 		char addr_str[INET_ADDRSTRLEN];
@@ -211,4 +220,8 @@
 	printf("--- sendfile ---\n");
 	run(my_sendfile, addr, file_fd, file_size);
+	printf("--- io_uring ---\n");
+	run(my_iouring, addr, file_fd, file_size);
+	printf("--- io_uring + link ---\n");
+	run(my_ringlink, addr, file_fd, file_size);
 
 	close(pipefd[0]);
@@ -258,4 +271,5 @@
 	printf("Sent %'zu bytes in %'zu files, %f seconds\n", st.bytes, st.calls, secs);
 	printf(" - %'3.3f bytes per second\n", (((double)st.bytes) / secs));
+	printf(" - %'f seconds per file\n", secs / st.calls);
 	printf(" - %'3.3f bytes per calls\n", (((double)st.bytes) / st.calls));
 	if(st.shorts.r.cnt ){
@@ -323,2 +337,146 @@
 	st->bytes += writes;
 }
+
+static ssize_t naive_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) {
+	/* splice(2)-shaped helper: queues one splice SQE on the global ring, submits it and blocks for its CQE. */
+	/* Returns the bytes moved and advances *off_in / *off_out by that amount; returns -1 with errno set on failure. */
+	struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+	/* a NULL offset pointer is forwarded as -1, i.e. no explicit offset for that descriptor */
+	io_uring_prep_splice(sqe, fd_in, NULL != off_in ? *off_in: -1, fd_out, NULL != off_out ? *off_out: -1, len, flags);
+	io_uring_submit(&ring);
+
+	struct io_uring_cqe * cqe = NULL;
+	/* wait for the sqe to complete */
+	int ret = io_uring_wait_cqe_nr(&ring, &cqe, 1);
+	if( 0 != ret ) {
+		fprintf( stderr, "io_uring_wait error: (%d) %s\n\n", (int)-ret, strerror(-ret) );
+		exit( URINGWAIT_ERROR );
+	}
+
+	/* copy the result out and retire the cqe on every path, otherwise the next wait returns this same entry */
+	ssize_t val = cqe->res;
+	io_uring_cqe_seen(&ring, cqe);
+	if( val < 0 ) {
+		printf("Completion Error : %s\n", strerror( (int)-val ));
+		errno = (int)-val; /* callers test for < 0 and then report errno, so fail the way splice(2) does */
+		return -1;
+	}
+	if( NULL != off_in  ) *off_in  += val;
+	if( NULL != off_out ) *off_out += val;
+	return val;
+}
+
+static void my_iouring (int out, int in, size_t size, struct stats * st) {
+	/* Send `size` bytes of `in` to `out` through the global pipe, one blocking naive_splice() call at a time. */
+	const unsigned flags = 0; //SPLICE_F_MOVE; // | SPLICE_F_MORE;
+	off_t offset = 0; /* only ever handed to naive_splice by address; this loop never changes it itself */
+	size_t sent = 0;
+	for(;;) {
+		/* in -> pipe: ask for everything that is left, the call may move less */
+		const ssize_t got = naive_splice(in, &offset, pipefd[1], NULL, size, flags);
+		if( got < 0 ) {
+			fprintf( stderr, "splice in error: (%d) %s\n\n", (int)errno, strerror(errno) );
+			exit( SPLICEIN_ERROR );
+		}
+		size -= got;
+		/* pipe -> out: keep going until everything just buffered has left the pipe */
+		for(size_t pending = got;;) {
+			const ssize_t put = naive_splice(pipefd[0], NULL, out, NULL, pending, flags);
+			if( put < 0 ) {
+				fprintf( stderr, "splice out error: (%d) %s\n\n", (int)errno, strerror(errno) );
+				exit( SPLICEOUT_ERROR );
+			}
+			pending -= put;
+			sent += put;
+			if(0 == pending) break;
+			st->shorts.w.cnt++;
+			st->shorts.w.bytes += put;
+		}
+		if(0 == size) break;
+		/* more of the input remains, so this pass is recorded as a short read */
+		st->shorts.r.cnt++;
+		st->shorts.r.bytes += got;
+	}
+	st->calls++;
+	st->bytes += sent;
+}
+
+static void my_ringlink(int out, int in, size_t size, struct stats * st) {
+	/* Send `size` bytes of `in` to `out` through the global pipe with a splice-in and a splice-out queued together. */
+	enum { SPLICE_IN, SPLICE_OUT }; /* user_data tags telling the two kinds of completion apart */
+	size_t in_pipe = size; /* bytes still to splice out; in_pipe - size is what was spliced in but not yet out */
+	off_t offset = 0;
+	bool has_in = false;
+	bool has_out = false;
+	while(true) {
+		if(!has_in && size > 0) {
+			struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_splice(sqe, in, offset, pipefd[1], -1, size, 0);
+			sqe->user_data = SPLICE_IN;
+			/* A link flag chains an sqe to the NEXT one of the same submission, so it belongs on the splice-in. */
+			/* Hard link so a short splice-in does not cancel the splice-out; only link while the pipe is empty. */
+			if(!has_out && in_pipe == size) sqe->flags |= IOSQE_IO_HARDLINK;
+			has_in = true;
+		}
+		if(!has_out) {
+			struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_splice(sqe, pipefd[0], -1, out, -1, in_pipe, 0);
+			sqe->user_data = SPLICE_OUT;
+			has_out = true;
+		}
+
+		int ret = io_uring_submit_and_wait(&ring, 1);
+		if(ret < 0) {
+			fprintf( stderr, "io_uring_submit error: (%d) %s\n\n", (int)-ret, strerror(-ret) );
+			exit( URINGWAIT_ERROR );
+		}
+
+		/* drain every completion that is already available */
+		while(true) {
+			struct io_uring_cqe * cqe = NULL;
+			/* wait_nr of 0 only peeks: -EAGAIN below means the completion queue is empty */
+			int peek = io_uring_wait_cqe_nr(&ring, &cqe, 0);
+			switch(peek) {
+			case 0:
+				if( cqe->res < 0 ) {
+					printf("Completion Error : %s\n", strerror( -cqe->res ));
+					exit( URINGWAIT_ERROR );
+				}
+				/* copy what is needed out of the cqe before handing it back to the ring */
+				ssize_t moved = cqe->res;
+				int which = cqe->user_data;
+				io_uring_cqe_seen(&ring, cqe);
+				switch( which ) {
+				case SPLICE_IN:
+					has_in = false;
+					size -= moved;
+					offset += moved;
+					if(0 == size) break;
+					st->shorts.r.cnt++;
+					st->shorts.r.bytes += moved;
+					break;
+				case SPLICE_OUT:
+					has_out = false;
+					in_pipe -= moved;
+					st->bytes += moved;
+					if(0 == in_pipe) break;
+					st->shorts.w.cnt++;
+					st->shorts.w.bytes += moved;
+					break;
+				default:
+					printf("Completion Error : unknown user data\n");
+					exit( URINGWAIT_ERROR );
+				}
+				continue;
+			case -EAGAIN:
+				goto OUTER;
+			default:
+				fprintf( stderr, "io_uring_get_cqe error: (%d) %s\n\n", (int)-peek, strerror(-peek) );
+				exit( URINGWAIT_ERROR );
+			}
+		}
+		OUTER:
+		if(0 == in_pipe) break;
+	}
+	st->calls++;
+}
