// Program to test the optimial batchsize in a single threaded process extern "C" { #include #include #include // timespec #include // timeval } enum { TIMEGRAN = 1000000000LL }; // nanosecond granularity, except for timeval #include #include "io_uring.h" int myfd; long long unsigned submits = 0; long long unsigned completes = 0; void submit_and_drain(struct iovec * iov, int n) { for(int i = 0; i < n; i++) { struct io_uring_sqe * sqe = &self.io.submit_q.sqes[ 0 ]; sqe->opcode = IORING_OP_READV; #if !defined(IOSQE_ASYNC) sqe->flags = 0; #else sqe->flags = IOSQE_ASYNC; #endif sqe->ioprio = 0; sqe->fd = myfd; sqe->off = 0; sqe->addr = (__u64)iov; sqe->len = 1; sqe->rw_flags = 0; sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; } volatile uint32_t * tail = self.io.submit_q.tail; __atomic_fetch_add(tail, n, __ATOMIC_SEQ_CST); int ret = syscall( __NR_io_uring_enter, self.io.fd, n, n, IORING_ENTER_GETEVENTS, nullptr, 0); if( ret < 0 ) { switch((int)errno) { case EAGAIN: case EINTR: default: fprintf(stderr, "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) ); abort(); } } submits += ret; uint32_t chead = *self.io.completion_q.head; uint32_t ctail = *self.io.completion_q.tail; const uint32_t mask = *self.io.completion_q.mask; // Memory barrier __atomic_thread_fence( __ATOMIC_SEQ_CST ); uint32_t count = ctail - chead; __atomic_fetch_add( self.io.completion_q.head, count, __ATOMIC_RELAXED ); completes += count; } uint64_t getTimeNsec() { timespec curr; clock_gettime( CLOCK_REALTIME, &curr ); return (int64_t)curr.tv_sec * TIMEGRAN + curr.tv_nsec; } uint64_t to_miliseconds( uint64_t durtn ) { return durtn / (TIMEGRAN / 1000LL); } double to_fseconds(uint64_t durtn ) { return durtn / (double)TIMEGRAN; } uint64_t from_fseconds(double sec) { return sec * TIMEGRAN; } int main(int argc, char * argv[]) { int buflen = 50; int batch = 1; double duration = 5; setlocale(LC_ALL, ""); for(;;) { static struct option options[] = { {"duration", required_argument, 0, 'd'}, {"batchsize", required_argument, 0, 'b'}, {"buflen", required_argument, 0, 'l'}, {0, 0, 0, 0} }; int idx = 0; int opt = getopt_long(argc, argv, "d:l:b:", options, &idx); const char * arg = optarg ? optarg : ""; char * end; switch(opt) { // Exit Case case -1: goto arg_loop; case 'd': \ duration = strtod(arg, &end); \ if(*end != '\0') { \ fprintf(stderr, "Duration must be a valid double, was %s\n", arg); \ goto usage; \ } \ break; case 'l': buflen = strtoul(arg, &end, 10); if(*end != '\0' && buflen < 10) { fprintf(stderr, "Buffer size must be at least 10, was %s\n", arg); goto usage; } case 'b': batch = strtoul(arg, &end, 10); if(*end != '\0' && batch < 0) { fprintf(stderr, "Batch size must be at least 1, was %s\n", arg); goto usage; } break; default: /* ? */ fprintf(stderr, "%d\n", opt); usage: fprintf( stderr, " -l, --buflen=SIZE Number of bytes to read per request\n" ); fprintf( stderr, " -b, --batchsize=COUNT Number of request to batch together\n" ); exit(EXIT_FAILURE); } } arg_loop: myfd = open(__FILE__, 0); init_uring(2048); // Allocate the sqe uint32_t idx = 0; // Return the sqe struct io_uring_sqe * sqe = &self.io.submit_q.sqes[ idx & (*self.io.submit_q.mask)]; char data[buflen]; struct iovec iov = { data, (size_t)buflen }; sqe->opcode = IORING_OP_READV; #if !defined(IOSQE_ASYNC) sqe->flags = 0; #else sqe->flags = IOSQE_ASYNC; #endif sqe->ioprio = 0; sqe->fd = myfd; sqe->off = 0; sqe->addr = (__u64)&iov; sqe->len = 1; sqe->rw_flags = 0; sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; // Append to the list of ready entries for(unsigned i = 0; i < *self.io.submit_q.num; i++) { self.io.submit_q.array[ i ] = 0; } printf("Running for %f second, reading %d bytes in batches of %d\n", duration, buflen, batch); uint64_t start = getTimeNsec(); uint64_t end = getTimeNsec(); uint64_t prev = getTimeNsec(); for(;;) { submit_and_drain(&iov, batch); end = getTimeNsec(); uint64_t delta = end - start; if( to_fseconds(end - prev) > 0.1 ) { printf(" %.1f\r", to_fseconds(delta)); fflush(stdout); prev = end; } if( delta >= from_fseconds(duration) ) { break; } } printf("Took %'ld ms\n", to_miliseconds(end - start)); printf("Submitted %'llu\n", submits); printf("Completed %'llu\n", completes); printf("Submitted / sec %'.f\n", submits / to_fseconds(end - start)); printf("Completed / sec %'.f\n", completes / to_fseconds(end - start)); printf("ns per Submitted %'.f\n", 1000000000.0 * to_fseconds(end - start) / (submits / batch) ); printf("ns per Completed %'.f\n", 1000000000.0 * to_fseconds(end - start) / (completes / batch) ); }