- Timestamp:
- Jul 11, 2020, 6:41:48 PM (6 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum, stuck-waitfor-destruct
- Children:
- a3d3efc
- Parents:
- fc9bb79 (diff), 7922158 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - Location:
- benchmark/io
- Files:
-
- 1 added
- 2 edited
-
batch-readv.c (modified) (4 diffs)
-
io_uring.h (added)
-
readv.cfa (modified) (10 diffs)
Legend:
- Unmodified
- Added
- Removed
-
benchmark/io/batch-readv.c
rfc9bb79 rd34575b 1 1 // Program to test the optimial batchsize in a single threaded process 2 2 extern "C" { 3 #ifndef _GNU_SOURCE /* See feature_test_macros(7) */ 4 #define _GNU_SOURCE /* See feature_test_macros(7) */ 5 #endif 6 #include <errno.h> 7 #include <stdio.h> 8 #include <stdint.h> 9 #include <stdlib.h> 10 #include <string.h> 3 #include <getopt.h> 11 4 #include <locale.h> 12 #include <getopt.h>13 #include <unistd.h>14 #include <sys/mman.h>15 #include <sys/syscall.h>16 #include <sys/uio.h>17 #include <fcntl.h>18 5 #include <time.h> // timespec 19 6 #include <sys/time.h> // timeval 20 21 #include <linux/io_uring.h>22 7 } 23 24 8 25 9 enum { TIMEGRAN = 1000000000LL }; // nanosecond granularity, except for timeval … … 27 11 #include <omp.h> 28 12 29 # ifndef __NR_io_uring_setup 30 # define __NR_io_uring_setup 425 31 # endif 32 # ifndef __NR_io_uring_enter 33 # define __NR_io_uring_enter 426 34 # endif 35 # ifndef __NR_io_uring_register 36 # define __NR_io_uring_register 427 37 # endif 13 #include "io_uring.h" 38 14 39 struct io_uring_sq {40 // Head and tail of the ring (associated with array)41 volatile uint32_t * head;42 volatile uint32_t * tail;43 15 44 // The actual kernel ring which uses head/tail45 // indexes into the sqes arrays46 uint32_t * array;47 48 // number of entries and mask to go with it49 const uint32_t * num;50 const uint32_t * mask;51 52 // Submission flags (Not sure what for)53 uint32_t * flags;54 55 // number of sqes not submitted (whatever that means)56 uint32_t * dropped;57 58 // Like head/tail but not seen by the kernel59 volatile uint32_t alloc;60 61 // A buffer of sqes (not the actual ring)62 struct io_uring_sqe * sqes;63 64 // The location and size of the mmaped area65 void * ring_ptr;66 size_t ring_sz;67 };68 69 struct io_uring_cq {70 // Head and tail of the ring71 volatile uint32_t * head;72 volatile uint32_t * tail;73 74 // number of entries and mask to go with it75 const uint32_t * mask;76 const uint32_t * num;77 78 // number of 
cqes not submitted (whatever that means)79 uint32_t * overflow;80 81 // the kernel ring82 struct io_uring_cqe * cqes;83 84 // The location and size of the mmaped area85 void * ring_ptr;86 size_t ring_sz;87 };88 89 struct io_ring {90 struct io_uring_sq submit_q;91 struct io_uring_cq completion_q;92 uint32_t flags;93 int fd;94 };95 96 struct fred {97 io_ring io;98 };99 100 fred self;101 16 int myfd; 102 17 … … 217 132 myfd = open(__FILE__, 0); 218 133 219 // Step 1 : call to setup 220 struct io_uring_params params; 221 memset(&params, 0, sizeof(params)); 222 223 uint32_t nentries = 2048; 224 225 int fd = syscall(__NR_io_uring_setup, nentries, &params ); 226 if(fd < 0) { 227 fprintf(stderr, "KERNEL ERROR: IO_URING SETUP - %s\n", strerror(errno)); 228 abort(); 229 } 230 231 // Step 2 : mmap result 232 memset(&self.io, 0, sizeof(struct io_ring)); 233 struct io_uring_sq & sq = self.io.submit_q; 234 struct io_uring_cq & cq = self.io.completion_q; 235 236 // calculate the right ring size 237 sq.ring_sz = params.sq_off.array + (params.sq_entries * sizeof(unsigned) ); 238 cq.ring_sz = params.cq_off.cqes + (params.cq_entries * sizeof(struct io_uring_cqe)); 239 240 // Requires features 241 // // adjust the size according to the parameters 242 // if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) { 243 // cq->ring_sz = sq->ring_sz = max(cq->ring_sz, sq->ring_sz); 244 // } 245 246 // mmap the Submit Queue into existence 247 sq.ring_ptr = mmap(0, sq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); 248 if (sq.ring_ptr == (void*)MAP_FAILED) { 249 fprintf(stderr, "KERNEL ERROR: IO_URING MMAP1 - %s\n", strerror(errno)); 250 abort(); 251 } 252 253 // mmap the Completion Queue into existence (may or may not be needed) 254 // Requires features 255 // if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) { 256 // cq->ring_ptr = sq->ring_ptr; 257 // } 258 // else { 259 // We need multiple call to MMAP 260 cq.ring_ptr = mmap(0, cq.ring_sz, PROT_READ | 
PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); 261 if (cq.ring_ptr == (void*)MAP_FAILED) { 262 munmap(sq.ring_ptr, sq.ring_sz); 263 fprintf(stderr, "KERNEL ERROR: IO_URING MMAP2 - %s\n", strerror(errno)); 264 abort(); 265 } 266 // } 267 268 // mmap the submit queue entries 269 size_t size = params.sq_entries * sizeof(struct io_uring_sqe); 270 sq.sqes = (struct io_uring_sqe *)mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); 271 if (sq.sqes == (struct io_uring_sqe *)MAP_FAILED) { 272 munmap(sq.ring_ptr, sq.ring_sz); 273 if (cq.ring_ptr != sq.ring_ptr) munmap(cq.ring_ptr, cq.ring_sz); 274 fprintf(stderr, "KERNEL ERROR: IO_URING MMAP3 - %s\n", strerror(errno)); 275 abort(); 276 } 277 278 // Get the pointers from the kernel to fill the structure 279 // submit queue 280 sq.head = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.head); 281 sq.tail = (volatile uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.tail); 282 sq.mask = ( const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_mask); 283 sq.num = ( const uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.ring_entries); 284 sq.flags = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.flags); 285 sq.dropped = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped); 286 sq.array = ( uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array); 287 sq.alloc = *sq.tail; 288 289 // completion queue 290 cq.head = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.head); 291 cq.tail = (volatile uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail); 292 cq.mask = ( const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_mask); 293 cq.num = ( const uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.ring_entries); 294 cq.overflow = ( uint32_t *)(((intptr_t)cq.ring_ptr) + params.cq_off.overflow); 295 cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes); 296 297 self.io.fd = fd; 134 
init_uring(2048); 298 135 299 136 // Allocate the sqe … … 344 181 345 182 printf("Took %'ld ms\n", to_miliseconds(end - start)); 346 printf("Submitted %'llu\n", submits); 347 printf("Completed %'llu\n", completes); 348 printf("Submitted / sec %'.f\n", submits / to_fseconds(end - start)); 349 printf("Completed / sec %'.f\n", completes / to_fseconds(end - start)); 183 printf("Submitted %'llu\n", submits); 184 printf("Completed %'llu\n", completes); 185 printf("Submitted / sec %'.f\n", submits / to_fseconds(end - start)); 186 printf("Completed / sec %'.f\n", completes / to_fseconds(end - start)); 187 printf("ns per Submitted %'.f\n", 1000000000.0 * to_fseconds(end - start) / (submits / batch) ); 188 printf("ns per Completed %'.f\n", 1000000000.0 * to_fseconds(end - start) / (completes / batch) ); 350 189 } -
benchmark/io/readv.cfa
rfc9bb79 rd34575b 1 #define _GNU_SOURCE 2 1 3 #include <stdlib.h> 2 4 #include <stdio.h> … … 22 24 extern bool traceHeapOn(); 23 25 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags); 26 extern ssize_t cfa_preadv2_fixed(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags); 27 extern void register_fixed_files( cluster &, int *, unsigned count ); 24 28 25 29 int fd; … … 28 32 29 33 unsigned long int buflen = 50; 34 bool fixed_file = false; 30 35 31 36 thread __attribute__((aligned(128))) Reader {}; 32 37 void ?{}( Reader & this ) { 33 38 ((thread&)this){ "Reader Thread", *the_benchmark_cluster }; 39 } 40 41 int do_read(int fd, struct iovec * iov) { 42 if(fixed_file) { 43 return cfa_preadv2_fixed(fd, iov, 1, 0, 0); 44 } 45 else { 46 return cfa_preadv2(fd, iov, 1, 0, 0); 47 } 34 48 } 35 49 … … 42 56 43 57 while(__atomic_load_n(&run, __ATOMIC_RELAXED)) { 44 int r = cfa_preadv2(fd, &iov, 1, 0, 0);58 int r = do_read(fd, &iov); 45 59 if(r < 0) abort("%s\n", strerror(-r)); 46 60 … … 52 66 BENCH_DECL 53 67 unsigned flags = 0; 68 int file_flags = 0; 54 69 unsigned sublen = 16; 55 70 … … 58 73 static struct option options[] = { 59 74 BENCH_OPT_LONG 60 {"bufsize", required_argument, 0, 'b'}, 61 {"userthread", no_argument , 0, 'u'}, 62 {"submitthread", no_argument , 0, 's'}, 63 {"submitlength", required_argument, 0, 'l'}, 75 {"bufsize", required_argument, 0, 'b'}, 76 {"userthread", no_argument , 0, 'u'}, 77 {"submitthread", no_argument , 0, 's'}, 78 {"eagersubmit", no_argument , 0, 'e'}, 79 {"kpollsubmit", no_argument , 0, 'k'}, 80 {"kpollcomplete", no_argument , 0, 'i'}, 81 {"submitlength", required_argument, 0, 'l'}, 64 82 {0, 0, 0, 0} 65 83 }; 66 84 67 85 int idx = 0; 68 int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:us l:", options, &idx);86 int opt = getopt_long(argc, argv, BENCH_OPT_SHORT "b:usekil:", options, &idx); 69 87 70 88 const char * arg = optarg ? 
optarg : ""; … … 88 106 flags |= CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS; 89 107 break; 108 case 'e': 109 flags |= CFA_CLUSTER_IO_EAGER_SUBMITS; 110 break; 111 case 'k': 112 flags |= CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS; 113 fixed_file = true; 114 break; 115 case 'i': 116 flags |= CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES; 117 file_flags |= O_DIRECT; 118 break; 90 119 case 'l': 91 120 sublen = strtoul(arg, &end, 10); … … 103 132 fprintf( stderr, " -u, --userthread If set, cluster uses user-thread to poll I/O\n" ); 104 133 fprintf( stderr, " -s, --submitthread If set, cluster uses polling thread to submit I/O\n" ); 134 fprintf( stderr, " -e, --eagersubmit If set, cluster submits I/O eagerly but still aggregates submits\n" ); 135 fprintf( stderr, " -k, --kpollsubmit If set, cluster uses IORING_SETUP_SQPOLL\n" ); 136 fprintf( stderr, " -i, --kpollcomplete If set, cluster uses IORING_SETUP_IOPOLL\n" ); 137 fprintf( stderr, " -l, --submitlength=LEN Max number of submitions that can be submitted together\n" ); 105 138 exit(EXIT_FAILURE); 106 139 } 107 140 } 108 141 109 fd = open(__FILE__, 0);110 if( fd < 0) {142 int lfd = open(__FILE__, file_flags); 143 if(lfd < 0) { 111 144 fprintf(stderr, "Could not open source file\n"); 112 145 exit(EXIT_FAILURE); … … 118 151 Time start, end; 119 152 BenchCluster cl = { flags, CFA_STATS_READY_Q | CFA_STATS_IO }; 153 154 if(fixed_file) { 155 fd = 0; 156 register_fixed_files( cl.self, &lfd, 1 ); 157 } 158 else { 159 fd = lfd; 160 } 161 120 162 { 121 163 BenchProc procs[nprocs]; … … 145 187 } 146 188 147 close( fd);189 close(lfd); 148 190 }
Note:
See TracChangeset
for help on using the changeset viewer.