- Timestamp: Jan 1, 2022, 11:14:35 AM (4 years ago)
- Branches: ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
- Children: 12c1eef
- Parents: 7770cc8 (diff), db1ebed (diff)

Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.

- Location: libcfa/src
- Files: 2 added, 9 edited
Legend: in the diffs below, lines prefixed with "+" were added, lines prefixed with "-" were removed, and unprefixed lines are unmodified context. Hunks within a file are separated by "…".
libcfa/src/Makefile.am
(diff r7770cc8 → r5235d49)

 	time.hfa \
 	bits/weakso_locks.hfa \
+	algorithms/range_iterator.hfa \
 	containers/maybe.hfa \
 	containers/pair.hfa \
libcfa/src/concurrency/io.cfa
(diff r7770cc8 → r5235d49)

 #include <sys/syscall.h>
 #include <sys/eventfd.h>
+#include <sys/uio.h>

 #include <linux/io_uring.h>
…
 }

-bool __cfa_io_flush( processor * proc, bool wait ) {
+bool __cfa_io_flush( processor * proc, int min_comp ) {
 	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verify( proc );
…
 	__STATS__( true, io.calls.flush++; )
-	int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, wait ? 1 : 0, 0, (sigset_t *)0p, _NSIG / 8);
+	int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
 	if( ret < 0 ) {
 		switch((int)errno) {
…
 	ctx->proc->io.dirty = true;
 	if(sq.to_submit > 30 || !lazy) {
-		__cfa_io_flush( ctx->proc, false );
+		__cfa_io_flush( ctx->proc, 0 );
 	}
 }
…
 }

-#if defined(IO_URING_IDLE)
-	bool __kernel_read(processor * proc, io_future_t & future, char buf[], int fd) {
+#if defined(CFA_WITH_IO_URING_IDLE)
+	bool __kernel_read(processor * proc, io_future_t & future, iovec & iov, int fd) {
 		$io_context * ctx = proc->io.ctx;
 		/* paranoid */ verify( ! __preemption_enabled() );
…
 		__fill( &sqe, 1, &idx, ctx );

-		sqe->opcode = IORING_OP_READ;
 		sqe->user_data = (uintptr_t)&future;
 		sqe->flags = 0;
+		sqe->fd = fd;
+		sqe->off = 0;
 		sqe->ioprio = 0;
-		sqe->fd = 0;
-		sqe->off = 0;
 		sqe->fsync_flags = 0;
 		sqe->__pad2[0] = 0;
 		sqe->__pad2[1] = 0;
 		sqe->__pad2[2] = 0;
-		sqe->addr = (uintptr_t)buf;
-		sqe->len = sizeof(uint64_t);
+
+		#if defined(CFA_HAVE_IORING_OP_READ)
+			sqe->opcode = IORING_OP_READ;
+			sqe->addr = (uint64_t)iov.iov_base;
+			sqe->len = iov.iov_len;
+		#elif defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV)
+			sqe->opcode = IORING_OP_READV;
+			sqe->addr = (uintptr_t)&iov;
+			sqe->len = 1;
+		#else
+			#error CFA_WITH_IO_URING_IDLE but none of CFA_HAVE_READV, CFA_HAVE_IORING_OP_READV or CFA_HAVE_IORING_OP_READ defined
+		#endif

 		asm volatile("": : :"memory");
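The heart of this change is that __cfa_io_flush now receives the minimum number of completions the caller needs and forwards it to io_uring_enter, requesting IORING_ENTER_GETEVENTS only when it actually has to block (min_comp > 0). A minimal stand-alone sketch of that calling convention in plain C; flush_ring, ring_fd and to_submit are illustrative names, not part of the CFA runtime:

```c
#include <errno.h>
#include <signal.h>          // sigset_t, _NSIG
#include <sys/syscall.h>     // __NR_io_uring_enter
#include <unistd.h>          // syscall
#include <linux/io_uring.h>  // IORING_ENTER_GETEVENTS

// Submit any pending SQEs; when min_comp > 0, also block until at least
// that many completions are available in the CQ ring.
static int flush_ring(int ring_fd, unsigned to_submit, unsigned min_comp) {
	unsigned flags = min_comp > 0 ? IORING_ENTER_GETEVENTS : 0;
	int ret = syscall(__NR_io_uring_enter, ring_fd, to_submit, min_comp, flags,
	                  (sigset_t *)0, _NSIG / 8);
	if (ret < 0 && (errno == EINTR || errno == EAGAIN))
		return 0;            // transient; the caller simply retries on a later pass
	return ret;
}
```

Passing 0 preserves the old non-blocking flush behaviour, while the new shutdown loop in kernel.cfa passes 1 so a processor cannot exit while its idle read is still in flight.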
libcfa/src/concurrency/io/setup.cfa
(diff r7770cc8 → r5235d49)

 	void __cfa_io_start( processor * proc ) {}
-	bool __cfa_io_flush( processor * proc, bool ) {}
+	bool __cfa_io_flush( processor * proc, int ) {}
 	void __cfa_io_stop ( processor * proc ) {}
…
 	cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);

-	#if !defined(IO_URING_IDLE)
+	#if !defined(CFA_WITH_IO_URING_IDLE)
 		// Step 4 : eventfd
 		// io_uring_register is so f*cking slow on some machine that it
libcfa/src/concurrency/kernel.cfa
(diff r7770cc8 → r5235d49)

 extern "C" {
 	#include <sys/eventfd.h>
+	#include <sys/uio.h>
 }
…
 static void __wake_one(cluster * cltr);

-static void idle_sleep(processor * proc, io_future_t & future, char buf[]);
+static void idle_sleep(processor * proc, io_future_t & future, iovec & iov);
 static bool mark_idle (__cluster_proc_list & idles, processor & proc);
 static void mark_awake(__cluster_proc_list & idles, processor & proc);

 extern void __cfa_io_start( processor * );
 extern bool __cfa_io_drain( processor * );
-extern bool __cfa_io_flush( processor *, bool wait );
+extern bool __cfa_io_flush( processor *, int min_comp );
 extern void __cfa_io_stop ( processor * );
 static inline bool __maybe_io_drain( processor * );

-#if defined(IO_URING_IDLE) && defined(CFA_HAVE_LINUX_IO_URING_H)
-	extern bool __kernel_read(processor * proc, io_future_t & future, char buf[], int fd);
+#if defined(CFA_WITH_IO_URING_IDLE)
+	extern bool __kernel_read(processor * proc, io_future_t & future, iovec &, int fd);
 #endif
…
 	io_future_t future; // used for idle sleep when io_uring is present
 	future.self.ptr = 1p; // mark it as already fulfilled so we know if there is a pending request or not
-	char buf[sizeof(uint64_t)];
+	eventfd_t idle_val;
+	iovec idle_iovec = { &idle_val, sizeof(idle_val) };

 	__cfa_io_start( this );
…
 		if( !readyThread ) {
-			__cfa_io_flush( this, false );
+			__cfa_io_flush( this, 0 );

 			readyThread = __next_thread_slow( this->cltr );
…
 			}

-			idle_sleep( this, future, buf );
+			idle_sleep( this, future, idle_iovec );

 			// We were woken up, remove self from idle
…
 		if(this->io.pending && !this->io.dirty) {
-			__cfa_io_flush( this, false );
+			__cfa_io_flush( this, 0 );
 		}
…
 		// If we can't find a thread, might as well flush any outstanding I/O
-		if(this->io.pending) { __cfa_io_flush( this, false ); }
+		if(this->io.pending) { __cfa_io_flush( this, 0 ); }

 		// Spin a little on I/O, just in case
…
 		if(this->io.pending && !this->io.dirty) {
-			__cfa_io_flush( this, false );
+			__cfa_io_flush( this, 0 );
 		}
…
 		__cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
+	}
+
+	for(int i = 0; !available(future); i++) {
+		if(i > 1000) __cfaabi_dbg_write( "ERROR: kernel has bin spinning on a flush after exit loop.\n", 60);
+		__cfa_io_flush( this, 1 );
 	}
…
 }

-static void idle_sleep(processor * this, io_future_t & future, char buf[]) {
-	#if !defined(IO_URING_IDLE) || !defined(CFA_HAVE_LINUX_IO_URING_H)
+static void idle_sleep(processor * this, io_future_t & future, iovec & iov) {
+	#if !defined(CFA_WITH_IO_URING_IDLE)
 		#if !defined(__CFA_NO_STATISTICS__)
 			if(this->print_halts) {
…
 		#endif
 	#else
-		#if !defined(CFA_HAVE_IORING_OP_READ)
-			#error this is only implemented if the read is present
-		#endif
 		// Do we already have a pending read
 		if(available(future)) {
…
 			reset(future);

-			__kernel_read(this, future, buf, this->idle_fd );
+			__kernel_read(this, future, iov, this->idle_fd );
 		}

-		__cfa_io_flush( this, true );
+		__cfa_io_flush( this, 1 );
 	#endif
 }
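The idle-sleep wake-up buffer is now an iovec describing a 64-bit eventfd counter instead of a raw char array, so the same memory can be handed to IORING_OP_READ or, on older kernels, IORING_OP_READV. The underlying idea, sketched with a plain blocking readv on an eventfd; wait_on_eventfd is a hypothetical helper, not runtime code:

```c
#include <sys/eventfd.h>  // eventfd_t
#include <sys/uio.h>      // struct iovec, readv

// An eventfd read always transfers exactly 8 bytes, so one iovec covering a
// 64-bit counter works equally well for read(), readv(), IORING_OP_READ and
// IORING_OP_READV.
static int wait_on_eventfd(int efd) {
	eventfd_t val;
	struct iovec iov = { .iov_base = &val, .iov_len = sizeof(val) };
	ssize_t r = readv(efd, &iov, 1);   // blocks until another thread writes the eventfd
	return r == (ssize_t)sizeof(val) ? 0 : -1;
}
```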
libcfa/src/concurrency/kernel_private.hfa
(diff r7770cc8 → r5235d49)

 }

-// #define IO_URING_IDLE
+// Defines whether or not we *want* to use io_uring_enter as the idle_sleep blocking call
+#define CFA_WANT_IO_URING_IDLE
+
+// Defines whether or not we *can* use io_uring_enter as the idle_sleep blocking call
+#if defined(CFA_WANT_IO_URING_IDLE) && defined(CFA_HAVE_LINUX_IO_URING_H)
+	#if defined(CFA_HAVE_IORING_OP_READ) || (defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV))
+		#define CFA_WITH_IO_URING_IDLE
+	#endif
+#endif

 //-----------------------------------------------------------------------------
libcfa/src/device/cpu.cfa
(diff r7770cc8 → r5235d49)

 	#include <fcntl.h>
 }
+
+#include "algorithms/range_iterator.hfa"

 // search a string for character 'character' but looking atmost at len
…
 		count++;
 	}
-	iterate_dir(path, lambda);
+	int ret = iterate_dir(path, lambda);
+	if(ret == ENOTDIR) return 0;

 	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);
…
 // Count number of cpus in the system
-static int count_cpus(void) {
+static [int, const char *] count_cpus(void) {
 	const char * fpath = "/sys/devices/system/cpu/online";
 	int fd = open(fpath, 0, O_RDONLY);
…
 	const char * _;
-	int cnt = read_width(buff, r - 1, &_);
-	/* paranoid */ verify(cnt == count_prefix_dirs("/sys/devices/system/cpu", "cpu"));
-	return cnt;
+	return [read_width(buff, r - 1, &_), strndup(buff, r - 1)];
 }
…
 struct raw_cache_instance {
-	idx_range_t range;
-	unsigned width;
-	unsigned char level;
+	idx_range_t range;		// A text description of the cpus covered
+	unsigned width;			// The number of cpus covered
+	unsigned char level;		// the cache level
 	// FIXME add at least size and type
 };
…
 static void ^?{}(raw_cache_instance & this) { free(this.range);}

-raw_cache_instance ** build_raw_cache_table(unsigned cpus, unsigned idxs, unsigned cache_levels)
+// Returns a 2D array of instances of size [cpu count][cache levels]
+// where cache level doesn't include instruction caches
+raw_cache_instance ** build_raw_cache_table(unsigned cpus_c, idx_range_t cpus, unsigned idxs, unsigned cache_levels)
 {
-	raw_cache_instance ** raw = alloc(cpus);
-	for(i; cpus) {
+	raw_cache_instance ** raw = alloc(cpus_c, '\0'`fill);
+
+	RangeIter rc = { cpus };
+	while(moveNext(rc)) {
+		unsigned i = rc.com;
 		raw[i] = alloc(cache_levels);
 		void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
…
 // returns an allocate list of all the different distinct last level caches
-static [*llc_map_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
+static [*llc_map_t, size_t cnt] distinct_llcs(idx_range_t cpus, unsigned llc_idx, raw_cache_instance ** raw) {
 	// Allocate at least one element
 	llc_map_t* ranges = alloc();
 	size_t range_cnt = 1;

+	RangeIter rc = { cpus };
+	__attribute__((unused)) bool ret =
+	moveNext(rc);
+	/* paranoid */ verify( ret );
+	/* paranoid */ verify( rc.com >= 0 );
+
 	// Initialize with element 0
-	ranges->raw = &raw[0][llc_idx];
+	ranges->raw = &raw[rc.com][llc_idx];
 	ranges->count = 0;
 	ranges->start = -1u;

 	// Go over all other cpus
-	CPU_LOOP: for(i; 1~cpus) {
+	CPU_LOOP: while(moveNext(rc)) {
+		unsigned i = rc.com;
 		// Check if the range is already there
 		raw_cache_instance * candidate = &raw[i][llc_idx];
…
 }

-static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
-	cpu_pairing_t * pairings = alloc(cpus);
-
-	CPU_LOOP: for(i; cpus) {
+static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus_c, idx_range_t cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
+	cpu_pairing_t * pairings = alloc(cpus_c);
+
+	RangeIter rc = { cpus };
+	CPU_LOOP: while(moveNext(rc)) {
+		unsigned i = rc.com;
 		pairings[i].cpu = i;
 		idx_range_t want = raw[i][0].range;
…
 extern "C" {
 	void __cfaabi_device_startup( void ) {
-		int cpus = count_cpus();
+		int cpus_c;
+		const char * cpus;
+		[cpus_c, cpus] = count_cpus();
+		#if defined(__CFA_WITH_VERIFY__)
+		// Verify that the mapping is self consistant.
+		{
+			RangeIter rc = { cpus };
+			while(moveNext(rc)) {
+				unsigned i = rc.com;
+				verify(cpus_c > i);
+			}
+		}
+		#endif
+
 		int idxs = count_cache_indexes();
…
 		unsigned cache_levels = 0;
 		unsigned llc = 0;
-		{
+		if (idxs != 0) {
 			unsigned char prev = -1u;
 			void first(unsigned idx, unsigned char level, const char * map, size_t len) {
…
 		// Read in raw data
-		raw_cache_instance ** raw = build_raw_cache_table(cpus, idxs, cache_levels);
+		raw_cache_instance ** raw = build_raw_cache_table(cpus_c, cpus, idxs, cache_levels);

 		// Find number of distinct cache instances
…
 				width2 += maps[i].raw->width;
 			}
-			verify(width1 == cpus);
-			verify(width2 == cpus);
+			verify(width1 == cpus_c);
+			verify(width2 == cpus_c);
 		}
 		#endif

 		// Get mappings from cpu to cache instance
-		cpu_pairing_t * pairings = get_cpu_pairings(cpus, raw, maps, map_cnt);
+		cpu_pairing_t * pairings = get_cpu_pairings(cpus_c, cpus, raw, maps, map_cnt);

 		// Sort by cache instance
-		qsort(pairings, cpus);
+		qsort(pairings, cpus_c);

 		{
 			unsigned it = 0;
-			for(i; cpus) {
+			RangeIter rc = { cpus };
+			while(moveNext(rc)) {
+				unsigned i = rc.com;
 				unsigned llc_id = pairings[i].id;
 				if(maps[llc_id].start == -1u) {
…
 			}
-			/* paranoid */ verify(it == cpus);
+			/* paranoid */ verify(it == cpus_c);
 		}

 		// From the mappings build the actual cpu map we want
-		struct cpu_map_entry_t * entries = alloc(cpus);
-		for(i; cpus) { entries[i].count = 0; }
-		for(i; cpus) {
+		struct cpu_map_entry_t * entries = alloc(cpus_c);
+		for(i; cpus_c) { entries[i].count = 0; }
+
+		RangeIter rc = { cpus };
+		while(moveNext(rc)) {
+			unsigned i = rc.com;
 			/* paranoid */ verify(pairings[i].id < map_cnt);
 			unsigned c = pairings[i].cpu;
…
 		free(pairings);

-		for(i; cpus) {
-			for(j; cache_levels) {
+		for(i; cpus_c) {
+			if( raw[i] ) for(j; cache_levels) {
 				^(raw[i][j]){};
 			}
…
 		cpu_info.llc_map = entries;
-		cpu_info.hthrd_count = cpus;
+		cpu_info.hthrd_count = cpus_c;
+		cpu_info.llc_count = map_cnt;
 	}
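Most of this file stops assuming the hardware threads are numbered 0..N-1 and instead iterates the cpu-list string reported by /sys/devices/system/cpu/online (for example "0-3,8-11"), which can contain holes when cpus are offline. Roughly what such an iteration has to do, sketched with a hypothetical parse_cpu_list helper rather than the real RangeIter from algorithms/range_iterator.hfa:

```c
#include <stdio.h>

// Walk a Linux cpu-list string such as "0-3,8-11,14", calling visit() once
// for each individual cpu id in the list.
static void parse_cpu_list(const char *list, void (*visit)(unsigned cpu)) {
	const char *p = list;
	while (*p) {
		unsigned lo, hi;
		int n = 0;
		if (sscanf(p, "%u-%u%n", &lo, &hi, &n) == 2) { /* "a-b" range */ }
		else if (sscanf(p, "%u%n", &lo, &n) == 1) { hi = lo; /* single cpu */ }
		else break;                       // malformed tail, stop
		for (unsigned c = lo; c <= hi; c++) visit(c);
		p += n;
		if (*p == ',') p++;
	}
}
```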
libcfa/src/device/cpu.hfa
(diff r7770cc8 → r5235d49)

 struct cpu_info_t {
-	// array of size [hthrd_count]
+	// Array of size [hthrd_count]
 	const cpu_map_entry_t * llc_map;

 	// Number of _hardware_ threads present in the system
 	size_t hthrd_count;
+
+	// Number of distinct last level caches
+	size_t llc_count;
 };
libcfa/src/stdlib.cfa
(diff r7770cc8 → r5235d49)

 // Created On       : Thu Jan 28 17:10:29 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Nov 12 07:46:09 2020
-// Update Count     : 503
+// Last Modified On : Wed Dec 29 15:32:44 2021
+// Update Count     : 512
 //
…
 //---------------------------------------

-bool threading_enabled(void) __attribute__((weak)) {
-	return false;
-}
+static uint32_t seed = 0;				// current seed
+static thread_local uint32_t state;			// random state
+
+void set_seed( uint32_t seed_ ) { state = seed = seed_; }
+uint32_t get_seed() { return seed; }
+
+#define GENERATOR LCG
+
+inline uint32_t MarsagliaXor( uint32_t & state ) {
+	if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
+	else if ( unlikely( state == 0 ) ) state = seed;
+	state ^= state << 6;
+	state ^= state >> 21;
+	state ^= state << 7;
+	return state;
+} // MarsagliaXor
+
+inline uint32_t LCG( uint32_t & state ) {		// linear congruential generator
+	if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
+	else if ( unlikely( state == 0 ) ) state = seed;
+	return state = 36973 * (state & 65535) + (state >> 16);
+} // LCG
+
+uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); }
+
+uint32_t prng( void ) { return GENERATOR( state ); }
+
+//---------------------------------------
+
+bool threading_enabled( void ) __attribute__(( weak )) { return false; }

 // Local Variables: //
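The new per-thread generator defaults to GENERATOR = LCG, whose update multiplies the low 16 bits of the state and folds the old high 16 bits back in as a carry; seeding falls back to rdtscl() the first time a thread draws a number. A stand-alone C rendering of the same update step, with an arbitrary fixed fallback seed standing in for rdtscl():

```c
#include <stdint.h>

// Same update rule as the runtime's LCG generator: keep a 16-bit
// multiply-with-carry state in one 32-bit word (low half = value,
// high half = carry folded back in on the next step).
static uint32_t lcg_next(uint32_t *state) {
	if (*state == 0)
		*state = 0x2545F491u;   // fallback seed; the runtime seeds from rdtscl()
	*state = 36973u * (*state & 65535u) + (*state >> 16);
	return *state;
}
```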
libcfa/src/stdlib.hfa
(diff r7770cc8 → r5235d49)

 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Apr 20 21:20:03 2021
-// Update Count     : 575
+// Last Modified On : Wed Dec 29 15:30:58 2021
+// Update Count     : 591
 //
…
 		return ret;
 	}
+	S_fill(T) ?`fill ( zero_t ) = void;		// FIX ME: remove this once ticket 214 is resolved
+	S_fill(T) ?`fill ( T * a ) { return (S_fill(T)){ 'T', '0', 0, a }; } // FIX ME: remove this once ticket 214 is resolved
 	S_fill(T) ?`fill ( char c ) { return (S_fill(T)){ 'c', c }; }
-	S_fill(T) ?`fill ( T * a ) { return (S_fill(T)){ 'T', '0', 0, a }; }
 	S_fill(T) ?`fill ( T a[], size_t nmemb ) { return (S_fill(T)){ 'a', '0', nmemb * sizeof(T), a }; }
…
 static inline {
-	long int random( long int l, long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l) + l; } // [l,u)
-	long int random( long int u ) { if ( u < 0 ) return random( u, 0 ); else return random( 0, u ); } // [0,u)
+	long int random( long int l, long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l + 1) + l; } // [l,u]
+	long int random( long int u ) { return random( 0, u - 1 ); }	// [0,u)
 	unsigned long int random( void ) { return lrand48(); }
 	unsigned long int random( unsigned long int u ) { return lrand48() % u; } // [0,u)
-	unsigned long int random( unsigned long int l, unsigned long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l) + l; } // [l,u)
+	unsigned long int random( unsigned long int l, unsigned long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l + 1) + l; } // [l,u]

 	char random( void ) { return (unsigned long int)random(); }
…
 //---------------------------------------

+struct PRNG {
+	uint32_t callcnt;				// call count
+	uint32_t seed;					// current seed
+	uint32_t state;					// random state
+}; // PRNG
+
+extern uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
+static inline {
+	void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; } // set seed
+	void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); }	// random seed
+	void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); } // fixed seed
+	uint32_t get_seed( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed
+	uint32_t prng( PRNG & prng, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u)
+	uint32_t prng( PRNG & prng, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u]
+	uint32_t calls( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return callcnt; }
+} // distribution
+
+extern void set_seed( uint32_t seed );			// set per thread seed
+extern uint32_t get_seed();				// get seed
+extern uint32_t prng( void ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
+static inline {
+	uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result ));
+	uint32_t prng( uint32_t u ) { return prng() % u; } // [0,u)
+	uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result ));
+	uint32_t prng( uint32_t l, uint32_t u ) { return prng( u - l + 1 ) + l; } // [l,u]
+} // distribution
+
+//---------------------------------------
+
 extern bool threading_enabled( void ) OPTIONAL_THREAD;
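Note the interval change on the two-parameter random: it now returns values in the closed interval [l,u] (the "+ 1" makes the upper bound reachable), while the one-parameter form keeps the half-open [0,u) convention by delegating to random(0, u - 1). The same arithmetic in a small C illustration; roll_die is a made-up helper and the small modulo bias is ignored:

```c
#include <stdlib.h>   // lrand48

// Closed-interval draw matching the corrected random(l, u): with l = 1 and
// u = 6 every face of the die, including 6, can come up.
static long roll_die(void) {
	long l = 1, u = 6;
	return lrand48() % (u - l + 1) + l;
}
```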