Changeset 6c53a93 for libcfa/src
- Timestamp: Jan 5, 2022, 10:39:39 AM (4 years ago)
- Branches: ADT, ast-experimental, enum, forall-pointer-decay, master, pthread-emulation, qualifiedEnum
- Children: 0ac728b
- Parents: e2853eb (diff), 6111f1f (diff)
  Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
- Location: libcfa/src
- Files: 2 added, 13 edited
Legend:
- Unmodified
- Added
- Removed
libcfa/src/Makefile.am
re2853eb r6c53a93
  84   84        time.hfa \
  85   85        bits/weakso_locks.hfa \
       86   +    algorithms/range_iterator.hfa \
  86   87        containers/maybe.hfa \
  87   88        containers/pair.hfa \
libcfa/src/concurrency/io.cfa
re2853eb r6c53a93 33 33 #include <sys/syscall.h> 34 34 #include <sys/eventfd.h> 35 #include <sys/uio.h> 35 36 36 37 #include <linux/io_uring.h> … … 133 134 } 134 135 135 void __cfa_io_flush( processor * proc) {136 bool __cfa_io_flush( processor * proc, int min_comp ) { 136 137 /* paranoid */ verify( ! __preemption_enabled() ); 137 138 /* paranoid */ verify( proc ); … … 141 142 $io_context & ctx = *proc->io.ctx; 142 143 143 // for(i; 2) {144 // unsigned idx = proc->rdq.id + i;145 // cltr->ready_queue.lanes.tscs[idx].tv = -1ull;146 // }147 148 144 __ioarbiter_flush( ctx ); 149 145 150 146 __STATS__( true, io.calls.flush++; ) 151 int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, 0,0, (sigset_t *)0p, _NSIG / 8);147 int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8); 152 148 if( ret < 0 ) { 153 149 switch((int)errno) { … … 157 153 // Update statistics 158 154 __STATS__( false, io.calls.errors.busy ++; ) 159 // for(i; 2) { 160 // unsigned idx = proc->rdq.id + i; 161 // cltr->ready_queue.lanes.tscs[idx].tv = rdtscl(); 162 // } 163 return; 155 return false; 164 156 default: 165 157 abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) ); … … 182 174 183 175 ctx.proc->io.pending = false; 184 185 __cfa_io_drain( proc ); 186 // for(i; 2) { 187 // unsigned idx = proc->rdq.id + i; 188 // cltr->ready_queue.lanes.tscs[idx].tv = rdtscl(); 189 // } 176 ready_schedule_lock(); 177 bool ret = __cfa_io_drain( proc ); 178 ready_schedule_unlock(); 179 return ret; 190 180 } 191 181 … … 291 281 } 292 282 293 294 283 //============================================================================================= 295 284 // submission … … 314 303 ctx->proc->io.dirty = true; 315 304 if(sq.to_submit > 30 || !lazy) { 316 ready_schedule_lock(); 317 __cfa_io_flush( ctx->proc ); 318 ready_schedule_unlock(); 305 __cfa_io_flush( ctx->proc, 0 ); 319 306 } 320 307 } … … 515 502 } 516 503 } 504 505 #if defined(CFA_WITH_IO_URING_IDLE) 506 bool __kernel_read(processor * proc, io_future_t & future, iovec & iov, int fd) { 507 $io_context * ctx = proc->io.ctx; 508 /* paranoid */ verify( ! __preemption_enabled() ); 509 /* paranoid */ verify( proc == __cfaabi_tls.this_processor ); 510 /* paranoid */ verify( ctx ); 511 512 __u32 idx; 513 struct io_uring_sqe * sqe; 514 515 // We can proceed to the fast path 516 if( !__alloc(ctx, &idx, 1) ) return false; 517 518 // Allocation was successful 519 __fill( &sqe, 1, &idx, ctx ); 520 521 sqe->user_data = (uintptr_t)&future; 522 sqe->flags = 0; 523 sqe->fd = fd; 524 sqe->off = 0; 525 sqe->ioprio = 0; 526 sqe->fsync_flags = 0; 527 sqe->__pad2[0] = 0; 528 sqe->__pad2[1] = 0; 529 sqe->__pad2[2] = 0; 530 531 #if defined(CFA_HAVE_IORING_OP_READ) 532 sqe->opcode = IORING_OP_READ; 533 sqe->addr = (uint64_t)iov.iov_base; 534 sqe->len = iov.iov_len; 535 #elif defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV) 536 sqe->opcode = IORING_OP_READV; 537 sqe->addr = (uintptr_t)&iov; 538 sqe->len = 1; 539 #else 540 #error CFA_WITH_IO_URING_IDLE but none of CFA_HAVE_READV, CFA_HAVE_IORING_OP_READV or CFA_HAVE_IORING_OP_READ defined 541 #endif 542 543 asm volatile("": : :"memory"); 544 545 /* paranoid */ verify( sqe->user_data == (uintptr_t)&future ); 546 __submit( ctx, &idx, 1, true ); 547 548 /* paranoid */ verify( proc == __cfaabi_tls.this_processor ); 549 /* paranoid */ verify( ! __preemption_enabled() ); 550 } 551 #endif 517 552 #endif -
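In this merge, __cfa_io_flush gains a min_comp parameter (passed to io_uring_enter together with IORING_ENTER_GETEVENTS when non-zero) and the new __kernel_read queues a read of the processor's idle file descriptor through the ring, so the same io_uring_enter call can double as the idle-sleep blocking point. A minimal C sketch of that idea, written against liburing instead of the hand-filled SQEs above; the ring size, variable names and the immediate eventfd_write are illustrative only:

    #include <liburing.h>
    #include <sys/eventfd.h>
    #include <stdio.h>

    int main(void) {
        struct io_uring ring;
        if (io_uring_queue_init(8, &ring, 0) < 0) return 1;      // tiny ring, size is arbitrary

        int idle_fd = eventfd(0, 0);                             // counter a waker will bump
        eventfd_t val = 0;

        struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
        io_uring_prep_read(sqe, idle_fd, &val, sizeof(val), 0);  // plays the role of IORING_OP_READ above

        eventfd_write(idle_fd, 1);                               // simulate __wake_one from another processor

        io_uring_submit_and_wait(&ring, 1);                      // submit and block for >= 1 completion, like min_comp = 1

        struct io_uring_cqe * cqe;
        io_uring_wait_cqe(&ring, &cqe);
        printf("woken: read result %d, counter %llu\n", cqe->res, (unsigned long long)val);
        io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
    }

Blocking inside io_uring_enter rather than a plain read() means any other completion arriving on the ring also wakes the idle processor, which is the point of the CFA_WITH_IO_URING_IDLE path.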
libcfa/src/concurrency/io/setup.cfa
re2853eb r6c53a93
  32   32
  33   33    void __cfa_io_start( processor * proc ) {}
  34      -  void __cfa_io_flush( processor * proc ) {}
       34 +  bool __cfa_io_flush( processor * proc, int ) {}
  35   35    void __cfa_io_stop ( processor * proc ) {}
  36   36
 …
 111  111        this.ext_sq.empty = true;
 112  112        (this.ext_sq.queue){};
 113      -      __io_uring_setup( this, cl.io.params, proc->idle );
      113 +      __io_uring_setup( this, cl.io.params, proc->idle_fd );
 114  114        __cfadbg_print_safe(io_core, "Kernel I/O : Created ring for io_context %u (%p)\n", this.fd, &this);
 115  115    }
 …
 220  220        cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);
 221  221
 222      -      // Step 4 : eventfd
 223      -      // io_uring_register is so f*cking slow on some machine that it
 224      -      // will never succeed if preemption isn't hard blocked
 225      -      __cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
 226      -
 227      -      __disable_interrupts_hard();
 228      -
 229      -      int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
 230      -      if (ret < 0) {
 231      -          abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
 232      -      }
 233      -
 234      -      __enable_interrupts_hard();
 235      -
 236      -      __cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
      222 +      #if !defined(CFA_WITH_IO_URING_IDLE)
      223 +          // Step 4 : eventfd
      224 +          // io_uring_register is so f*cking slow on some machine that it
      225 +          // will never succeed if preemption isn't hard blocked
      226 +          __cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
      227 +
      228 +          __disable_interrupts_hard();
      229 +
      230 +          int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
      231 +          if (ret < 0) {
      232 +              abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
      233 +          }
      234 +
      235 +          __enable_interrupts_hard();
      236 +
      237 +          __cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
      238 +      #endif
 237  239
 238  240        // some paranoid checks
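Step 4 above, registering the per-processor eventfd with the ring so completions can signal it, is now compiled out when CFA_WITH_IO_URING_IDLE is defined, since the queued idle read takes over that role. A small sketch of the same registration through liburing's wrapper rather than the raw __NR_io_uring_register syscall (liburing assumed available; error handling trimmed):

    #include <liburing.h>
    #include <sys/eventfd.h>
    #include <stdio.h>

    int main(void) {
        struct io_uring ring;
        if (io_uring_queue_init(8, &ring, 0) < 0) return 1;

        int efd = eventfd(0, 0);
        // Same effect as syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &efd, 1 )
        int ret = io_uring_register_eventfd(&ring, efd);
        if (ret < 0) fprintf(stderr, "register failed: %d\n", ret);
        else printf("eventfd %d registered for completion notification\n", efd);

        io_uring_queue_exit(&ring);
        return 0;
    }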
libcfa/src/concurrency/io/types.hfa
re2853eb r6c53a93
 185  185
 186  186        // Wait for the future to be fulfilled
 187      -      bool wait( io_future_t & this ) {
 188      -          return wait(this.self);
 189      -      }
 190      -
 191      -      void reset( io_future_t & this ) {
 192      -          return reset(this.self);
 193      -      }
      187 +      bool wait     ( io_future_t & this ) { return wait     (this.self); }
      188 +      void reset    ( io_future_t & this ) { return reset    (this.self); }
      189 +      bool available( io_future_t & this ) { return available(this.self); }
 194  190    }
libcfa/src/concurrency/kernel.cfa
re2853eb r6c53a93 27 27 extern "C" { 28 28 #include <sys/eventfd.h> 29 #include <sys/uio.h> 29 30 } 30 31 … … 34 35 #include "strstream.hfa" 35 36 #include "device/cpu.hfa" 37 #include "io/types.hfa" 36 38 37 39 //Private includes … … 124 126 static void __wake_one(cluster * cltr); 125 127 126 static void mark_idle (__cluster_proc_list & idles, processor & proc); 128 static void idle_sleep(processor * proc, io_future_t & future, iovec & iov); 129 static bool mark_idle (__cluster_proc_list & idles, processor & proc); 127 130 static void mark_awake(__cluster_proc_list & idles, processor & proc); 128 static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );129 131 130 132 extern void __cfa_io_start( processor * ); 131 133 extern bool __cfa_io_drain( processor * ); 132 extern void __cfa_io_flush( processor *);134 extern bool __cfa_io_flush( processor *, int min_comp ); 133 135 extern void __cfa_io_stop ( processor * ); 134 136 static inline bool __maybe_io_drain( processor * ); 137 138 #if defined(CFA_WITH_IO_URING_IDLE) 139 extern bool __kernel_read(processor * proc, io_future_t & future, iovec &, int fd); 140 #endif 135 141 136 142 extern void __disable_interrupts_hard(); … … 148 154 /* paranoid */ verify( __preemption_enabled() ); 149 155 } 156 150 157 151 158 //============================================================================================= … … 163 170 verify(this); 164 171 172 io_future_t future; // used for idle sleep when io_uring is present 173 future.self.ptr = 1p; // mark it as already fulfilled so we know if there is a pending request or not 174 eventfd_t idle_val; 175 iovec idle_iovec = { &idle_val, sizeof(idle_val) }; 176 165 177 __cfa_io_start( this ); 166 178 … … 196 208 197 209 if( !readyThread ) { 198 ready_schedule_lock(); 199 __cfa_io_flush( this ); 200 ready_schedule_unlock(); 210 __cfa_io_flush( this, 0 ); 201 211 202 212 readyThread = __next_thread_slow( this->cltr ); … … 213 223 214 224 // Push self to idle stack 215 mark_idle(this->cltr->procs, * this);225 if(!mark_idle(this->cltr->procs, * this)) continue MAIN_LOOP; 216 226 217 227 // Confirm the ready-queue is empty … … 229 239 } 230 240 231 #if !defined(__CFA_NO_STATISTICS__) 232 if(this->print_halts) { 233 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); 241 idle_sleep( this, future, idle_iovec ); 242 243 // We were woken up, remove self from idle 244 mark_awake(this->cltr->procs, * this); 245 246 // DON'T just proceed, start looking again 247 continue MAIN_LOOP; 248 } 249 250 /* paranoid */ verify( readyThread ); 251 252 // Reset io dirty bit 253 this->io.dirty = false; 254 255 // We found a thread run it 256 __run_thread(this, readyThread); 257 258 // Are we done? 259 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 260 261 if(this->io.pending && !this->io.dirty) { 262 __cfa_io_flush( this, 0 ); 263 } 264 265 #else 266 #warning new kernel loop 267 SEARCH: { 268 /* paranoid */ verify( ! 
__preemption_enabled() ); 269 270 // First, lock the scheduler since we are searching for a thread 271 ready_schedule_lock(); 272 273 // Try to get the next thread 274 readyThread = pop_fast( this->cltr ); 275 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 276 277 // If we can't find a thread, might as well flush any outstanding I/O 278 if(this->io.pending) { __cfa_io_flush( this, 0 ); } 279 280 // Spin a little on I/O, just in case 281 for(5) { 282 __maybe_io_drain( this ); 283 readyThread = pop_fast( this->cltr ); 284 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 285 } 286 287 // no luck, try stealing a few times 288 for(5) { 289 if( __maybe_io_drain( this ) ) { 290 readyThread = pop_fast( this->cltr ); 291 } else { 292 readyThread = pop_slow( this->cltr ); 234 293 } 235 #endif 236 237 __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle); 294 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 295 } 296 297 // still no luck, search for a thread 298 readyThread = pop_search( this->cltr ); 299 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 300 301 // Don't block if we are done 302 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) { 303 ready_schedule_unlock(); 304 break MAIN_LOOP; 305 } 306 307 __STATS( __tls_stats()->ready.sleep.halts++; ) 308 309 // Push self to idle stack 310 ready_schedule_unlock(); 311 if(!mark_idle(this->cltr->procs, * this)) goto SEARCH; 312 ready_schedule_lock(); 313 314 // Confirm the ready-queue is empty 315 __maybe_io_drain( this ); 316 readyThread = pop_search( this->cltr ); 317 ready_schedule_unlock(); 318 319 if( readyThread ) { 320 // A thread was found, cancel the halt 321 mark_awake(this->cltr->procs, * this); 322 323 __STATS( __tls_stats()->ready.sleep.cancels++; ) 324 325 // continue the main loop 326 break SEARCH; 327 } 328 329 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); ) 330 __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle_fd); 238 331 239 332 { 240 333 eventfd_t val; 241 ssize_t ret = read( this->idle , &val, sizeof(val) );334 ssize_t ret = read( this->idle_fd, &val, sizeof(val) ); 242 335 if(ret < 0) { 243 336 switch((int)errno) { … … 255 348 } 256 349 257 #if !defined(__CFA_NO_STATISTICS__) 258 if(this->print_halts) { 259 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); 260 } 261 #endif 350 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); ) 262 351 263 352 // We were woken up, remove self from idle … … 268 357 } 269 358 270 /* paranoid */ verify( readyThread );271 272 // Reset io dirty bit273 this->io.dirty = false;274 275 // We found a thread run it276 __run_thread(this, readyThread);277 278 // Are we done?279 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;280 281 if(this->io.pending && !this->io.dirty) {282 ready_schedule_lock();283 __cfa_io_flush( this );284 ready_schedule_unlock();285 }286 287 #else288 #warning new kernel loop289 SEARCH: {290 /* paranoid */ verify( ! 
__preemption_enabled() );291 292 // First, lock the scheduler since we are searching for a thread293 ready_schedule_lock();294 295 // Try to get the next thread296 readyThread = pop_fast( this->cltr );297 if(readyThread) { ready_schedule_unlock(); break SEARCH; }298 299 // If we can't find a thread, might as well flush any outstanding I/O300 if(this->io.pending) { __cfa_io_flush( this ); }301 302 // Spin a little on I/O, just in case303 for(5) {304 __maybe_io_drain( this );305 readyThread = pop_fast( this->cltr );306 if(readyThread) { ready_schedule_unlock(); break SEARCH; }307 }308 309 // no luck, try stealing a few times310 for(5) {311 if( __maybe_io_drain( this ) ) {312 readyThread = pop_fast( this->cltr );313 } else {314 readyThread = pop_slow( this->cltr );315 }316 if(readyThread) { ready_schedule_unlock(); break SEARCH; }317 }318 319 // still no luck, search for a thread320 readyThread = pop_search( this->cltr );321 if(readyThread) { ready_schedule_unlock(); break SEARCH; }322 323 // Don't block if we are done324 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) {325 ready_schedule_unlock();326 break MAIN_LOOP;327 }328 329 __STATS( __tls_stats()->ready.sleep.halts++; )330 331 // Push self to idle stack332 ready_schedule_unlock();333 mark_idle(this->cltr->procs, * this);334 ready_schedule_lock();335 336 // Confirm the ready-queue is empty337 __maybe_io_drain( this );338 readyThread = pop_search( this->cltr );339 ready_schedule_unlock();340 341 if( readyThread ) {342 // A thread was found, cancel the halt343 mark_awake(this->cltr->procs, * this);344 345 __STATS( __tls_stats()->ready.sleep.cancels++; )346 347 // continue the main loop348 break SEARCH;349 }350 351 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); )352 __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);353 354 {355 eventfd_t val;356 ssize_t ret = read( this->idle, &val, sizeof(val) );357 if(ret < 0) {358 switch((int)errno) {359 case EAGAIN:360 #if EAGAIN != EWOULDBLOCK361 case EWOULDBLOCK:362 #endif363 case EINTR:364 // No need to do anything special here, just assume it's a legitimate wake-up365 break;366 default:367 abort( "KERNEL : internal error, read failure on idle eventfd, error(%d) %s.", (int)errno, strerror( (int)errno ) );368 }369 }370 }371 372 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); )373 374 // We were woken up, remove self from idle375 mark_awake(this->cltr->procs, * this);376 377 // DON'T just proceed, start looking again378 continue MAIN_LOOP;379 }380 381 359 RUN_THREAD: 382 360 /* paranoid */ verify( ! 
__preemption_enabled() ); … … 393 371 394 372 if(this->io.pending && !this->io.dirty) { 395 __cfa_io_flush( this );373 __cfa_io_flush( this, 0 ); 396 374 } 397 375 … … 403 381 404 382 __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this); 383 } 384 385 for(int i = 0; !available(future); i++) { 386 if(i > 1000) __cfaabi_dbg_write( "ERROR: kernel has bin spinning on a flush after exit loop.\n", 60); 387 __cfa_io_flush( this, 1 ); 405 388 } 406 389 … … 766 749 767 750 // Check if there is a sleeping processor 768 processor * p; 769 unsigned idle; 770 unsigned total; 771 [idle, total, p] = query_idles(this->procs); 751 int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST); 772 752 773 753 // If no one is sleeping, we are done 774 if( idle== 0 ) return;754 if( fd == 0 ) return; 775 755 776 756 // We found a processor, wake it up 777 757 eventfd_t val; 778 758 val = 1; 779 eventfd_write( p->idle, val );759 eventfd_write( fd, val ); 780 760 781 761 #if !defined(__CFA_NO_STATISTICS__) … … 802 782 eventfd_t val; 803 783 val = 1; 804 eventfd_write( this->idle , val );784 eventfd_write( this->idle_fd, val ); 805 785 __enable_interrupts_checked(); 806 786 } 807 787 808 static void mark_idle(__cluster_proc_list & this, processor & proc) { 809 /* paranoid */ verify( ! __preemption_enabled() ); 810 lock( this ); 788 static void idle_sleep(processor * this, io_future_t & future, iovec & iov) { 789 #if !defined(CFA_WITH_IO_URING_IDLE) 790 #if !defined(__CFA_NO_STATISTICS__) 791 if(this->print_halts) { 792 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); 793 } 794 #endif 795 796 __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle_fd); 797 798 { 799 eventfd_t val; 800 ssize_t ret = read( this->idle_fd, &val, sizeof(val) ); 801 if(ret < 0) { 802 switch((int)errno) { 803 case EAGAIN: 804 #if EAGAIN != EWOULDBLOCK 805 case EWOULDBLOCK: 806 #endif 807 case EINTR: 808 // No need to do anything special here, just assume it's a legitimate wake-up 809 break; 810 default: 811 abort( "KERNEL : internal error, read failure on idle eventfd, error(%d) %s.", (int)errno, strerror( (int)errno ) ); 812 } 813 } 814 } 815 816 #if !defined(__CFA_NO_STATISTICS__) 817 if(this->print_halts) { 818 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); 819 } 820 #endif 821 #else 822 // Do we already have a pending read 823 if(available(future)) { 824 // There is no pending read, we need to add one 825 reset(future); 826 827 __kernel_read(this, future, iov, this->idle_fd ); 828 } 829 830 __cfa_io_flush( this, 1 ); 831 #endif 832 } 833 834 static bool mark_idle(__cluster_proc_list & this, processor & proc) { 835 /* paranoid */ verify( ! __preemption_enabled() ); 836 if(!try_lock( this )) return false; 811 837 this.idle++; 812 838 /* paranoid */ verify( this.idle <= this.total ); 813 839 remove(proc); 814 840 insert_first(this.idles, proc); 841 842 __atomic_store_n(&this.fd, proc.idle_fd, __ATOMIC_SEQ_CST); 815 843 unlock( this ); 816 844 /* paranoid */ verify( ! __preemption_enabled() ); 845 846 return true; 817 847 } 818 848 … … 824 854 remove(proc); 825 855 insert_last(this.actives, proc); 856 857 { 858 int fd = 0; 859 if(!this.idles`isEmpty) fd = this.idles`first.idle_fd; 860 __atomic_store_n(&this.fd, fd, __ATOMIC_SEQ_CST); 861 } 862 826 863 unlock( this ); 827 /* paranoid */ verify( ! 
__preemption_enabled() );828 }829 830 static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {831 /* paranoid */ verify( ! __preemption_enabled() );832 /* paranoid */ verify( ready_schedule_islocked() );833 834 for() {835 uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);836 if( 1 == (l % 2) ) { Pause(); continue; }837 unsigned idle = this.idle;838 unsigned total = this.total;839 processor * proc = &this.idles`first;840 // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it841 asm volatile("": : :"memory");842 if(l != __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST)) { Pause(); continue; }843 return [idle, total, proc];844 }845 846 /* paranoid */ verify( ready_schedule_islocked() );847 864 /* paranoid */ verify( ! __preemption_enabled() ); 848 865 } … … 906 923 if(head == tail) return false; 907 924 #if OLD_MAIN 908 ready_schedule_lock();909 ret = __cfa_io_drain( proc );910 ready_schedule_unlock();925 ready_schedule_lock(); 926 ret = __cfa_io_drain( proc ); 927 ready_schedule_unlock(); 911 928 #else 912 929 ret = __cfa_io_drain( proc ); 913 #endif930 #endif 914 931 #endif 915 932 return ret; -
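Outside the io_uring idle path, a processor still parks by blocking in read() on its idle_fd, and __wake_one now wakes it through the single file descriptor published in the cluster's procs.fd instead of scanning the idle list. A minimal sketch of that eventfd park/wake handshake, with two ordinary pthreads standing in for processors (all names are illustrative):

    #include <sys/eventfd.h>
    #include <pthread.h>
    #include <unistd.h>
    #include <stdio.h>

    static int idle_fd;

    static void * parked(void * arg) {
        (void)arg;
        eventfd_t val;
        ssize_t ret = read(idle_fd, &val, sizeof(val));     // blocks until someone writes, like idle_sleep
        printf("woken: read %zd bytes, counter %llu\n", ret, (unsigned long long)val);
        return NULL;
    }

    int main(void) {
        idle_fd = eventfd(0, 0);                            // counting eventfd, initial value 0
        pthread_t t;
        pthread_create(&t, NULL, parked, NULL);
        sleep(1);                                           // let the "processor" park first
        eventfd_write(idle_fd, 1);                          // the __wake_one side of the handshake
        pthread_join(t, NULL);
        close(idle_fd);
        return 0;
    }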
libcfa/src/concurrency/kernel.hfa
re2853eb r6c53a93
 100  100
 101  101        // Idle lock (kernel semaphore)
 102      -      int idle;
      102 +      int idle_fd;
 103  103
 104  104        // Termination synchronisation (user semaphore)
 …
 195  195    struct __cluster_proc_list {
 196  196        // Spin lock protecting the queue
 197      -      volatile uint64_t lock;
      197 +      __spinlock_t lock;
      198 +
      199 +      // FD to use to wake a processor
      200 +      volatile int fd;
 198  201
 199  202        // Total number of processors
libcfa/src/concurrency/kernel/startup.cfa
re2853eb r6c53a93
 527  527        this.local_data = 0p;
 528  528
 529      -      this.idle = eventfd(0, 0);
 530      -      if (idle < 0) {
      529 +      this.idle_fd = eventfd(0, 0);
      530 +      if (idle_fd < 0) {
 531  531            abort("KERNEL ERROR: PROCESSOR EVENTFD - %s\n", strerror(errno));
 532  532        }
 …
 542  542    // Not a ctor, it just preps the destruction but should not destroy members
 543  543    static void deinit(processor & this) {
 544      -      close(this.idle);
      544 +      close(this.idle_fd);
 545  545    }
 546  546
 …
 584  584    // Cluster
 585  585    static void ?{}(__cluster_proc_list & this) {
 586      -      this.lock  = 0;
      586 +      this.fd    = 0;
 587  587        this.idle  = 0;
 588  588        this.total = 0;
libcfa/src/concurrency/kernel_private.hfa
re2853eb r6c53a93
  39   39    }
  40   40
       41 +  // Defines whether or not we *want* to use io_uring_enter as the idle_sleep blocking call
       42 +  #define CFA_WANT_IO_URING_IDLE
       43 +
       44 +  // Defines whether or not we *can* use io_uring_enter as the idle_sleep blocking call
       45 +  #if defined(CFA_WANT_IO_URING_IDLE) && defined(CFA_HAVE_LINUX_IO_URING_H)
       46 +      #if defined(CFA_HAVE_IORING_OP_READ) || (defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV))
       47 +          #define CFA_WITH_IO_URING_IDLE
       48 +      #endif
       49 +  #endif
       50 +
  41   51    //-----------------------------------------------------------------------------
  42   52    // Scheduler
 …
 149  159        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
 150  160    }
 151      -
 152      -
 153      -
 154      -
 155  161
 156  162    //-----------------------------------------------------------------------
 …
 268  274        ready_schedule_lock();
 269  275
 270      -      // Simple counting lock, acquired, acquired by incrementing the counter
 271      -      // to an odd number
 272      -      for() {
 273      -          uint64_t l = this.lock;
 274      -          if(
 275      -              (0 == (l % 2))
 276      -              && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
 277      -          ) return;
 278      -          Pause();
 279      -      }
 280      -
 281      -      /* paranoid */ verify( ! __preemption_enabled() );
      276 +      lock( this.lock __cfaabi_dbg_ctx2 );
      277 +
      278 +      /* paranoid */ verify( ! __preemption_enabled() );
      279 +  }
      280 +
      281 +  static inline bool try_lock(__cluster_proc_list & this) {
      282 +      /* paranoid */ verify( ! __preemption_enabled() );
      283 +
      284 +      // Start by locking the global RWlock so that we know no-one is
      285 +      // adding/removing processors while we mess with the idle lock
      286 +      ready_schedule_lock();
      287 +
      288 +      if(try_lock( this.lock __cfaabi_dbg_ctx2 )) {
      289 +          // success
      290 +          /* paranoid */ verify( ! __preemption_enabled() );
      291 +          return true;
      292 +      }
      293 +
      294 +      // failed to lock
      295 +      ready_schedule_unlock();
      296 +
      297 +      /* paranoid */ verify( ! __preemption_enabled() );
      298 +      return false;
 282  299    }
 …
 285  302        /* paranoid */ verify( ! __preemption_enabled() );
 286  303
 287      -      /* paranoid */ verify( 1 == (this.lock % 2) );
 288      -      // Simple couting lock, release by incrementing to an even number
 289      -      __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
      304 +      unlock(this.lock);
 290  305
 291  306        // Release the global lock, which we acquired when locking
libcfa/src/device/cpu.cfa
re2853eb r6c53a93 30 30 #include <fcntl.h> 31 31 } 32 33 #include "algorithms/range_iterator.hfa" 32 34 33 35 // search a string for character 'character' but looking atmost at len … … 135 137 count++; 136 138 } 137 iterate_dir(path, lambda); 139 int ret = iterate_dir(path, lambda); 140 if(ret == ENOTDIR) return 0; 138 141 139 142 /* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max); … … 143 146 144 147 // Count number of cpus in the system 145 static intcount_cpus(void) {148 static [int, const char *] count_cpus(void) { 146 149 const char * fpath = "/sys/devices/system/cpu/online"; 147 150 int fd = open(fpath, 0, O_RDONLY); … … 159 162 160 163 const char * _; 161 int cnt = read_width(buff, r - 1, &_); 162 /* paranoid */ verify(cnt == count_prefix_dirs("/sys/devices/system/cpu", "cpu")); 163 return cnt; 164 return [read_width(buff, r - 1, &_), strndup(buff, r - 1)]; 164 165 } 165 166 … … 226 227 227 228 struct raw_cache_instance { 228 idx_range_t range; 229 unsigned width; 230 unsigned char level; 229 idx_range_t range; // A text description of the cpus covered 230 unsigned width; // The number of cpus covered 231 unsigned char level; // the cache level 231 232 // FIXME add at least size and type 232 233 }; … … 235 236 static void ^?{}(raw_cache_instance & this) { free(this.range);} 236 237 237 raw_cache_instance ** build_raw_cache_table(unsigned cpus, unsigned idxs, unsigned cache_levels) 238 // Returns a 2D array of instances of size [cpu count][cache levels] 239 // where cache level doesn't include instruction caches 240 raw_cache_instance ** build_raw_cache_table(unsigned cpus_c, idx_range_t cpus, unsigned idxs, unsigned cache_levels) 238 241 { 239 raw_cache_instance ** raw = alloc(cpus); 240 for(i; cpus) { 242 raw_cache_instance ** raw = alloc(cpus_c, '\0'`fill); 243 244 RangeIter rc = { cpus }; 245 while(moveNext(rc)) { 246 unsigned i = rc.com; 241 247 raw[i] = alloc(cache_levels); 242 248 void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) { … … 263 269 264 270 // returns an allocate list of all the different distinct last level caches 265 static [*llc_map_t, size_t cnt] distinct_llcs( unsignedcpus, unsigned llc_idx, raw_cache_instance ** raw) {271 static [*llc_map_t, size_t cnt] distinct_llcs(idx_range_t cpus, unsigned llc_idx, raw_cache_instance ** raw) { 266 272 // Allocate at least one element 267 273 llc_map_t* ranges = alloc(); 268 274 size_t range_cnt = 1; 269 275 276 RangeIter rc = { cpus }; 277 __attribute__((unused)) bool ret = 278 moveNext(rc); 279 /* paranoid */ verify( ret ); 280 /* paranoid */ verify( rc.com >= 0 ); 281 270 282 // Initialize with element 0 271 ranges->raw = &raw[ 0][llc_idx];283 ranges->raw = &raw[rc.com][llc_idx]; 272 284 ranges->count = 0; 273 285 ranges->start = -1u; 274 286 275 287 // Go over all other cpus 276 CPU_LOOP: for(i; 1~cpus) { 288 CPU_LOOP: while(moveNext(rc)) { 289 unsigned i = rc.com; 277 290 // Check if the range is already there 278 291 raw_cache_instance * candidate = &raw[i][llc_idx]; … … 304 317 } 305 318 306 static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) { 307 cpu_pairing_t * pairings = alloc(cpus); 308 309 CPU_LOOP: for(i; cpus) { 319 static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus_c, idx_range_t cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) { 320 cpu_pairing_t * pairings = alloc(cpus_c); 321 322 RangeIter rc = { cpus }; 323 
CPU_LOOP: while(moveNext(rc)) { 324 unsigned i = rc.com; 310 325 pairings[i].cpu = i; 311 326 idx_range_t want = raw[i][0].range; … … 327 342 extern "C" { 328 343 void __cfaabi_device_startup( void ) { 329 int cpus = count_cpus(); 344 int cpus_c; 345 const char * cpus; 346 [cpus_c, cpus] = count_cpus(); 347 #if defined(__CFA_WITH_VERIFY__) 348 // Verify that the mapping is self consistant. 349 { 350 RangeIter rc = { cpus }; 351 while(moveNext(rc)) { 352 unsigned i = rc.com; 353 verify(cpus_c > i); 354 } 355 } 356 #endif 357 330 358 int idxs = count_cache_indexes(); 331 359 … … 333 361 unsigned cache_levels = 0; 334 362 unsigned llc = 0; 335 {363 if (idxs != 0) { 336 364 unsigned char prev = -1u; 337 365 void first(unsigned idx, unsigned char level, const char * map, size_t len) { … … 345 373 346 374 // Read in raw data 347 raw_cache_instance ** raw = build_raw_cache_table(cpus , idxs, cache_levels);375 raw_cache_instance ** raw = build_raw_cache_table(cpus_c, cpus, idxs, cache_levels); 348 376 349 377 // Find number of distinct cache instances … … 362 390 width2 += maps[i].raw->width; 363 391 } 364 verify(width1 == cpus );365 verify(width2 == cpus );392 verify(width1 == cpus_c); 393 verify(width2 == cpus_c); 366 394 } 367 395 #endif 368 396 369 397 // Get mappings from cpu to cache instance 370 cpu_pairing_t * pairings = get_cpu_pairings(cpus , raw, maps, map_cnt);398 cpu_pairing_t * pairings = get_cpu_pairings(cpus_c, cpus, raw, maps, map_cnt); 371 399 372 400 // Sort by cache instance 373 qsort(pairings, cpus );401 qsort(pairings, cpus_c); 374 402 375 403 { 376 404 unsigned it = 0; 377 for(i; cpus) { 405 RangeIter rc = { cpus }; 406 while(moveNext(rc)) { 407 unsigned i = rc.com; 378 408 unsigned llc_id = pairings[i].id; 379 409 if(maps[llc_id].start == -1u) { … … 384 414 } 385 415 } 386 /* paranoid */ verify(it == cpus );416 /* paranoid */ verify(it == cpus_c); 387 417 } 388 418 389 419 // From the mappings build the actual cpu map we want 390 struct cpu_map_entry_t * entries = alloc(cpus); 391 for(i; cpus) { entries[i].count = 0; } 392 for(i; cpus) { 420 struct cpu_map_entry_t * entries = alloc(cpus_c); 421 for(i; cpus_c) { entries[i].count = 0; } 422 423 RangeIter rc = { cpus }; 424 while(moveNext(rc)) { 425 unsigned i = rc.com; 393 426 /* paranoid */ verify(pairings[i].id < map_cnt); 394 427 unsigned c = pairings[i].cpu; … … 406 439 free(pairings); 407 440 408 for(i; cpus ) {409 for(j; cache_levels) {441 for(i; cpus_c) { 442 if( raw[i] ) for(j; cache_levels) { 410 443 ^(raw[i][j]){}; 411 444 } … … 415 448 416 449 cpu_info.llc_map = entries; 417 cpu_info.hthrd_count = cpus; 450 cpu_info.hthrd_count = cpus_c; 451 cpu_info.llc_count = map_cnt; 418 452 } 419 453 -
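count_cpus now returns both the hardware-thread count and the raw text of /sys/devices/system/cpu/online, and the new RangeIter walks that text (for example "0-7,16-23") so offline cpus simply leave holes in the per-cpu tables. A rough C sketch of iterating such a range string; it only parses the text, touches no sysfs files, and the helper name is invented:

    #include <stdio.h>
    #include <stdlib.h>

    // Call cb(cpu) for every cpu id named by a list such as "0-7,16-23\n".
    static void for_each_cpu(const char * list, void (*cb)(unsigned)) {
        const char * p = list;
        while (*p && *p != '\n') {
            char * end;
            unsigned lo = strtoul(p, &end, 10), hi = lo;
            if (end == p) break;                              // malformed input, give up
            if (*end == '-') hi = strtoul(end + 1, &end, 10); // a "lo-hi" range
            for (unsigned c = lo; c <= hi; c++) cb(c);
            p = (*end == ',') ? end + 1 : end;
        }
    }

    static void show(unsigned cpu) { printf("cpu%u\n", cpu); }

    int main(void) {
        for_each_cpu("0-3,8-9\n", show);                      // prints cpu0..cpu3, then cpu8 and cpu9
        return 0;
    }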
libcfa/src/device/cpu.hfa
re2853eb r6c53a93
  23   23
  24   24    struct cpu_info_t {
  25      -      // array of size [hthrd_count]
       25 +      // Array of size [hthrd_count]
  26   26        const cpu_map_entry_t * llc_map;
  27   27
  28   28        // Number of _hardware_ threads present in the system
  29   29        size_t hthrd_count;
       30 +
       31 +      // Number of distinct last level caches
       32 +      size_t llc_count;
  30   33    };
  31   34
libcfa/src/heap.cfa
re2853eb r6c53a93 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Aug 9 19:03:02 202113 // Update Count : 10 4012 // Last Modified On : Sun Jan 2 23:29:41 2022 13 // Update Count : 1058 14 14 // 15 15 … … 263 263 #ifdef __STATISTICS__ 264 264 // Heap statistics counters. 265 static unsigned int malloc_ zero_calls, malloc_calls;266 static unsigned long long int malloc_storage ;267 static unsigned int aalloc_ zero_calls, aalloc_calls;268 static unsigned long long int aalloc_storage ;269 static unsigned int calloc_ zero_calls, calloc_calls;270 static unsigned long long int calloc_storage ;271 static unsigned int memalign_ zero_calls, memalign_calls;272 static unsigned long long int memalign_storage ;273 static unsigned int amemalign_ zero_calls, amemalign_calls;274 static unsigned long long int amemalign_storage ;275 static unsigned int cmemalign_ zero_calls, cmemalign_calls;276 static unsigned long long int cmemalign_storage ;277 static unsigned int resize_ zero_calls, resize_calls;278 static unsigned long long int resize_storage ;279 static unsigned int realloc_ zero_calls, realloc_calls;280 static unsigned long long int realloc_storage ;281 static unsigned int free_ zero_calls, free_calls;282 static unsigned long long int free_storage ;265 static unsigned int malloc_calls, malloc_0_calls; 266 static unsigned long long int malloc_storage_request, malloc_storage_alloc; 267 static unsigned int aalloc_calls, aalloc_0_calls; 268 static unsigned long long int aalloc_storage_request, aalloc_storage_alloc; 269 static unsigned int calloc_calls, calloc_0_calls; 270 static unsigned long long int calloc_storage_request, calloc_storage_alloc; 271 static unsigned int memalign_calls, memalign_0_calls; 272 static unsigned long long int memalign_storage_request, memalign_storage_alloc; 273 static unsigned int amemalign_calls, amemalign_0_calls; 274 static unsigned long long int amemalign_storage_request, amemalign_storage_alloc; 275 static unsigned int cmemalign_calls, cmemalign_0_calls; 276 static unsigned long long int cmemalign_storage_request, cmemalign_storage_alloc; 277 static unsigned int resize_calls, resize_0_calls; 278 static unsigned long long int resize_storage_request, resize_storage_alloc; 279 static unsigned int realloc_calls, realloc_0_calls; 280 static unsigned long long int realloc_storage_request, realloc_storage_alloc; 281 static unsigned int free_calls, free_null_calls; 282 static unsigned long long int free_storage_request, free_storage_alloc; 283 283 static unsigned int mmap_calls; 284 static unsigned long long int mmap_storage ;284 static unsigned long long int mmap_storage_request, mmap_storage_alloc; 285 285 static unsigned int munmap_calls; 286 static unsigned long long int munmap_storage ;286 static unsigned long long int munmap_storage_request, munmap_storage_alloc; 287 287 static unsigned int sbrk_calls; 288 288 static unsigned long long int sbrk_storage; … … 294 294 char helpText[1024]; 295 295 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), 296 "\nHeap statistics: \n"297 " malloc 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"298 " aalloc 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"299 " calloc 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"300 " memalign 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"301 " amemalign 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"302 " cmemalign 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"303 " resize 0-calls %'u; >0-calls %'u; 
storage%'llu bytes\n"304 " realloc 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"305 " free 0-calls %'u; >0-calls %'u; storage%'llu bytes\n"306 " mmapcalls %'u; storage %'llu bytes\n"307 " m unmap calls %'u; storage%'llu bytes\n"308 " sbrk calls %'u; storage%'llu bytes\n",309 malloc_ zero_calls, malloc_calls, malloc_storage,310 aalloc_ zero_calls, aalloc_calls, aalloc_storage,311 calloc_ zero_calls, calloc_calls, calloc_storage,312 memalign_ zero_calls, memalign_calls, memalign_storage,313 amemalign_ zero_calls, amemalign_calls, amemalign_storage,314 cmemalign_ zero_calls, cmemalign_calls, cmemalign_storage,315 resize_ zero_calls, resize_calls, resize_storage,316 realloc_ zero_calls, realloc_calls, realloc_storage,317 free_ zero_calls, free_calls, free_storage,318 mmap_calls, mmap_storage,319 m unmap_calls, munmap_storage,320 sbrk_calls, sbrk_storage296 "\nHeap statistics: (storage request / allocation + header)\n" 297 " malloc >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 298 " aalloc >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 299 " calloc >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 300 " memalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 301 " amemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 302 " cmemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 303 " resize >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 304 " realloc >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" 305 " free !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n" 306 " sbrk calls %'u; storage %'llu bytes\n" 307 " mmap calls %'u; storage %'llu / %'llu bytes\n" 308 " munmap calls %'u; storage %'llu / %'llu bytes\n", 309 malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc, 310 aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc, 311 calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc, 312 memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc, 313 amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc, 314 cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc, 315 resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc, 316 realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc, 317 free_calls, free_null_calls, free_storage_request, free_storage_alloc, 318 sbrk_calls, sbrk_storage, 319 mmap_calls, mmap_storage_request, mmap_storage_alloc, 320 munmap_calls, munmap_storage_request, munmap_storage_alloc 321 321 ); 322 322 } // printStats … … 329 329 "<sizes>\n" 330 330 "</sizes>\n" 331 "<total type=\"malloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 332 "<total type=\"aalloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 333 "<total type=\"calloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 334 "<total type=\"memalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 335 "<total type=\"amemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 336 "<total type=\"cmemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 337 "<total type=\"resize\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 338 "<total type=\"realloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 339 "<total type=\"free\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 340 "<total type=\"mmap\" 
count=\"%'u;\" size=\"%'llu\"/> bytes\n" 341 "<total type=\"munmap\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 331 "<total type=\"malloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 332 "<total type=\"aalloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 333 "<total type=\"calloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 334 "<total type=\"memalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 335 "<total type=\"amemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 336 "<total type=\"cmemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 337 "<total type=\"resize\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 338 "<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 339 "<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 342 340 "<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 341 "<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n" 342 "<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 343 343 "</malloc>", 344 malloc_ zero_calls, malloc_calls, malloc_storage,345 aalloc_ zero_calls, aalloc_calls, aalloc_storage,346 calloc_ zero_calls, calloc_calls, calloc_storage,347 memalign_ zero_calls, memalign_calls, memalign_storage,348 amemalign_ zero_calls, amemalign_calls, amemalign_storage,349 cmemalign_ zero_calls, cmemalign_calls, cmemalign_storage,350 resize_ zero_calls, resize_calls, resize_storage,351 realloc_ zero_calls, realloc_calls, realloc_storage,352 free_ zero_calls, free_calls, free_storage,353 mmap_calls, mmap_storage,354 m unmap_calls, munmap_storage,355 sbrk_calls, sbrk_storage344 malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc, 345 aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc, 346 calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc, 347 memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc, 348 amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc, 349 cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc, 350 resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc, 351 realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc, 352 free_calls, free_null_calls, free_storage_request, free_storage_alloc, 353 sbrk_calls, sbrk_storage, 354 mmap_calls, mmap_storage_request, mmap_storage_alloc, 355 munmap_calls, munmap_storage_request, munmap_storage_alloc 356 356 ); 357 357 __cfaabi_bits_write( fileno( stream ), helpText, len ); // ensures all bytes written or exit … … 577 577 #ifdef __STATISTICS__ 578 578 __atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST ); 579 __atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST ); 579 __atomic_add_fetch( &mmap_storage_request, size, __ATOMIC_SEQ_CST ); 580 __atomic_add_fetch( &mmap_storage_alloc, tsize, __ATOMIC_SEQ_CST ); 580 581 #endif // __STATISTICS__ 581 582 … … 626 627 #ifdef __STATISTICS__ 627 628 __atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST ); 628 __atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST ); 629 __atomic_add_fetch( &munmap_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST ); 630 __atomic_add_fetch( &munmap_storage_alloc, size, __ATOMIC_SEQ_CST ); 629 631 #endif // __STATISTICS__ 630 
632 if ( munmap( header, size ) == -1 ) { … … 642 644 #ifdef __STATISTICS__ 643 645 __atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST ); 644 __atomic_add_fetch( &free_storage, size, __ATOMIC_SEQ_CST ); 646 __atomic_add_fetch( &free_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST ); 647 __atomic_add_fetch( &free_storage_alloc, size, __ATOMIC_SEQ_CST ); 645 648 #endif // __STATISTICS__ 646 649 … … 819 822 if ( likely( size > 0 ) ) { 820 823 __atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST ); 821 __atomic_add_fetch( &malloc_storage , size, __ATOMIC_SEQ_CST );824 __atomic_add_fetch( &malloc_storage_request, size, __ATOMIC_SEQ_CST ); 822 825 } else { 823 __atomic_add_fetch( &malloc_ zero_calls, 1, __ATOMIC_SEQ_CST );826 __atomic_add_fetch( &malloc_0_calls, 1, __ATOMIC_SEQ_CST ); 824 827 } // if 825 828 #endif // __STATISTICS__ … … 835 838 if ( likely( size > 0 ) ) { 836 839 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST ); 837 __atomic_add_fetch( &aalloc_storage , size, __ATOMIC_SEQ_CST );840 __atomic_add_fetch( &aalloc_storage_request, size, __ATOMIC_SEQ_CST ); 838 841 } else { 839 __atomic_add_fetch( &aalloc_ zero_calls, 1, __ATOMIC_SEQ_CST );842 __atomic_add_fetch( &aalloc_0_calls, 1, __ATOMIC_SEQ_CST ); 840 843 } // if 841 844 #endif // __STATISTICS__ … … 850 853 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 851 854 #ifdef __STATISTICS__ 852 __atomic_add_fetch( &calloc_ zero_calls, 1, __ATOMIC_SEQ_CST );855 __atomic_add_fetch( &calloc_0_calls, 1, __ATOMIC_SEQ_CST ); 853 856 #endif // __STATISTICS__ 854 857 return 0p; … … 856 859 #ifdef __STATISTICS__ 857 860 __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST ); 858 __atomic_add_fetch( &calloc_storage , dim * elemSize, __ATOMIC_SEQ_CST );861 __atomic_add_fetch( &calloc_storage_request, dim * elemSize, __ATOMIC_SEQ_CST ); 859 862 #endif // __STATISTICS__ 860 863 … … 891 894 if ( unlikely( size == 0 ) ) { // special cases 892 895 #ifdef __STATISTICS__ 893 __atomic_add_fetch( &resize_ zero_calls, 1, __ATOMIC_SEQ_CST );896 __atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST ); 894 897 #endif // __STATISTICS__ 895 898 free( oaddr ); … … 902 905 if ( unlikely( oaddr == 0p ) ) { 903 906 #ifdef __STATISTICS__ 904 __atomic_add_fetch( &resize_storage , size, __ATOMIC_SEQ_CST );907 __atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST ); 905 908 #endif // __STATISTICS__ 906 909 return mallocNoStats( size ); … … 921 924 922 925 #ifdef __STATISTICS__ 923 __atomic_add_fetch( &resize_storage , size, __ATOMIC_SEQ_CST );926 __atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST ); 924 927 #endif // __STATISTICS__ 925 928 … … 936 939 if ( unlikely( size == 0 ) ) { // special cases 937 940 #ifdef __STATISTICS__ 938 __atomic_add_fetch( &realloc_ zero_calls, 1, __ATOMIC_SEQ_CST );941 __atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST ); 939 942 #endif // __STATISTICS__ 940 943 free( oaddr ); … … 947 950 if ( unlikely( oaddr == 0p ) ) { 948 951 #ifdef __STATISTICS__ 949 __atomic_add_fetch( &realloc_storage , size, __ATOMIC_SEQ_CST );952 __atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST ); 950 953 #endif // __STATISTICS__ 951 954 return mallocNoStats( size ); … … 969 972 970 973 #ifdef __STATISTICS__ 971 __atomic_add_fetch( &realloc_storage , size, __ATOMIC_SEQ_CST );974 __atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST ); 972 975 #endif // __STATISTICS__ 973 976 … … 1000 1003 if ( likely( size > 0 ) ) { 1001 1004 
__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST ); 1002 __atomic_add_fetch( &memalign_storage , size, __ATOMIC_SEQ_CST );1005 __atomic_add_fetch( &memalign_storage_request, size, __ATOMIC_SEQ_CST ); 1003 1006 } else { 1004 __atomic_add_fetch( &memalign_ zero_calls, 1, __ATOMIC_SEQ_CST );1007 __atomic_add_fetch( &memalign_0_calls, 1, __ATOMIC_SEQ_CST ); 1005 1008 } // if 1006 1009 #endif // __STATISTICS__ … … 1016 1019 if ( likely( size > 0 ) ) { 1017 1020 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1018 __atomic_add_fetch( &cmemalign_storage , size, __ATOMIC_SEQ_CST );1021 __atomic_add_fetch( &cmemalign_storage_request, size, __ATOMIC_SEQ_CST ); 1019 1022 } else { 1020 __atomic_add_fetch( &cmemalign_ zero_calls, 1, __ATOMIC_SEQ_CST );1023 __atomic_add_fetch( &cmemalign_0_calls, 1, __ATOMIC_SEQ_CST ); 1021 1024 } // if 1022 1025 #endif // __STATISTICS__ … … 1031 1034 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 1032 1035 #ifdef __STATISTICS__ 1033 __atomic_add_fetch( &cmemalign_ zero_calls, 1, __ATOMIC_SEQ_CST );1036 __atomic_add_fetch( &cmemalign_0_calls, 1, __ATOMIC_SEQ_CST ); 1034 1037 #endif // __STATISTICS__ 1035 1038 return 0p; … … 1037 1040 #ifdef __STATISTICS__ 1038 1041 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1039 __atomic_add_fetch( &cmemalign_storage , dim * elemSize, __ATOMIC_SEQ_CST );1042 __atomic_add_fetch( &cmemalign_storage_request, dim * elemSize, __ATOMIC_SEQ_CST ); 1040 1043 #endif // __STATISTICS__ 1041 1044 … … 1101 1104 if ( unlikely( addr == 0p ) ) { // special case 1102 1105 #ifdef __STATISTICS__ 1103 __atomic_add_fetch( &free_ zero_calls, 1, __ATOMIC_SEQ_CST );1106 __atomic_add_fetch( &free_null_calls, 1, __ATOMIC_SEQ_CST ); 1104 1107 #endif // __STATISTICS__ 1105 1108 … … 1280 1283 if ( unlikely( size == 0 ) ) { // special cases 1281 1284 #ifdef __STATISTICS__ 1282 __atomic_add_fetch( &resize_ zero_calls, 1, __ATOMIC_SEQ_CST );1285 __atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST ); 1283 1286 #endif // __STATISTICS__ 1284 1287 free( oaddr ); … … 1294 1297 #ifdef __STATISTICS__ 1295 1298 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 1296 __atomic_add_fetch( &resize_storage , size, __ATOMIC_SEQ_CST );1299 __atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST ); 1297 1300 #endif // __STATISTICS__ 1298 1301 return memalignNoStats( nalign, size ); … … 1329 1332 1330 1333 #ifdef __STATISTICS__ 1331 __atomic_add_fetch( &resize_storage , size, __ATOMIC_SEQ_CST );1334 __atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST ); 1332 1335 #endif // __STATISTICS__ 1333 1336 … … 1342 1345 if ( unlikely( size == 0 ) ) { // special cases 1343 1346 #ifdef __STATISTICS__ 1344 __atomic_add_fetch( &realloc_ zero_calls, 1, __ATOMIC_SEQ_CST );1347 __atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST ); 1345 1348 #endif // __STATISTICS__ 1346 1349 free( oaddr ); … … 1356 1359 #ifdef __STATISTICS__ 1357 1360 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST ); 1358 __atomic_add_fetch( &realloc_storage , size, __ATOMIC_SEQ_CST );1361 __atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST ); 1359 1362 #endif // __STATISTICS__ 1360 1363 return memalignNoStats( nalign, size ); … … 1380 1383 #ifdef __STATISTICS__ 1381 1384 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST ); 1382 __atomic_add_fetch( &realloc_storage , size, __ATOMIC_SEQ_CST );1385 __atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST ); 1383 1386 #endif // 
__STATISTICS__ 1384 1387 -
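The heap statistics now record, for every entry point, both the bytes the caller requested and the bytes actually allocated (payload plus header), each bumped with an atomic add. A small C sketch of that request/alloc counting pattern; the counter names and the example numbers are placeholders, not the values used in heap.cfa:

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    static atomic_uint   malloc_calls;                      // number of non-zero-sized calls
    static atomic_ullong storage_request;                   // bytes the caller asked for
    static atomic_ullong storage_alloc;                     // bytes handed out, header included

    static void note_malloc(size_t request, size_t allocated) {
        atomic_fetch_add(&malloc_calls, 1);
        atomic_fetch_add(&storage_request, request);
        atomic_fetch_add(&storage_alloc, allocated);
    }

    int main(void) {
        note_malloc(100, 128);                              // e.g. a 100-byte request that consumed 128 bytes with rounding and header
        printf("calls %u; storage %llu / %llu bytes\n",
               (unsigned)atomic_load(&malloc_calls),
               (unsigned long long)atomic_load(&storage_request),
               (unsigned long long)atomic_load(&storage_alloc));
        return 0;
    }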
libcfa/src/stdlib.cfa
re2853eb r6c53a93
  10   10    // Created On : Thu Jan 28 17:10:29 2016
  11   11    // Last Modified By : Peter A. Buhr
  12      -  // Last Modified On : Thu Nov 12 07:46:09 2020
  13      -  // Update Count : 503
       12 +  // Last Modified On : Mon Jan 3 09:36:27 2022
       13 +  // Update Count : 519
  14   14    //
  15   15
 …
 221  221    //---------------------------------------
 222  222
 223      -  bool threading_enabled(void) __attribute__((weak)) {
 224      -      return false;
 225      -  }
      223 +  static uint32_t seed = 0;                       // current seed
      224 +  static thread_local uint32_t state;             // random state
      225 +
      226 +  void set_seed( uint32_t seed_ ) { state = seed = seed_; }
      227 +  uint32_t get_seed() { return seed; }
      228 +
      229 +  #define GENERATOR LCG
      230 +
      231 +  inline uint32_t MarsagliaXor( uint32_t & state ) {
      232 +      if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
      233 +      else if ( unlikely( state == 0 ) ) state = seed;
      234 +      state ^= state << 6;
      235 +      state ^= state >> 21;
      236 +      state ^= state << 7;
      237 +      return state;
      238 +  } // MarsagliaXor
      239 +
      240 +  inline uint32_t LCG( uint32_t & state ) {       // linear congruential generator
      241 +      if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
      242 +      else if ( unlikely( state == 0 ) ) state = seed;
      243 +      return state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime!
      244 +  } // LCG
      245 +
      246 +  uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); }
      247 +
      248 +  uint32_t prng( void ) { return GENERATOR( state ); }
      249 +
      250 +  //---------------------------------------
      251 +
      252 +  bool threading_enabled( void ) __attribute__(( weak )) { return false; }
 226  253
 227  254    // Local Variables: //
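The new per-thread PRNG chooses its GENERATOR between a Marsaglia xorshift (shifts 6, 21, 7) and the 36969-multiplier recurrence the source labels a linear congruential generator, both seeded lazily from rdtscl. A standalone C rendering of the two recurrences for comparison; the fixed seed below stands in for that lazy seeding:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t lcg(uint32_t * state) {                 // same recurrence as LCG() above
        return *state = 36969 * (*state & 65535) + (*state >> 16);
    }

    static uint32_t xorshift(uint32_t * state) {            // Marsaglia xorshift with shifts 6, 21, 7
        *state ^= *state << 6;
        *state ^= *state >> 21;
        *state ^= *state << 7;
        return *state;
    }

    int main(void) {
        uint32_t s1 = 1009, s2 = 1009;                      // fixed, non-zero seeds for a reproducible demo
        for (int i = 0; i < 3; i++)
            printf("lcg %10u   xorshift %10u\n", lcg(&s1), xorshift(&s2));
        return 0;
    }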
libcfa/src/stdlib.hfa
re2853eb r6c53a93 10 10 // Created On : Thu Jan 28 17:12:35 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Tue Apr 20 21:20:03 202113 // Update Count : 5 7512 // Last Modified On : Sun Jan 2 22:53:57 2022 13 // Update Count : 594 14 14 // 15 15 … … 43 43 //--------------------------------------- 44 44 45 // Macro because of returns46 #define ARRAY_ALLOC$( allocation, alignment, dim ) \47 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)allocation( dim, (size_t)sizeof(T) ); /* C allocation */ \48 else return (T *)alignment( _Alignof(T), dim, sizeof(T) )49 50 45 static inline forall( T & | sized(T) ) { 51 46 // CFA safe equivalents, i.e., implicit size specification 52 47 53 48 T * malloc( void ) { 54 if ( _Alignof(T) <= libAlign() ) return (T *) (void *)malloc( (size_t)sizeof(T) ); // C allocation49 if ( _Alignof(T) <= libAlign() ) return (T *)malloc( sizeof(T) ); // C allocation 55 50 else return (T *)memalign( _Alignof(T), sizeof(T) ); 56 51 } // malloc 57 52 58 53 T * aalloc( size_t dim ) { 59 ARRAY_ALLOC$( aalloc, amemalign, dim ); 54 if ( _Alignof(T) <= libAlign() ) return (T *)aalloc( dim, sizeof(T) ); // C allocation 55 else return (T *)amemalign( _Alignof(T), dim, sizeof(T) ); 60 56 } // aalloc 61 57 62 58 T * calloc( size_t dim ) { 63 ARRAY_ALLOC$( calloc, cmemalign, dim ); 59 if ( _Alignof(T) <= libAlign() ) return (T *)calloc( dim, sizeof(T) ); // C allocation 60 else return (T *)cmemalign( _Alignof(T), dim, sizeof(T) ); 64 61 } // calloc 65 62 66 63 T * resize( T * ptr, size_t size ) { // CFA resize, eliminate return-type cast 67 if ( _Alignof(T) <= libAlign() ) return (T *) (void *)resize( (void *)ptr, size ); // CFA resize68 else return (T *) (void *)resize( (void *)ptr, _Alignof(T), size ); // CFA resize64 if ( _Alignof(T) <= libAlign() ) return (T *)resize( (void *)ptr, size ); // CFA resize 65 else return (T *)resize( (void *)ptr, _Alignof(T), size ); // CFA resize 69 66 } // resize 70 67 71 68 T * realloc( T * ptr, size_t size ) { // CFA realloc, eliminate return-type cast 72 if ( _Alignof(T) <= libAlign() ) return (T *) (void *)realloc( (void *)ptr, size ); // C realloc73 else return (T *) (void *)realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc69 if ( _Alignof(T) <= libAlign() ) return (T *)realloc( (void *)ptr, size ); // C realloc 70 else return (T *)realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc 74 71 } // realloc 75 72 … … 169 166 return ret; 170 167 } 168 S_fill(T) ?`fill ( zero_t ) = void; // FIX ME: remove this once ticket 214 is resolved 169 S_fill(T) ?`fill ( T * a ) { return (S_fill(T)){ 'T', '0', 0, a }; } // FIX ME: remove this once ticket 214 is resolved 171 170 S_fill(T) ?`fill ( char c ) { return (S_fill(T)){ 'c', c }; } 172 S_fill(T) ?`fill ( T * a ) { return (S_fill(T)){ 'T', '0', 0, a }; }173 171 S_fill(T) ?`fill ( T a[], size_t nmemb ) { return (S_fill(T)){ 'a', '0', nmemb * sizeof(T), a }; } 174 172 … … 362 360 363 361 static inline { 364 long int random( long int l, long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l ) + l; } // [l,u)365 long int random( long int u ) { if ( u < 0 ) return random( u, 0 ); else return random( 0, u); } // [0,u)362 long int random( long int l, long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l + 1) + l; } // [l,u] 363 long int random( long int u ) { return random( 0, u - 1 ); } // [0,u) 366 364 unsigned long int random( void ) { return lrand48(); } 367 365 unsigned long int random( unsigned long int u ) { return lrand48() % u; } // 
[0,u) 368 unsigned long int random( unsigned long int l, unsigned long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l ) + l; } // [l,u)366 unsigned long int random( unsigned long int l, unsigned long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l + 1) + l; } // [l,u] 369 367 370 368 char random( void ) { return (unsigned long int)random(); } … … 387 385 //--------------------------------------- 388 386 387 struct PRNG { 388 uint32_t callcnt; // call count 389 uint32_t seed; // current seed 390 uint32_t state; // random state 391 }; // PRNG 392 393 extern uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 394 static inline { 395 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; } // set seed 396 void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); } // random seed 397 void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); } // fixed seed 398 uint32_t get_seed( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed 399 uint32_t prng( PRNG & prng, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u) 400 uint32_t prng( PRNG & prng, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u] 401 uint32_t calls( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return callcnt; } 402 } // distribution 403 404 extern void set_seed( uint32_t seed ); // set per thread seed 405 extern uint32_t get_seed(); // get seed 406 extern uint32_t prng( void ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 407 static inline { 408 uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result )); 409 uint32_t prng( uint32_t u ) { return prng() % u; } // [0,u) 410 uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )); 411 uint32_t prng( uint32_t l, uint32_t u ) { return prng( u - l + 1 ) + l; } // [l,u] 412 } // distribution 413 414 //--------------------------------------- 415 389 416 extern bool threading_enabled( void ) OPTIONAL_THREAD; 390 417
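random( l, u ) switches from an exclusive to an inclusive upper bound, computed as lrand48() % (u - l + 1) + l, while the one-argument form still returns values in [0, u). A small C check of the new bounds (modulo bias is ignored here, as in the original):

    #include <stdlib.h>
    #include <stdio.h>

    // Mirrors the updated two-argument random( l, u ), inclusive of both ends.
    static long random_incl(long l, long u) {
        if (u < l) { long t = l; l = u; u = t; }            // the [u, l] = [l, u] swap
        return lrand48() % (u - l + 1) + l;
    }

    int main(void) {
        srand48(42);
        long lo = 10, hi = 12;
        int saw_hi = 0;
        for (int i = 0; i < 1000; i++) {
            long r = random_incl(lo, hi);
            if (r < lo || r > hi) { puts("out of range"); return 1; }
            if (r == hi) saw_hi = 1;
        }
        printf("all draws in [10,12]; the upper bound %s reachable\n", saw_hi ? "is" : "was never");
        return 0;
    }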