- Timestamp: Mar 21, 2022, 1:44:06 PM (4 years ago)
- Branches: ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
- Children: a76202d
- Parents: ef3c383 (diff), dbe2533 (diff)

Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.

- Location: libcfa/src
- Files: 3 added, 24 edited, 1 moved
libcfa/src/Makefile.am
ref3c383 → rd672350

      63  63      containers/queueLockFree.hfa \
      64  64      containers/stackLockFree.hfa \
          65  +   containers/string_sharectx.hfa \
      65  66      containers/vector2.hfa \
      66  67      vec/vec.hfa \
     ...
     118 119      concurrency/exception.hfa \
     119 120      concurrency/kernel.hfa \
         121  +   concurrency/kernel/cluster.hfa \
     120 122      concurrency/locks.hfa \
     121 123      concurrency/monitor.hfa \
     ...
     133 135      concurrency/io/call.cfa \
     134 136      concurrency/iofwd.hfa \
     135      -   concurrency/kernel_private.hfa \
         137  +   concurrency/kernel/private.hfa \
     136 138      concurrency/kernel/startup.cfa \
     137 139      concurrency/preemption.cfa \
libcfa/src/concurrency/coroutine.cfa
ref3c383 → rd672350

      27  27      #include <unwind.h>
      28  28
      29      -   #include "kernel_private.hfa"
          29  +   #include "kernel/private.hfa"
      30  30      #include "exception.hfa"
      31  31      #include "math.hfa"
libcfa/src/concurrency/io.cfa
ref3c383 rd672350 41 41 #include "kernel.hfa" 42 42 #include "kernel/fwd.hfa" 43 #include "kernel _private.hfa"43 #include "kernel/private.hfa" 44 44 #include "io/types.hfa" 45 45 … … 93 93 extern void __kernel_unpark( thread$ * thrd, unpark_hint ); 94 94 95 bool __cfa_io_drain( processor * proc) {95 bool __cfa_io_drain( $io_context * ctx ) { 96 96 /* paranoid */ verify( ! __preemption_enabled() ); 97 97 /* paranoid */ verify( ready_schedule_islocked() ); 98 /* paranoid */ verify( proc ); 99 /* paranoid */ verify( proc->io.ctx ); 98 /* paranoid */ verify( ctx ); 100 99 101 100 // Drain the queue 102 $io_context * ctx = proc->io.ctx;103 101 unsigned head = *ctx->cq.head; 104 102 unsigned tail = *ctx->cq.tail; … … 110 108 if(count == 0) return false; 111 109 110 if(!__atomic_try_acquire(&ctx->cq.lock)) { 111 return false; 112 } 113 112 114 for(i; count) { 113 115 unsigned idx = (head + i) & mask; … … 130 132 /* paranoid */ verify( ready_schedule_islocked() ); 131 133 /* paranoid */ verify( ! __preemption_enabled() ); 134 135 __atomic_unlock(&ctx->cq.lock); 132 136 133 137 return true; … … 175 179 /* paranoid */ verify( ! __preemption_enabled() ); 176 180 177 ctx.proc->io.pending = false;181 __atomic_store_n(&ctx.proc->io.pending, false, __ATOMIC_RELAXED); 178 182 } 179 183 180 184 ready_schedule_lock(); 181 bool ret = __cfa_io_drain( proc);185 bool ret = __cfa_io_drain( &ctx ); 182 186 ready_schedule_unlock(); 183 187 return ret; … … 287 291 //============================================================================================= 288 292 // submission 289 static inline void __submit ( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {293 static inline void __submit_only( struct $io_context * ctx, __u32 idxs[], __u32 have) { 290 294 // We can proceed to the fast path 291 295 // Get the right objects … … 304 308 sq.to_submit += have; 305 309 306 ctx->proc->io.pending = true; 307 ctx->proc->io.dirty = true; 310 __atomic_store_n(&ctx->proc->io.pending, true, __ATOMIC_RELAXED); 311 __atomic_store_n(&ctx->proc->io.dirty , true, __ATOMIC_RELAXED); 312 } 313 314 static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) { 315 __sub_ring_t & sq = ctx->sq; 316 __submit_only(ctx, idxs, have); 317 308 318 if(sq.to_submit > 30) { 309 319 __tls_stats()->io.flush.full++; … … 402 412 // I/O Arbiter 403 413 //============================================================================================= 404 static inline void block(__outstanding_io_queue & queue, __outstanding_io & item) { 414 static inline bool enqueue(__outstanding_io_queue & queue, __outstanding_io & item) { 415 bool was_empty; 416 405 417 // Lock the list, it's not thread safe 406 418 lock( queue.lock __cfaabi_dbg_ctx2 ); 407 419 { 420 was_empty = empty(queue.queue); 421 408 422 // Add our request to the list 409 423 add( queue.queue, item ); … … 414 428 unlock( queue.lock ); 415 429 416 wait( item.sem );430 return was_empty; 417 431 } 418 432 … … 432 446 pa.want = want; 433 447 434 block(this.pending, (__outstanding_io&)pa); 448 enqueue(this.pending, (__outstanding_io&)pa); 449 450 wait( pa.sem ); 435 451 436 452 return pa.ctx; … … 485 501 ei.lazy = lazy; 486 502 487 block(ctx->ext_sq, (__outstanding_io&)ei); 503 bool we = enqueue(ctx->ext_sq, (__outstanding_io&)ei); 504 505 __atomic_store_n(&ctx->proc->io.pending, true, __ATOMIC_SEQ_CST); 506 507 if( we ) { 508 sigval_t value = { PREEMPT_IO }; 509 pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value); 510 } 511 512 wait( ei.sem ); 
488 513 489 514 __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have); … … 501 526 __external_io & ei = (__external_io&)drop( ctx.ext_sq.queue ); 502 527 503 __submit (&ctx, ei.idxs, ei.have, ei.lazy);528 __submit_only(&ctx, ei.idxs, ei.have); 504 529 505 530 post( ei.sem ); -
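The io.cfa hunk above makes completion-queue draining contention-tolerant: __cfa_io_drain now takes the $io_context directly, only proceeds if it can try-acquire the new cq.lock, and the pending/dirty flags become relaxed atomic stores. A minimal plain-C sketch of that try-lock-then-drain shape, under assumed simplifications (cq_t, cq_try_drain, and the power-of-two ring are illustrative names, not the libcfa types):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef struct {
        atomic_flag lock;          // drain guard, only ever try-acquired; init with ATOMIC_FLAG_INIT
        _Atomic uint32_t head;     // consumer index
        _Atomic uint32_t tail;     // producer index
        uint32_t mask;             // capacity - 1 (capacity is a power of two)
        int entries[16];           // payload slots; capacity 16 => mask 15
    } cq_t;

    // Drain whatever is currently visible in the ring.  Returns false when the
    // ring is empty or another thread already holds the drain lock.
    static bool cq_try_drain(cq_t * cq, void (*handle)(int)) {
        // Quick emptiness check before touching the lock.
        if (atomic_load_explicit(&cq->head, memory_order_relaxed) ==
            atomic_load_explicit(&cq->tail, memory_order_relaxed))
            return false;

        if (atomic_flag_test_and_set_explicit(&cq->lock, memory_order_acquire))
            return false;                               // lost the race; caller retries on its next poll

        // Re-read under the lock: only lock holders advance head.
        uint32_t head = atomic_load_explicit(&cq->head, memory_order_relaxed);
        uint32_t tail = atomic_load_explicit(&cq->tail, memory_order_acquire);

        for (uint32_t i = head; i != tail; i++)         // consume [head, tail)
            handle(cq->entries[i & cq->mask]);

        atomic_store_explicit(&cq->head, tail, memory_order_release);
        atomic_flag_clear_explicit(&cq->lock, memory_order_release);
        return true;
    }

A processor that loses the try-acquire simply returns and comes back later, which is the behaviour the hunk introduces.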
libcfa/src/concurrency/io/setup.cfa
ref3c383 rd672350 39 39 40 40 #else 41 #pragma GCC diagnostic push 42 #pragma GCC diagnostic ignored "-Waddress-of-packed-member" 41 43 #include <errno.h> 42 44 #include <stdint.h> … … 56 58 57 59 #include "bitmanip.hfa" 58 #include "kernel_private.hfa" 60 #include "fstream.hfa" 61 #include "kernel/private.hfa" 59 62 #include "thread.hfa" 63 #pragma GCC diagnostic pop 60 64 61 65 void ?{}(io_context_params & this) { … … 111 115 this.ext_sq.empty = true; 112 116 (this.ext_sq.queue){}; 113 __io_uring_setup( this, cl.io.params, proc->idle_ fd );117 __io_uring_setup( this, cl.io.params, proc->idle_wctx.evfd ); 114 118 __cfadbg_print_safe(io_core, "Kernel I/O : Created ring for io_context %u (%p)\n", this.fd, &this); 115 119 } … … 121 125 __cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %u\n", this.fd); 122 126 } 123 124 extern void __disable_interrupts_hard();125 extern void __enable_interrupts_hard();126 127 127 128 static void __io_uring_setup( $io_context & this, const io_context_params & params_in, int procfd ) { … … 213 214 214 215 // completion queue 216 cq.lock = 0; 215 217 cq.head = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.head); 216 218 cq.tail = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail); … … 226 228 __cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd); 227 229 228 __disable_interrupts_hard();229 230 230 int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1); 231 231 if (ret < 0) { 232 232 abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno)); 233 233 } 234 235 __enable_interrupts_hard();236 234 237 235 __cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd); … … 258 256 struct __sub_ring_t & sq = this.sq; 259 257 struct __cmp_ring_t & cq = this.cq; 258 { 259 __u32 fhead = sq.free_ring.head; 260 __u32 ftail = sq.free_ring.tail; 261 262 __u32 total = *sq.num; 263 __u32 avail = ftail - fhead; 264 265 if(avail != total) abort | "Processor (" | (void*)this.proc | ") tearing down ring with" | (total - avail) | "entries allocated but not submitted, out of" | total; 266 } 260 267 261 268 // unmap the submit queue entries -
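The setup change above registers the processor's idle eventfd (idle_wctx.evfd) with the ring, so completions can wake an idle processor, and drops the hard interrupt toggles around the registration. A hedged sketch of the underlying system calls, assuming ring_fd was already produced by io_uring_setup (register_ring_eventfd is an illustrative helper name):

    #include <sys/eventfd.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/io_uring.h>
    #include <stdio.h>
    #include <stdlib.h>

    // Attach an eventfd to an io_uring instance so completions also signal the
    // eventfd; a thread blocked in read(evfd) then wakes on "I/O or wake-up".
    static int register_ring_eventfd(int ring_fd) {
        int evfd = eventfd(0, 0);
        if (evfd < 0) { perror("eventfd"); exit(1); }

        if (syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_EVENTFD, &evfd, 1) < 0) {
            perror("io_uring_register(EVENTFD)");
            exit(1);
        }
        return evfd;
    }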
libcfa/src/concurrency/io/types.hfa
ref3c383 rd672350 23 23 #include "bits/locks.hfa" 24 24 #include "bits/queue.hfa" 25 #include "iofwd.hfa" 25 26 #include "kernel/fwd.hfa" 26 27 … … 77 78 78 79 struct __cmp_ring_t { 80 volatile bool lock; 81 79 82 // Head and tail of the ring 80 83 volatile __u32 * head; … … 170 173 // void __ioctx_prepare_block($io_context & ctx); 171 174 #endif 172 173 //-----------------------------------------------------------------------174 // IO user data175 struct io_future_t {176 future_t self;177 __s32 result;178 };179 180 static inline {181 thread$ * fulfil( io_future_t & this, __s32 result, bool do_unpark = true ) {182 this.result = result;183 return fulfil(this.self, do_unpark);184 }185 186 // Wait for the future to be fulfilled187 bool wait ( io_future_t & this ) { return wait (this.self); }188 void reset ( io_future_t & this ) { return reset (this.self); }189 bool available( io_future_t & this ) { return available(this.self); }190 } -
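types.hfa gains a volatile bool lock in __cmp_ring_t, the flag the drain path try-acquires. Roughly what a try-acquire/unlock pair on such a flag does, written as a plain-C sketch with GCC builtins (the real helpers live in libcfa's bits headers; this is an assumption about their shape, not a copy):

    #include <stdbool.h>

    // Non-blocking acquire: succeed only if the lock was free.
    static inline bool try_acquire(volatile bool * lock) {
        // fast path: skip the atomic RMW when the lock is visibly held
        if (__atomic_load_n(lock, __ATOMIC_RELAXED)) return false;
        return !__atomic_test_and_set(lock, __ATOMIC_ACQUIRE);
    }

    static inline void unlock(volatile bool * lock) {
        __atomic_clear(lock, __ATOMIC_RELEASE);
    }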
libcfa/src/concurrency/iofwd.hfa
ref3c383 rd672350 19 19 extern "C" { 20 20 #include <asm/types.h> 21 #include <sys/stat.h> // needed for mode_t 21 22 #if CFA_HAVE_LINUX_IO_URING_H 22 23 #include <linux/io_uring.h> … … 24 25 } 25 26 #include "bits/defs.hfa" 27 #include "kernel/fwd.hfa" 26 28 #include "time.hfa" 27 29 … … 47 49 48 50 struct cluster; 49 struct io_future_t;50 51 struct $io_context; 51 52 … … 57 58 58 59 struct io_uring_sqe; 60 61 //----------------------------------------------------------------------- 62 // IO user data 63 struct io_future_t { 64 future_t self; 65 __s32 result; 66 }; 67 68 static inline { 69 thread$ * fulfil( io_future_t & this, __s32 result, bool do_unpark = true ) { 70 this.result = result; 71 return fulfil(this.self, do_unpark); 72 } 73 74 // Wait for the future to be fulfilled 75 bool wait ( io_future_t & this ) { return wait (this.self); } 76 void reset ( io_future_t & this ) { return reset (this.self); } 77 bool available( io_future_t & this ) { return available(this.self); } 78 } 59 79 60 80 //---------- … … 133 153 // Check if a function is blocks a only the user thread 134 154 bool has_user_level_blocking( fptr_t func ); 155 156 #if CFA_HAVE_LINUX_IO_URING_H 157 static inline void zero_sqe(struct io_uring_sqe * sqe) { 158 sqe->flags = 0; 159 sqe->ioprio = 0; 160 sqe->fd = 0; 161 sqe->off = 0; 162 sqe->addr = 0; 163 sqe->len = 0; 164 sqe->fsync_flags = 0; 165 sqe->__pad2[0] = 0; 166 sqe->__pad2[1] = 0; 167 sqe->__pad2[2] = 0; 168 sqe->fd = 0; 169 sqe->off = 0; 170 sqe->addr = 0; 171 sqe->len = 0; 172 } 173 #endif -
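io_future_t, a future_t plus the 32-bit I/O result, now lives in iofwd.hfa next to the fulfil/wait/reset/available wrappers. A loose plain-C analogue of that shape, with a busy-wait standing in for libcfa's real park/unpark behaviour (simple_io_future and its functions are illustrative only):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <sched.h>

    // Single-shot future carrying an I/O result: one fulfiller stores the
    // result, one waiter consumes it.
    typedef struct {
        _Atomic bool done;
        int32_t result;
    } simple_io_future;

    static void future_reset(simple_io_future * f) {
        atomic_store_explicit(&f->done, false, memory_order_relaxed);
    }

    static void future_fulfil(simple_io_future * f, int32_t result) {
        f->result = result;                                    // publish payload first
        atomic_store_explicit(&f->done, true, memory_order_release);
    }

    static int32_t future_wait(simple_io_future * f) {
        while (!atomic_load_explicit(&f->done, memory_order_acquire))
            sched_yield();                                     // real code parks the thread instead
        return f->result;
    }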
libcfa/src/concurrency/kernel.cfa
ref3c383 rd672350 19 19 // #define __CFA_DEBUG_PRINT_RUNTIME_CORE__ 20 20 21 #pragma GCC diagnostic push 22 #pragma GCC diagnostic ignored "-Waddress-of-packed-member" 23 21 24 //C Includes 22 25 #include <errno.h> … … 25 28 #include <signal.h> 26 29 #include <unistd.h> 30 27 31 extern "C" { 28 32 #include <sys/eventfd.h> … … 31 35 32 36 //CFA Includes 33 #include "kernel _private.hfa"37 #include "kernel/private.hfa" 34 38 #include "preemption.hfa" 35 39 #include "strstream.hfa" … … 40 44 #define __CFA_INVOKE_PRIVATE__ 41 45 #include "invoke.h" 46 #pragma GCC diagnostic pop 42 47 43 48 #if !defined(__CFA_NO_STATISTICS__) … … 131 136 static void mark_awake(__cluster_proc_list & idles, processor & proc); 132 137 133 extern void __cfa_io_start( processor * ); 134 extern bool __cfa_io_drain( processor * ); 138 extern bool __cfa_io_drain( $io_context * ); 135 139 extern bool __cfa_io_flush( processor *, int min_comp ); 136 extern void __cfa_io_stop ( processor * );137 140 static inline bool __maybe_io_drain( processor * ); 138 141 … … 159 162 verify(this); 160 163 161 io_future_t future; // used for idle sleep when io_uring is present 162 future.self.ptr = 1p; // mark it as already fulfilled so we know if there is a pending request or not 163 eventfd_t idle_val; 164 iovec idle_iovec = { &idle_val, sizeof(idle_val) }; 165 166 __cfa_io_start( this ); 164 /* paranoid */ verify( this->idle_wctx.ftr != 0p ); 165 /* paranoid */ verify( this->idle_wctx.rdbuf != 0p ); 166 167 // used for idle sleep when io_uring is present 168 // mark it as already fulfilled so we know if there is a pending request or not 169 this->idle_wctx.ftr->self.ptr = 1p; 170 iovec idle_iovec = { this->idle_wctx.rdbuf, sizeof(eventfd_t) }; 167 171 168 172 __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this); … … 231 235 } 232 236 233 idle_sleep( this, future, idle_iovec );237 idle_sleep( this, *this->idle_wctx.ftr, idle_iovec ); 234 238 235 239 // We were woken up, remove self from idle … … 251 255 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 252 256 253 if( this->io.pending && !this->io.dirty) {257 if(__atomic_load_n(&this->io.pending, __ATOMIC_RELAXED) && !__atomic_load_n(&this->io.dirty, __ATOMIC_RELAXED)) { 254 258 __IO_STATS__(true, io.flush.dirty++; ) 255 259 __cfa_io_flush( this, 0 ); … … 259 263 __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this); 260 264 } 261 262 for(int i = 0; !available(future); i++) {263 if(i > 1000) __cfaabi_dbg_write( "ERROR: kernel has bin spinning on a flush after exit loop.\n", 60);264 __cfa_io_flush( this, 1 );265 }266 267 __cfa_io_stop( this );268 265 269 266 post( this->terminated ); … … 634 631 635 632 int fd = 1; 636 if( __atomic_load_n(&fdp-> fd, __ATOMIC_SEQ_CST) != 1 ) {637 fd = __atomic_exchange_n(&fdp-> fd, 1, __ATOMIC_RELAXED);633 if( __atomic_load_n(&fdp->sem, __ATOMIC_SEQ_CST) != 1 ) { 634 fd = __atomic_exchange_n(&fdp->sem, 1, __ATOMIC_RELAXED); 638 635 } 639 636 … … 677 674 __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this); 678 675 679 this->idle_wctx. fd= 1;676 this->idle_wctx.sem = 1; 680 677 681 678 eventfd_t val; 682 679 val = 1; 683 eventfd_write( this->idle_ fd, val );680 eventfd_write( this->idle_wctx.evfd, val ); 684 681 685 682 /* paranoid */ verify( ! __preemption_enabled() ); … … 689 686 // Tell everyone we are ready to go do sleep 690 687 for() { 691 int expected = this->idle_wctx. 
fd;688 int expected = this->idle_wctx.sem; 692 689 693 690 // Someone already told us to wake-up! No time for a nap. … … 695 692 696 693 // Try to mark that we are going to sleep 697 if(__atomic_compare_exchange_n(&this->idle_wctx. fd, &expected, this->idle_fd, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) {694 if(__atomic_compare_exchange_n(&this->idle_wctx.sem, &expected, this->idle_wctx.evfd, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) { 698 695 // Every one agreed, taking a nap 699 696 break; … … 713 710 { 714 711 eventfd_t val; 715 ssize_t ret = read( this->idle_ fd, &val, sizeof(val) );712 ssize_t ret = read( this->idle_wctx.evfd, &val, sizeof(val) ); 716 713 if(ret < 0) { 717 714 switch((int)errno) { … … 740 737 reset(future); 741 738 742 __kernel_read(this, future, iov, this->idle_ fd );739 __kernel_read(this, future, iov, this->idle_wctx.evfd ); 743 740 } 744 741 … … 750 747 __STATS__(true, ready.sleep.halts++; ) 751 748 752 proc.idle_wctx. fd= 0;749 proc.idle_wctx.sem = 0; 753 750 754 751 /* paranoid */ verify( ! __preemption_enabled() ); … … 842 839 if(head == tail) return false; 843 840 ready_schedule_lock(); 844 ret = __cfa_io_drain( proc);841 ret = __cfa_io_drain( ctx ); 845 842 ready_schedule_unlock(); 846 843 #endif -
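kernel.cfa rewrites idle sleep around the renamed idle_wctx fields: wake-up stores 1 into sem and writes the eventfd, while the sleeper CASes sem from 0 to the eventfd number before blocking in read. A plain-C sketch of that protocol with assumed names (waitctx, wake, sleep_until_woken), simplified to a single CAS attempt:

    #include <sys/eventfd.h>
    #include <unistd.h>
    #include <stdatomic.h>

    typedef struct {
        _Atomic int sem;   // 0 = running, 1 = wake-up pending, evfd = about to sleep
        int evfd;          // eventfd used to block/wake the idle kernel thread
    } waitctx;

    // Caller noticed work for an idle processor: flag it and kick the eventfd.
    static void wake(waitctx * w) {
        atomic_store(&w->sem, 1);
        eventfd_write(w->evfd, 1);
    }

    // Idle path: only block if nobody raced in with a wake-up.
    static void sleep_until_woken(waitctx * w) {
        int expected = 0;
        // Publish "going to sleep"; if sem is already 1 the nap is cancelled.
        if (!atomic_compare_exchange_strong(&w->sem, &expected, w->evfd)) {
            atomic_store(&w->sem, 0);
            return;
        }

        eventfd_t val;
        eventfd_read(w->evfd, &val);   // blocks until wake() writes
        atomic_store(&w->sem, 0);      // back to the default state
    }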
libcfa/src/concurrency/kernel.hfa
ref3c383 rd672350 48 48 extern struct cluster * mainCluster; 49 49 50 // Processor id, required for scheduling threads 51 52 50 // Coroutine used py processors for the 2-step context switch 53 51 coroutine processorCtx_t { 54 52 struct processor * proc; 55 53 }; 56 54 57 55 struct io_future_t; 56 57 // Information needed for idle sleep 58 58 struct __fd_waitctx { 59 volatile int fd; 59 // semaphore/future like object 60 // values can be 0, 1 or some file descriptor. 61 // 0 - is the default state 62 // 1 - means the proc should wake-up immediately 63 // FD - means the proc is going asleep and should be woken by writing to the FD. 64 volatile int sem; 65 66 // The event FD that corresponds to this processor 67 int evfd; 68 69 // buffer into which the proc will read from evfd 70 // unused if not using io_uring for idle sleep 71 void * rdbuf; 72 73 // future use to track the read of the eventfd 74 // unused if not using io_uring for idle sleep 75 io_future_t * ftr; 60 76 }; 61 77 … … 92 108 struct { 93 109 $io_context * ctx; 94 bool pending; 95 bool dirty; 110 unsigned id; 111 unsigned target; 112 volatile bool pending; 113 volatile bool dirty; 96 114 } io; 97 115 … … 103 121 bool pending_preemption; 104 122 105 // Idle lock (kernel semaphore) 106 int idle_fd; 107 108 // Idle waitctx 123 // context for idle sleep 109 124 struct __fd_waitctx idle_wctx; 110 125 … … 155 170 void ^?{}(__intrusive_lane_t & this); 156 171 157 // Aligned timestamps which are used by the re laxed ready queue172 // Aligned timestamps which are used by the ready queue and io subsystem 158 173 struct __attribute__((aligned(128))) __timestamp_t { 159 174 volatile unsigned long long tv; … … 161 176 }; 162 177 178 static inline void ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; } 179 static inline void ^?{}(__timestamp_t &) {} 180 181 163 182 struct __attribute__((aligned(16))) __cache_id_t { 164 183 volatile unsigned id; 165 184 }; 166 167 // Aligned timestamps which are used by the relaxed ready queue168 struct __attribute__((aligned(128))) __help_cnts_t {169 volatile unsigned long long src;170 volatile unsigned long long dst;171 volatile unsigned long long tri;172 };173 174 static inline void ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; }175 static inline void ^?{}(__timestamp_t &) {}176 177 struct __attribute__((aligned(128))) __ready_queue_caches_t;178 void ?{}(__ready_queue_caches_t & this);179 void ^?{}(__ready_queue_caches_t & this);180 181 //TODO adjust cache size to ARCHITECTURE182 // Structure holding the ready queue183 struct __ready_queue_t {184 // Data tracking the actual lanes185 // On a seperate cacheline from the used struct since186 // used can change on each push/pop but this data187 // only changes on shrink/grow188 struct {189 // Arary of lanes190 __intrusive_lane_t * volatile data;191 192 // Array of times193 __timestamp_t * volatile tscs;194 195 __cache_id_t * volatile caches;196 197 // Array of stats198 __help_cnts_t * volatile help;199 200 // Number of lanes (empty or not)201 volatile size_t count;202 } lanes;203 };204 205 void ?{}(__ready_queue_t & this);206 void ^?{}(__ready_queue_t & this);207 #if !defined(__CFA_NO_STATISTICS__)208 unsigned cnt(const __ready_queue_t & this, unsigned idx);209 #endif210 185 211 186 // Idle Sleep … … 233 208 // Cluster 234 209 struct __attribute__((aligned(128))) cluster { 235 // Ready queue for threads 236 __ready_queue_t ready_queue; 210 struct { 211 struct { 212 // Arary of subqueues 213 __intrusive_lane_t * data; 214 215 // Time since subqueues 
were processed 216 __timestamp_t * tscs; 217 218 // Number of subqueue / timestamps 219 size_t count; 220 } readyQ; 221 222 struct { 223 // Array of $io_ 224 $io_context ** data; 225 226 // Time since subqueues were processed 227 __timestamp_t * tscs; 228 229 // Number of I/O subqueues 230 size_t count; 231 } io; 232 233 // Cache each kernel thread belongs to 234 __cache_id_t * caches; 235 } sched; 236 237 // // Ready queue for threads 238 // __ready_queue_t ready_queue; 237 239 238 240 // Name of the cluster -
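kernel.hfa now shards the cluster scheduler state into readyQ and io arrays, each paired with __timestamp_t entries holding a raw timestamp (tv) and its moving average (ma). The average is the 4-in-16 exponentially weighted update computed by moving_average() in the ready_queue.cfa hunk further down; a worked C version of that formula:

    #include <stdint.h>

    // EWMA used for subqueue ages: weight 4/16 on the new sample, 12/16 on the
    // previous average (mirrors moving_average() in ready_queue.cfa).
    static uint64_t moving_average(uint64_t now, uint64_t inserted, uint64_t old_avg) {
        const uint64_t new_val = now > inserted ? now - inserted : 0;
        return (4 * new_val + 12 * old_avg) / 16;
    }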
libcfa/src/concurrency/kernel/fwd.hfa
ref3c383 → rd672350

     347 347      struct oneshot * want = expected == 0p ? 1p : 2p;
     348 348      if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     349      -       if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return 0p; }
         349  +       if( expected == 0p ) { return 0p; }
     350 350          thread$ * ret = post( *expected, do_unpark );
     351 351          __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
libcfa/src/concurrency/kernel/private.hfa
ref3c383 rd672350 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // kernel _private.hfa --7 // kernel/private.hfa -- 8 8 // 9 9 // Author : Thierry Delisle … … 17 17 18 18 #if !defined(__cforall_thread__) 19 #error kernel _private.hfa should only be included in libcfathread source19 #error kernel/private.hfa should only be included in libcfathread source 20 20 #endif 21 21 … … 33 33 #else 34 34 #ifndef _GNU_SOURCE 35 #error kernel _private requires gnu_source35 #error kernel/private requires gnu_source 36 36 #endif 37 37 #include <sched.h> … … 59 59 60 60 extern bool __preemption_enabled(); 61 62 enum { 63 PREEMPT_NORMAL = 0, 64 PREEMPT_TERMINATE = 1, 65 PREEMPT_IO = 2, 66 }; 61 67 62 68 static inline void __disable_interrupts_checked() { … … 359 365 void ready_queue_shrink(struct cluster * cltr); 360 366 367 //----------------------------------------------------------------------- 368 // Decrease the width of the ready queue (number of lanes) by 4 369 void ready_queue_close(struct cluster * cltr); 361 370 362 371 // Local Variables: // -
libcfa/src/concurrency/kernel/startup.cfa
ref3c383 rd672350 18 18 19 19 // C Includes 20 #include <errno.h> 20 #include <errno.h> // errno 21 21 #include <signal.h> 22 #include <string.h> 23 #include <unistd.h> 22 #include <string.h> // strerror 23 #include <unistd.h> // sysconf 24 24 25 25 extern "C" { 26 #include <limits.h> 27 #include <unistd.h> 28 #include <sys/eventfd.h> 29 #include <sys/mman.h> 30 #include <sys/resource.h> 26 #include <limits.h> // PTHREAD_STACK_MIN 27 #include <unistd.h> // syscall 28 #include <sys/eventfd.h> // eventfd 29 #include <sys/mman.h> // mprotect 30 #include <sys/resource.h> // getrlimit 31 31 } 32 32 33 33 // CFA Includes 34 #include "kernel_private.hfa" 35 #include "startup.hfa" // STARTUP_PRIORITY_XXX 34 #include "kernel/private.hfa" 35 #include "iofwd.hfa" 36 #include "startup.hfa" // STARTUP_PRIORITY_XXX 36 37 #include "limits.hfa" 37 38 #include "math.hfa" … … 97 98 extern void __kernel_alarm_startup(void); 98 99 extern void __kernel_alarm_shutdown(void); 100 extern void __cfa_io_start( processor * ); 101 extern void __cfa_io_stop ( processor * ); 99 102 100 103 //----------------------------------------------------------------------------- … … 111 114 KERNEL_STORAGE(__stack_t, mainThreadCtx); 112 115 KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock); 116 KERNEL_STORAGE(eventfd_t, mainIdleEventFd); 117 KERNEL_STORAGE(io_future_t, mainIdleFuture); 113 118 #if !defined(__CFA_NO_STATISTICS__) 114 119 KERNEL_STORAGE(__stats_t, mainProcStats); … … 224 229 (*mainProcessor){}; 225 230 231 mainProcessor->idle_wctx.rdbuf = &storage_mainIdleEventFd; 232 mainProcessor->idle_wctx.ftr = (io_future_t*)&storage_mainIdleFuture; 233 /* paranoid */ verify( sizeof(storage_mainIdleEventFd) == sizeof(eventfd_t) ); 234 226 235 register_tls( mainProcessor ); 236 __cfa_io_start( mainProcessor ); 227 237 228 238 // Start by initializing the main thread … … 304 314 mainProcessor->local_data = 0p; 305 315 316 __cfa_io_stop( mainProcessor ); 306 317 unregister_tls( mainProcessor ); 307 318 … … 355 366 register_tls( proc ); 356 367 368 __cfa_io_start( proc ); 369 370 // used for idle sleep when io_uring is present 371 io_future_t future; 372 eventfd_t idle_buf; 373 proc->idle_wctx.ftr = &future; 374 proc->idle_wctx.rdbuf = &idle_buf; 375 376 357 377 // SKULLDUGGERY: We want to create a context for the processor coroutine 358 378 // which is needed for the 2-step context switch. However, there is no reason … … 381 401 // Main routine of the core returned, the core is now fully terminated 382 402 __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner); 403 404 __cfa_io_stop( proc ); 383 405 384 406 #if !defined(__CFA_NO_STATISTICS__) … … 515 537 this.rdq.its = 0; 516 538 this.rdq.itr = 0; 517 this.rdq.id = MAX;539 this.rdq.id = 0; 518 540 this.rdq.target = MAX; 519 541 this.rdq.last = MAX; … … 532 554 this.local_data = 0p; 533 555 534 this.idle_fd = eventfd(0, 0);535 if (idle_ fd < 0) {556 idle_wctx.evfd = eventfd(0, 0); 557 if (idle_wctx.evfd < 0) { 536 558 abort("KERNEL ERROR: PROCESSOR EVENTFD - %s\n", strerror(errno)); 537 559 } 538 560 539 this.idle_wctx.fd= 0;561 idle_wctx.sem = 0; 540 562 541 563 // I'm assuming these two are reserved for standard input and output 542 564 // so I'm using them as sentinels with idle_wctx. 
543 /* paranoid */ verify( this.idle_fd != 0 );544 /* paranoid */ verify( this.idle_fd != 1 );565 /* paranoid */ verify( idle_wctx.evfd != 0 ); 566 /* paranoid */ verify( idle_wctx.evfd != 1 ); 545 567 546 568 #if !defined(__CFA_NO_STATISTICS__) … … 554 576 // Not a ctor, it just preps the destruction but should not destroy members 555 577 static void deinit(processor & this) { 556 close(this.idle_ fd);578 close(this.idle_wctx.evfd); 557 579 } 558 580 … … 605 627 this.name = name; 606 628 this.preemption_rate = preemption_rate; 607 ready_queue{}; 629 this.sched.readyQ.data = 0p; 630 this.sched.readyQ.tscs = 0p; 631 this.sched.readyQ.count = 0; 632 this.sched.io.tscs = 0p; 633 this.sched.caches = 0p; 608 634 609 635 #if !defined(__CFA_NO_STATISTICS__) … … 644 670 // Unlock the RWlock 645 671 ready_mutate_unlock( last_size ); 672 673 ready_queue_close( &this ); 674 /* paranoid */ verify( this.sched.readyQ.data == 0p ); 675 /* paranoid */ verify( this.sched.readyQ.tscs == 0p ); 676 /* paranoid */ verify( this.sched.readyQ.count == 0 ); 677 /* paranoid */ verify( this.sched.io.tscs == 0p ); 678 /* paranoid */ verify( this.sched.caches == 0p ); 679 646 680 enable_interrupts( false ); // Don't poll, could be in main cluster 681 647 682 648 683 #if !defined(__CFA_NO_STATISTICS__) … … 736 771 check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute 737 772 738 size_t stacksize = DEFAULT_STACK_SIZE;773 size_t stacksize = max( PTHREAD_STACK_MIN, DEFAULT_STACK_SIZE ); 739 774 740 775 void * stack; -
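startup.cfa above clamps the kernel-thread stack size to at least PTHREAD_STACK_MIN and wires the main processor's idle eventfd buffer and future. A hedged sketch of the stack-size clamp around pthread_create (DEFAULT_STACK_SIZE and spawn are illustrative, not libcfa's values/names):

    #include <limits.h>      // PTHREAD_STACK_MIN
    #include <unistd.h>
    #include <pthread.h>

    #define DEFAULT_STACK_SIZE (65000)   // illustrative value

    // Create a thread whose stack is at least PTHREAD_STACK_MIN bytes;
    // pthread_attr_setstacksize rejects anything smaller with EINVAL.
    static int spawn(pthread_t * tid, void *(*thread_main)(void *), void * arg) {
        pthread_attr_t attr;
        pthread_attr_init(&attr);

        size_t stacksize = DEFAULT_STACK_SIZE;
        if (stacksize < (size_t)PTHREAD_STACK_MIN) stacksize = PTHREAD_STACK_MIN;
        pthread_attr_setstacksize(&attr, stacksize);

        int ret = pthread_create(tid, &attr, thread_main, arg);
        pthread_attr_destroy(&attr);
        return ret;
    }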
libcfa/src/concurrency/locks.cfa
ref3c383 → rd672350

      19  19
      20  20      #include "locks.hfa"
      21      -   #include "kernel_private.hfa"
          21  +   #include "kernel/private.hfa"
      22  22
      23  23      #include <kernel.hfa>
libcfa/src/concurrency/locks.hfa
ref3c383 → rd672350

     164 164      }
     165 165
     166      -   static inline bool lock(linear_backoff_then_block_lock & this) with(this) {
         166  +   static inline void lock(linear_backoff_then_block_lock & this) with(this) {
     167 167          // if owner just return
     168      -       if (active_thread() == owner) return true;
         168  +       if (active_thread() == owner) return;
     169 169          size_t compare_val = 0;
     170 170          int spin = spin_start;
     ...
     172 172          for( ;; ) {
     173 173              compare_val = 0;
     174      -           if (internal_try_lock(this, compare_val)) return true;
         174  +           if (internal_try_lock(this, compare_val)) return;
     175 175              if (2 == compare_val) break;
     176 176              for (int i = 0; i < spin; i++) Pause();
     ...
     179 179          }
     180 180
     181      -       if(2 != compare_val && try_lock_contention(this)) return true;
         181  +       if(2 != compare_val && try_lock_contention(this)) return;
     182 182          // block until signalled
     183      -       while (block(this)) if(try_lock_contention(this)) return true;
     184      -
     185      -       // this should never be reached as block(this) always returns true
     186      -       return false;
         183  +       while (block(this)) if(try_lock_contention(this)) return;
     187 184
     188 185
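lock() on linear_backoff_then_block_lock now returns void, since the blocking loop can no longer fail. The general spin-with-doubling-backoff-then-block idea, sketched in plain C with a pthread mutex standing in for the parking slow path (the libcfa lock parks user threads rather than blocking the kernel thread; backoff_lock is an illustrative name):

    #include <pthread.h>

    typedef struct {
        pthread_mutex_t m;    // doubles as both the lock and the blocking slow path
    } backoff_lock;

    // Spin with a doubling back-off window, then give up and block in the kernel.
    static void backoff_lock_acquire(backoff_lock * l, int spin_start, int spin_end) {
        for (int spin = spin_start > 0 ? spin_start : 1; spin <= spin_end; spin += spin) {
            if (pthread_mutex_trylock(&l->m) == 0) return;   // got it without blocking
            for (volatile int i = 0; i < spin; i++) { }      // back off; window doubles each round
        }
        pthread_mutex_lock(&l->m);                           // contended for too long: block
    }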
libcfa/src/concurrency/monitor.cfa
ref3c383 → rd672350

      22  22      #include <inttypes.h>
      23  23
      24      -   #include "kernel_private.hfa"
          24  +   #include "kernel/private.hfa"
      25  25
      26  26      #include "bits/algorithm.hfa"
libcfa/src/concurrency/mutex.cfa
ref3c383 → rd672350

      21  21      #include "mutex.hfa"
      22  22
      23      -   #include "kernel_private.hfa"
          23  +   #include "kernel/private.hfa"
      24  24
      25  25      //-----------------------------------------------------------------------------
libcfa/src/concurrency/mutex_stmt.hfa
ref3c383 rd672350 12 12 }; 13 13 14 15 struct __mutex_stmt_lock_guard { 16 void ** lockarr; 17 __lock_size_t count; 18 }; 19 20 static inline void ?{}( __mutex_stmt_lock_guard & this, void * lockarr [], __lock_size_t count ) { 21 this.lockarr = lockarr; 22 this.count = count; 23 24 // Sort locks based on address 25 __libcfa_small_sort(this.lockarr, count); 26 27 // acquire locks in order 28 // for ( size_t i = 0; i < count; i++ ) { 29 // lock(*this.lockarr[i]); 30 // } 31 } 32 33 static inline void ^?{}( __mutex_stmt_lock_guard & this ) with(this) { 34 // for ( size_t i = count; i > 0; i-- ) { 35 // unlock(*lockarr[i - 1]); 36 // } 37 } 38 14 39 forall(L & | is_lock(L)) { 15 16 struct __mutex_stmt_lock_guard {17 L ** lockarr;18 __lock_size_t count;19 };20 21 static inline void ?{}( __mutex_stmt_lock_guard(L) & this, L * lockarr [], __lock_size_t count ) {22 this.lockarr = lockarr;23 this.count = count;24 25 // Sort locks based on address26 __libcfa_small_sort(this.lockarr, count);27 28 // acquire locks in order29 for ( size_t i = 0; i < count; i++ ) {30 lock(*this.lockarr[i]);31 }32 }33 34 static inline void ^?{}( __mutex_stmt_lock_guard(L) & this ) with(this) {35 for ( size_t i = count; i > 0; i-- ) {36 unlock(*lockarr[i - 1]);37 }38 }39 40 40 41 struct scoped_lock { … … 51 52 } 52 53 53 static inline L * __get_ptr( L & this ) {54 static inline void * __get_mutexstmt_lock_ptr( L & this ) { 54 55 return &this; 55 56 } 56 57 57 static inline L __get_ type( L & this );58 static inline L __get_mutexstmt_lock_type( L & this ); 58 59 59 static inline L __get_ type( L * this );60 static inline L __get_mutexstmt_lock_type( L * this ); 60 61 } -
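The mutex_stmt.hfa guard above now stores untyped lock pointers and still sorts them by address, giving every mutex statement the same global acquisition order and thereby ruling out deadlock. The technique in plain C over pthread mutexes (lock_all/unlock_all are illustrative helpers):

    #include <pthread.h>
    #include <stdlib.h>
    #include <stdint.h>

    // Compare two mutex pointers by address: any consistent total order works,
    // and addresses are the cheapest one.
    static int cmp_lock_ptr(const void * a, const void * b) {
        uintptr_t x = (uintptr_t)*(pthread_mutex_t * const *)a;
        uintptr_t y = (uintptr_t)*(pthread_mutex_t * const *)b;
        return (x > y) - (x < y);
    }

    // Acquire a set of locks without deadlock by always locking in address order.
    static void lock_all(pthread_mutex_t * locks[], size_t count) {
        qsort(locks, count, sizeof(locks[0]), cmp_lock_ptr);
        for (size_t i = 0; i < count; i++)
            pthread_mutex_lock(locks[i]);
    }

    // Release in reverse order (mirrors the guard's destructor).
    static void unlock_all(pthread_mutex_t * locks[], size_t count) {
        for (size_t i = count; i > 0; i--)
            pthread_mutex_unlock(locks[i - 1]);
    }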
libcfa/src/concurrency/preemption.cfa
ref3c383 rd672350 10 10 // Created On : Mon Jun 5 14:20:42 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Nov 6 07:42:13 202013 // Update Count : 5 412 // Last Modified On : Thu Feb 17 11:18:57 2022 13 // Update Count : 59 14 14 // 15 15 … … 31 31 #include "bits/debug.hfa" 32 32 #include "bits/signal.hfa" 33 #include "kernel _private.hfa"33 #include "kernel/private.hfa" 34 34 35 35 … … 97 97 } 98 98 99 enum {100 PREEMPT_NORMAL = 0,101 PREEMPT_TERMINATE = 1,102 };103 104 99 //============================================================================================= 105 100 // Kernel Preemption logic … … 243 238 //---------- 244 239 // special case for preemption since used often 245 bool __preemption_enabled() {240 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() { 246 241 // create a assembler label before 247 242 // marked as clobber all to avoid movement … … 664 659 choose(sfp->si_value.sival_int) { 665 660 case PREEMPT_NORMAL : ;// Normal case, nothing to do here 661 case PREEMPT_IO : ;// I/O asked to stop spinning, nothing to do here 666 662 case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) ); 667 663 default: -
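preemption.cfa above now also accepts PREEMPT_IO, the sigval tag the I/O arbiter queues with pthread_sigqueue to stop a spinning processor, and the PREEMPT_* enum moved to kernel/private.hfa. A hedged sketch of tagging SIGUSR1 with a payload and dispatching on it in an SA_SIGINFO handler (handler bodies reduced to comments; install_handler and kick are illustrative names):

    #define _GNU_SOURCE             // pthread_sigqueue is a GNU extension
    #include <pthread.h>
    #include <signal.h>

    enum { PREEMPT_NORMAL = 0, PREEMPT_TERMINATE = 1, PREEMPT_IO = 2 };

    // SA_SIGINFO handler: the integer the sender queued arrives in si_value.
    static void on_sigusr1(int sig, siginfo_t * info, void * ctx) {
        (void)sig; (void)ctx;
        switch (info->si_value.sival_int) {
          case PREEMPT_NORMAL:    break;  // regular time-slice preemption
          case PREEMPT_IO:        break;  // stop spinning, go flush/drain I/O
          case PREEMPT_TERMINATE: break;  // this processor is shutting down
        }
    }

    static void install_handler(void) {
        struct sigaction sa;
        sa.sa_sigaction = on_sigusr1;
        sa.sa_flags = SA_SIGINFO;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGUSR1, &sa, 0);
    }

    // Sender side, as the I/O arbiter does: wake a specific kernel thread and
    // tell it why it was interrupted.
    static void kick(pthread_t target) {
        union sigval value = { .sival_int = PREEMPT_IO };
        pthread_sigqueue(target, SIGUSR1, value);
    }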
libcfa/src/concurrency/ready_queue.cfa
ref3c383 rd672350 20 20 21 21 22 // #define USE_RELAXED_FIFO23 // #define USE_WORK_STEALING24 // #define USE_CPU_WORK_STEALING25 22 #define USE_AWARE_STEALING 26 23 27 24 #include "bits/defs.hfa" 28 25 #include "device/cpu.hfa" 29 #include "kernel _private.hfa"30 31 #include "stdlib.hfa" 26 #include "kernel/cluster.hfa" 27 #include "kernel/private.hfa" 28 32 29 #include "limits.hfa" 33 #include "math.hfa" 34 35 #include <errno.h> 36 #include <unistd.h> 37 38 extern "C" { 39 #include <sys/syscall.h> // __NR_xxx 40 } 30 31 // #include <errno.h> 32 // #include <unistd.h> 41 33 42 34 #include "ready_subqueue.hfa" … … 50 42 #endif 51 43 52 // No overriden function, no environment variable, no define53 // fall back to a magic number54 #ifndef __CFA_MAX_PROCESSORS__55 #define __CFA_MAX_PROCESSORS__ 102456 #endif57 58 #if defined(USE_AWARE_STEALING)59 #define READYQ_SHARD_FACTOR 260 #define SEQUENTIAL_SHARD 261 #elif defined(USE_CPU_WORK_STEALING)62 #define READYQ_SHARD_FACTOR 263 #elif defined(USE_RELAXED_FIFO)64 #define BIAS 465 #define READYQ_SHARD_FACTOR 466 #define SEQUENTIAL_SHARD 167 #elif defined(USE_WORK_STEALING)68 #define READYQ_SHARD_FACTOR 269 #define SEQUENTIAL_SHARD 270 #else71 #error no scheduling strategy selected72 #endif73 74 44 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)); 75 45 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)); 76 46 static inline struct thread$ * search(struct cluster * cltr); 77 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred);78 79 80 // returns the maximum number of processors the RWLock support81 __attribute__((weak)) unsigned __max_processors() {82 const char * max_cores_s = getenv("CFA_MAX_PROCESSORS");83 if(!max_cores_s) {84 __cfadbg_print_nolock(ready_queue, "No CFA_MAX_PROCESSORS in ENV\n");85 return __CFA_MAX_PROCESSORS__;86 }87 88 char * endptr = 0p;89 long int max_cores_l = strtol(max_cores_s, &endptr, 10);90 if(max_cores_l < 1 || max_cores_l > 65535) {91 __cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS out of range : %ld\n", max_cores_l);92 return __CFA_MAX_PROCESSORS__;93 }94 if('\0' != *endptr) {95 __cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS not a decimal number : %s\n", max_cores_s);96 return __CFA_MAX_PROCESSORS__;97 }98 99 return max_cores_l;100 }101 102 #if defined(CFA_HAVE_LINUX_LIBRSEQ)103 // No forward declaration needed104 #define __kernel_rseq_register rseq_register_current_thread105 #define __kernel_rseq_unregister rseq_unregister_current_thread106 #elif defined(CFA_HAVE_LINUX_RSEQ_H)107 static void __kernel_raw_rseq_register (void);108 static void __kernel_raw_rseq_unregister(void);109 110 #define __kernel_rseq_register __kernel_raw_rseq_register111 #define __kernel_rseq_unregister __kernel_raw_rseq_unregister112 #else113 // No forward declaration needed114 // No initialization needed115 static inline void noop(void) {}116 117 #define __kernel_rseq_register noop118 #define __kernel_rseq_unregister noop119 #endif120 121 //=======================================================================122 // Cluster wide reader-writer lock123 //=======================================================================124 void ?{}(__scheduler_RWLock_t & this) {125 this.max = __max_processors();126 this.alloc = 0;127 this.ready = 0;128 this.data = alloc(this.max);129 this.write_lock = false;130 131 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), 
&this.alloc));132 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready));133 134 }135 void ^?{}(__scheduler_RWLock_t & this) {136 free(this.data);137 }138 139 140 //=======================================================================141 // Lock-Free registering/unregistering of threads142 unsigned register_proc_id( void ) with(*__scheduler_lock) {143 __kernel_rseq_register();144 145 bool * handle = (bool *)&kernelTLS().sched_lock;146 147 // Step - 1 : check if there is already space in the data148 uint_fast32_t s = ready;149 150 // Check among all the ready151 for(uint_fast32_t i = 0; i < s; i++) {152 bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems153 /* paranoid */ verify( handle != *cell );154 155 bool * null = 0p; // Re-write every loop since compare thrashes it156 if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null157 && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {158 /* paranoid */ verify(i < ready);159 /* paranoid */ verify( (kernelTLS().sched_id = i, true) );160 return i;161 }162 }163 164 if(max <= alloc) abort("Trying to create more than %ud processors", __scheduler_lock->max);165 166 // Step - 2 : F&A to get a new spot in the array.167 uint_fast32_t n = __atomic_fetch_add(&alloc, 1, __ATOMIC_SEQ_CST);168 if(max <= n) abort("Trying to create more than %ud processors", __scheduler_lock->max);169 170 // Step - 3 : Mark space as used and then publish it.171 data[n] = handle;172 while() {173 unsigned copy = n;174 if( __atomic_load_n(&ready, __ATOMIC_RELAXED) == n175 && __atomic_compare_exchange_n(&ready, ©, n + 1, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))176 break;177 Pause();178 }179 180 // Return new spot.181 /* paranoid */ verify(n < ready);182 /* paranoid */ verify( (kernelTLS().sched_id = n, true) );183 return n;184 }185 186 void unregister_proc_id( unsigned id ) with(*__scheduler_lock) {187 /* paranoid */ verify(id < ready);188 /* paranoid */ verify(id == kernelTLS().sched_id);189 /* paranoid */ verify(data[id] == &kernelTLS().sched_lock);190 191 bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems192 193 __atomic_store_n(cell, 0p, __ATOMIC_RELEASE);194 195 __kernel_rseq_unregister();196 }197 198 //-----------------------------------------------------------------------199 // Writer side : acquire when changing the ready queue, e.g. adding more200 // queues or removing them.201 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {202 /* paranoid */ verify( ! __preemption_enabled() );203 204 // Step 1 : lock global lock205 // It is needed to avoid processors that register mid Critical-Section206 // to simply lock their own lock and enter.207 __atomic_acquire( &write_lock );208 209 // Make sure we won't deadlock ourself210 // Checking before acquiring the writer lock isn't safe211 // because someone else could have locked us.212 /* paranoid */ verify( ! kernelTLS().sched_lock );213 214 // Step 2 : lock per-proc lock215 // Processors that are currently being registered aren't counted216 // but can't be in read_lock or in the critical section.217 // All other processors are counted218 uint_fast32_t s = ready;219 for(uint_fast32_t i = 0; i < s; i++) {220 volatile bool * llock = data[i];221 if(llock) __atomic_acquire( llock );222 }223 224 /* paranoid */ verify( ! 
__preemption_enabled() );225 return s;226 }227 228 void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {229 /* paranoid */ verify( ! __preemption_enabled() );230 231 // Step 1 : release local locks232 // This must be done while the global lock is held to avoid233 // threads that where created mid critical section234 // to race to lock their local locks and have the writer235 // immidiately unlock them236 // Alternative solution : return s in write_lock and pass it to write_unlock237 for(uint_fast32_t i = 0; i < last_s; i++) {238 volatile bool * llock = data[i];239 if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);240 }241 242 // Step 2 : release global lock243 /*paranoid*/ assert(true == write_lock);244 __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);245 246 /* paranoid */ verify( ! __preemption_enabled() );247 }248 249 //=======================================================================250 // caches handling251 252 struct __attribute__((aligned(128))) __ready_queue_caches_t {253 // Count States:254 // - 0 : No one is looking after this cache255 // - 1 : No one is looking after this cache, BUT it's not empty256 // - 2+ : At least one processor is looking after this cache257 volatile unsigned count;258 };259 260 void ?{}(__ready_queue_caches_t & this) { this.count = 0; }261 void ^?{}(__ready_queue_caches_t & this) {}262 263 static inline void depart(__ready_queue_caches_t & cache) {264 /* paranoid */ verify( cache.count > 1);265 __atomic_fetch_add(&cache.count, -1, __ATOMIC_SEQ_CST);266 /* paranoid */ verify( cache.count != 0);267 /* paranoid */ verify( cache.count < 65536 ); // This verify assumes no cluster will have more than 65000 kernel threads mapped to a single cache, which could be correct but is super weird.268 }269 270 static inline void arrive(__ready_queue_caches_t & cache) {271 // for() {272 // unsigned expected = cache.count;273 // unsigned desired = 0 == expected ? 2 : expected + 1;274 // }275 }276 47 277 48 //======================================================================= 278 49 // Cforall Ready Queue used for scheduling 279 50 //======================================================================= 280 unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) { 281 /* paranoid */ verifyf( currtsc < 45000000000000000, "Suspiciously large current time: %'llu (%llx)\n", currtsc, currtsc ); 282 /* paranoid */ verifyf( instsc < 45000000000000000, "Suspiciously large insert time: %'llu (%llx)\n", instsc, instsc ); 283 /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg ); 284 285 const unsigned long long new_val = currtsc > instsc ? 
currtsc - instsc : 0; 286 const unsigned long long total_weight = 16; 287 const unsigned long long new_weight = 4; 288 const unsigned long long old_weight = total_weight - new_weight; 289 const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight; 290 return ret; 291 } 292 293 void ?{}(__ready_queue_t & this) with (this) { 294 #if defined(USE_CPU_WORK_STEALING) 295 lanes.count = cpu_info.hthrd_count * READYQ_SHARD_FACTOR; 296 lanes.data = alloc( lanes.count ); 297 lanes.tscs = alloc( lanes.count ); 298 lanes.help = alloc( cpu_info.hthrd_count ); 299 300 for( idx; (size_t)lanes.count ) { 301 (lanes.data[idx]){}; 302 lanes.tscs[idx].tv = rdtscl(); 303 lanes.tscs[idx].ma = rdtscl(); 304 } 305 for( idx; (size_t)cpu_info.hthrd_count ) { 306 lanes.help[idx].src = 0; 307 lanes.help[idx].dst = 0; 308 lanes.help[idx].tri = 0; 309 } 310 #else 311 lanes.data = 0p; 312 lanes.tscs = 0p; 313 lanes.caches = 0p; 314 lanes.help = 0p; 315 lanes.count = 0; 316 #endif 317 } 318 319 void ^?{}(__ready_queue_t & this) with (this) { 320 #if !defined(USE_CPU_WORK_STEALING) 321 verify( SEQUENTIAL_SHARD == lanes.count ); 322 #endif 323 324 free(lanes.data); 325 free(lanes.tscs); 326 free(lanes.caches); 327 free(lanes.help); 328 } 329 330 //----------------------------------------------------------------------- 331 #if defined(USE_AWARE_STEALING) 332 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) { 333 processor * const proc = kernelTLS().this_processor; 334 const bool external = (!proc) || (cltr != proc->cltr); 335 const bool remote = hint == UNPARK_REMOTE; 336 337 unsigned i; 338 if( external || remote ) { 339 // Figure out where thread was last time and make sure it's valid 340 /* paranoid */ verify(thrd->preferred >= 0); 341 if(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count) { 342 /* paranoid */ verify(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count); 343 unsigned start = thrd->preferred * READYQ_SHARD_FACTOR; 344 do { 345 unsigned r = __tls_rand(); 346 i = start + (r % READYQ_SHARD_FACTOR); 347 /* paranoid */ verify( i < lanes.count ); 348 // If we can't lock it retry 349 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 350 } else { 351 do { 352 i = __tls_rand() % lanes.count; 353 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 354 } 51 // void ?{}(__ready_queue_t & this) with (this) { 52 // lanes.data = 0p; 53 // lanes.tscs = 0p; 54 // lanes.caches = 0p; 55 // lanes.count = 0; 56 // } 57 58 // void ^?{}(__ready_queue_t & this) with (this) { 59 // free(lanes.data); 60 // free(lanes.tscs); 61 // free(lanes.caches); 62 // } 63 64 //----------------------------------------------------------------------- 65 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->sched) { 66 processor * const proc = kernelTLS().this_processor; 67 const bool external = (!proc) || (cltr != proc->cltr); 68 const bool remote = hint == UNPARK_REMOTE; 69 const size_t lanes_count = readyQ.count; 70 71 /* paranoid */ verify( __shard_factor.readyq > 0 ); 72 /* paranoid */ verify( lanes_count > 0 ); 73 74 unsigned i; 75 if( external || remote ) { 76 // Figure out where thread was last time and make sure it's valid 77 /* paranoid */ verify(thrd->preferred >= 0); 78 unsigned start = thrd->preferred * __shard_factor.readyq; 79 if(start < lanes_count) { 80 do { 81 unsigned r = __tls_rand(); 82 i = start + (r % __shard_factor.readyq); 83 /* paranoid */ verify( i < 
lanes_count ); 84 // If we can't lock it retry 85 } while( !__atomic_try_acquire( &readyQ.data[i].lock ) ); 355 86 } else { 356 87 do { 357 unsigned r = proc->rdq.its++; 358 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR); 359 /* paranoid */ verify( i < lanes.count ); 360 // If we can't lock it retry 361 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 362 } 363 364 // Actually push it 365 push(lanes.data[i], thrd); 366 367 // Unlock and return 368 __atomic_unlock( &lanes.data[i].lock ); 369 370 #if !defined(__CFA_NO_STATISTICS__) 371 if(unlikely(external || remote)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 372 else __tls_stats()->ready.push.local.success++; 373 #endif 374 } 375 376 static inline unsigned long long calc_cutoff(const unsigned long long ctsc, const processor * proc, __ready_queue_t & rdq) { 377 unsigned start = proc->rdq.id; 378 unsigned long long max = 0; 379 for(i; READYQ_SHARD_FACTOR) { 380 unsigned long long ptsc = ts(rdq.lanes.data[start + i]); 381 if(ptsc != -1ull) { 382 /* paranoid */ verify( start + i < rdq.lanes.count ); 383 unsigned long long tsc = moving_average(ctsc, ptsc, rdq.lanes.tscs[start + i].ma); 384 if(tsc > max) max = tsc; 385 } 386 } 387 return (max + 2 * max) / 2; 388 } 389 390 __attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { 391 /* paranoid */ verify( lanes.count > 0 ); 392 /* paranoid */ verify( kernelTLS().this_processor ); 393 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count ); 394 395 processor * const proc = kernelTLS().this_processor; 396 unsigned this = proc->rdq.id; 397 /* paranoid */ verify( this < lanes.count ); 398 __cfadbg_print_safe(ready_queue, "Kernel : pop from %u\n", this); 399 400 // Figure out the current cpu and make sure it is valid 401 const int cpu = __kernel_getcpu(); 402 /* paranoid */ verify(cpu >= 0); 403 /* paranoid */ verify(cpu < cpu_info.hthrd_count); 404 unsigned this_cache = cpu_info.llc_map[cpu].cache; 405 406 // Super important: don't write the same value over and over again 407 // We want to maximise our chances that his particular values stays in cache 408 if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache) 409 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED); 410 411 const unsigned long long ctsc = rdtscl(); 412 413 if(proc->rdq.target == MAX) { 414 uint64_t chaos = __tls_rand(); 415 unsigned ext = chaos & 0xff; 416 unsigned other = (chaos >> 8) % (lanes.count); 417 418 if(ext < 3 || __atomic_load_n(&lanes.caches[other / READYQ_SHARD_FACTOR].id, __ATOMIC_RELAXED) == this_cache) { 419 proc->rdq.target = other; 420 } 421 } 422 else { 423 const unsigned target = proc->rdq.target; 424 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, lanes.tscs[target].tv); 425 /* paranoid */ verify( lanes.tscs[target].tv != MAX ); 426 if(target < lanes.count) { 427 const unsigned long long cutoff = calc_cutoff(ctsc, proc, cltr->ready_queue); 428 const unsigned long long age = moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma); 429 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? 
"yes" : "no"); 430 if(age > cutoff) { 431 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 432 if(t) return t; 433 } 434 } 435 proc->rdq.target = MAX; 436 } 437 438 for(READYQ_SHARD_FACTOR) { 439 unsigned i = this + (proc->rdq.itr++ % READYQ_SHARD_FACTOR); 440 if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t; 441 } 442 443 // All lanes where empty return 0p 444 return 0p; 445 446 } 447 __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { 448 unsigned i = __tls_rand() % lanes.count; 449 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal)); 450 } 451 __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) { 452 return search(cltr); 453 } 454 #endif 455 #if defined(USE_CPU_WORK_STEALING) 456 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) { 457 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 458 459 processor * const proc = kernelTLS().this_processor; 460 const bool external = (!proc) || (cltr != proc->cltr); 461 462 // Figure out the current cpu and make sure it is valid 463 const int cpu = __kernel_getcpu(); 464 /* paranoid */ verify(cpu >= 0); 465 /* paranoid */ verify(cpu < cpu_info.hthrd_count); 466 /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count); 467 468 // Figure out where thread was last time and make sure it's 469 /* paranoid */ verify(thrd->preferred >= 0); 470 /* paranoid */ verify(thrd->preferred < cpu_info.hthrd_count); 471 /* paranoid */ verify(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count); 472 const int prf = thrd->preferred * READYQ_SHARD_FACTOR; 473 474 const cpu_map_entry_t & map; 475 choose(hint) { 476 case UNPARK_LOCAL : &map = &cpu_info.llc_map[cpu]; 477 case UNPARK_REMOTE: &map = &cpu_info.llc_map[prf]; 478 } 479 /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count); 480 /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count); 481 /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR); 482 483 const int start = map.self * READYQ_SHARD_FACTOR; 484 unsigned i; 88 i = __tls_rand() % lanes_count; 89 } while( !__atomic_try_acquire( &readyQ.data[i].lock ) ); 90 } 91 } else { 485 92 do { 486 unsigned r; 487 if(unlikely(external)) { r = __tls_rand(); } 488 else { r = proc->rdq.its++; } 489 choose(hint) { 490 case UNPARK_LOCAL : i = start + (r % READYQ_SHARD_FACTOR); 491 case UNPARK_REMOTE: i = prf + (r % READYQ_SHARD_FACTOR); 492 } 93 unsigned r = proc->rdq.its++; 94 i = proc->rdq.id + (r % __shard_factor.readyq); 95 /* paranoid */ verify( i < lanes_count ); 493 96 // If we can't lock it retry 494 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 495 496 // Actually push it 497 push(lanes.data[i], thrd); 498 499 // Unlock and return 500 __atomic_unlock( &lanes.data[i].lock ); 501 502 #if !defined(__CFA_NO_STATISTICS__) 503 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 504 else __tls_stats()->ready.push.local.success++; 505 #endif 506 507 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first); 508 509 } 510 511 // Pop from the ready queue from a given cluster 512 __attribute__((hot)) thread$ * pop_fast(struct 
cluster * cltr) with (cltr->ready_queue) { 513 /* paranoid */ verify( lanes.count > 0 ); 514 /* paranoid */ verify( kernelTLS().this_processor ); 515 516 processor * const proc = kernelTLS().this_processor; 517 const int cpu = __kernel_getcpu(); 518 /* paranoid */ verify(cpu >= 0); 519 /* paranoid */ verify(cpu < cpu_info.hthrd_count); 520 /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count); 521 522 const cpu_map_entry_t & map = cpu_info.llc_map[cpu]; 523 /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count); 524 /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count); 525 /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR); 526 527 const int start = map.self * READYQ_SHARD_FACTOR; 528 const unsigned long long ctsc = rdtscl(); 529 530 // Did we already have a help target 531 if(proc->rdq.target == MAX) { 532 unsigned long long max = 0; 533 for(i; READYQ_SHARD_FACTOR) { 534 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 535 if(tsc > max) max = tsc; 536 } 537 // proc->rdq.cutoff = (max + 2 * max) / 2; 538 /* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores. 539 /* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores. 540 541 if(0 == (__tls_rand() % 100)) { 542 proc->rdq.target = __tls_rand() % lanes.count; 543 } else { 544 unsigned cpu_chaos = map.start + (__tls_rand() % map.count); 545 proc->rdq.target = (cpu_chaos * READYQ_SHARD_FACTOR) + (__tls_rand() % READYQ_SHARD_FACTOR); 546 /* paranoid */ verify(proc->rdq.target >= (map.start * READYQ_SHARD_FACTOR)); 547 /* paranoid */ verify(proc->rdq.target < ((map.start + map.count) * READYQ_SHARD_FACTOR)); 548 } 549 550 /* paranoid */ verify(proc->rdq.target != MAX); 551 } 552 else { 553 unsigned long long max = 0; 554 for(i; READYQ_SHARD_FACTOR) { 555 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 556 if(tsc > max) max = tsc; 557 } 558 const unsigned long long cutoff = (max + 2 * max) / 2; 559 { 560 unsigned target = proc->rdq.target; 561 proc->rdq.target = MAX; 562 lanes.help[target / READYQ_SHARD_FACTOR].tri++; 563 if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 564 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 565 proc->rdq.last = target; 566 if(t) return t; 567 } 568 proc->rdq.target = MAX; 569 } 570 571 unsigned last = proc->rdq.last; 572 if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) { 573 thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help)); 574 if(t) return t; 575 } 576 else { 577 proc->rdq.last = MAX; 578 } 579 } 580 581 for(READYQ_SHARD_FACTOR) { 582 unsigned i = start + (proc->rdq.itr++ % READYQ_SHARD_FACTOR); 583 if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t; 584 } 585 586 // All lanes where empty return 0p 587 return 0p; 588 } 589 590 __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { 591 processor * const proc = kernelTLS().this_processor; 592 unsigned last = proc->rdq.last; 593 if(last != MAX) { 594 struct thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal)); 595 if(t) return t; 596 proc->rdq.last = MAX; 597 } 598 599 unsigned i = __tls_rand() % 
lanes.count; 600 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal)); 601 } 602 __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) { 603 return search(cltr); 604 } 605 #endif 606 #if defined(USE_RELAXED_FIFO) 607 //----------------------------------------------------------------------- 608 // get index from random number with or without bias towards queues 609 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) { 610 unsigned i; 611 bool local; 612 unsigned rlow = r % BIAS; 613 unsigned rhigh = r / BIAS; 614 if((0 != rlow) && preferred >= 0) { 615 // (BIAS - 1) out of BIAS chances 616 // Use perferred queues 617 i = preferred + (rhigh % READYQ_SHARD_FACTOR); 618 local = true; 619 } 620 else { 621 // 1 out of BIAS chances 622 // Use all queues 623 i = rhigh; 624 local = false; 625 } 626 return [i, local]; 627 } 628 629 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) { 630 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 631 632 const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 633 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 634 635 bool local; 636 int preferred = external ? -1 : kernelTLS().this_processor->rdq.id; 637 638 // Try to pick a lane and lock it 639 unsigned i; 640 do { 641 // Pick the index of a lane 642 unsigned r = __tls_rand_fwd(); 643 [i, local] = idx_from_r(r, preferred); 644 645 i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 646 647 #if !defined(__CFA_NO_STATISTICS__) 648 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.attempt, 1, __ATOMIC_RELAXED); 649 else if(local) __tls_stats()->ready.push.local.attempt++; 650 else __tls_stats()->ready.push.share.attempt++; 651 #endif 652 653 // If we can't lock it retry 654 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 655 656 // Actually push it 657 push(lanes.data[i], thrd); 658 659 // Unlock and return 660 __atomic_unlock( &lanes.data[i].lock ); 661 662 // Mark the current index in the tls rng instance as having an item 663 __tls_rand_advance_bck(); 664 665 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first); 666 667 // Update statistics 668 #if !defined(__CFA_NO_STATISTICS__) 669 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 670 else if(local) __tls_stats()->ready.push.local.success++; 671 else __tls_stats()->ready.push.share.success++; 672 #endif 673 } 674 675 // Pop from the ready queue from a given cluster 676 __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { 677 /* paranoid */ verify( lanes.count > 0 ); 678 /* paranoid */ verify( kernelTLS().this_processor ); 679 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count ); 680 681 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 682 int preferred = kernelTLS().this_processor->rdq.id; 683 684 685 // As long as the list is not empty, try finding a lane that isn't empty and pop from it 686 for(25) { 687 // Pick two lists at random 688 unsigned ri = __tls_rand_bck(); 689 unsigned rj = __tls_rand_bck(); 690 691 unsigned i, j; 692 __attribute__((unused)) bool locali, localj; 693 [i, locali] = idx_from_r(ri, preferred); 694 [j, localj] = 
idx_from_r(rj, preferred); 695 696 i %= count; 697 j %= count; 698 699 // try popping from the 2 picked lists 700 struct thread$ * thrd = try_pop(cltr, i, j __STATS(, *(locali || localj ? &__tls_stats()->ready.pop.local : &__tls_stats()->ready.pop.help))); 701 if(thrd) { 702 return thrd; 703 } 704 } 705 706 // All lanes where empty return 0p 707 return 0p; 708 } 709 710 __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) { return pop_fast(cltr); } 711 __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) { 712 return search(cltr); 713 } 714 #endif 715 #if defined(USE_WORK_STEALING) 716 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) { 717 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 718 719 // #define USE_PREFERRED 720 #if !defined(USE_PREFERRED) 721 const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 722 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 723 #else 724 unsigned preferred = thrd->preferred; 725 const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || preferred == MAX || thrd->curr_cluster != cltr; 726 /* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count ); 727 728 unsigned r = preferred % READYQ_SHARD_FACTOR; 729 const unsigned start = preferred - r; 730 #endif 731 732 // Try to pick a lane and lock it 733 unsigned i; 734 do { 735 #if !defined(__CFA_NO_STATISTICS__) 736 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.attempt, 1, __ATOMIC_RELAXED); 737 else __tls_stats()->ready.push.local.attempt++; 738 #endif 739 740 if(unlikely(external)) { 741 i = __tls_rand() % lanes.count; 742 } 743 else { 744 #if !defined(USE_PREFERRED) 745 processor * proc = kernelTLS().this_processor; 746 unsigned r = proc->rdq.its++; 747 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR); 748 #else 749 i = start + (r++ % READYQ_SHARD_FACTOR); 750 #endif 751 } 752 // If we can't lock it retry 753 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 754 755 // Actually push it 756 push(lanes.data[i], thrd); 757 758 // Unlock and return 759 __atomic_unlock( &lanes.data[i].lock ); 760 761 #if !defined(__CFA_NO_STATISTICS__) 762 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 763 else __tls_stats()->ready.push.local.success++; 764 #endif 765 766 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first); 767 } 768 769 // Pop from the ready queue from a given cluster 770 __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { 771 /* paranoid */ verify( lanes.count > 0 ); 772 /* paranoid */ verify( kernelTLS().this_processor ); 773 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count ); 774 775 processor * proc = kernelTLS().this_processor; 776 777 if(proc->rdq.target == MAX) { 778 unsigned long long min = ts(lanes.data[proc->rdq.id]); 779 for(int i = 0; i < READYQ_SHARD_FACTOR; i++) { 780 unsigned long long tsc = ts(lanes.data[proc->rdq.id + i]); 781 if(tsc < min) min = tsc; 782 } 783 proc->rdq.cutoff = min; 784 proc->rdq.target = __tls_rand() % lanes.count; 785 } 786 else { 787 unsigned target = proc->rdq.target; 788 proc->rdq.target = MAX; 789 
const unsigned long long bias = 0; //2_500_000_000; 790 const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff; 791 if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) { 97 } while( !__atomic_try_acquire( &readyQ.data[i].lock ) ); 98 } 99 100 // Actually push it 101 push(readyQ.data[i], thrd); 102 103 // Unlock and return 104 __atomic_unlock( &readyQ.data[i].lock ); 105 106 #if !defined(__CFA_NO_STATISTICS__) 107 if(unlikely(external || remote)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 108 else __tls_stats()->ready.push.local.success++; 109 #endif 110 } 111 112 __attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr) with (cltr->sched) { 113 const size_t lanes_count = readyQ.count; 114 115 /* paranoid */ verify( __shard_factor.readyq > 0 ); 116 /* paranoid */ verify( lanes_count > 0 ); 117 /* paranoid */ verify( kernelTLS().this_processor ); 118 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes_count ); 119 120 processor * const proc = kernelTLS().this_processor; 121 unsigned this = proc->rdq.id; 122 /* paranoid */ verify( this < lanes_count ); 123 __cfadbg_print_safe(ready_queue, "Kernel : pop from %u\n", this); 124 125 // Figure out the current cache is 126 const unsigned this_cache = cache_id(cltr, this / __shard_factor.readyq); 127 const unsigned long long ctsc = rdtscl(); 128 129 if(proc->rdq.target == MAX) { 130 uint64_t chaos = __tls_rand(); 131 unsigned ext = chaos & 0xff; 132 unsigned other = (chaos >> 8) % (lanes_count); 133 134 if(ext < 3 || __atomic_load_n(&caches[other / __shard_factor.readyq].id, __ATOMIC_RELAXED) == this_cache) { 135 proc->rdq.target = other; 136 } 137 } 138 else { 139 const unsigned target = proc->rdq.target; 140 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, readyQ.tscs[target].tv); 141 /* paranoid */ verify( readyQ.tscs[target].tv != MAX ); 142 if(target < lanes_count) { 143 const unsigned long long cutoff = calc_cutoff(ctsc, proc, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq); 144 const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].tv, readyQ.tscs[target].ma); 145 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? 
"yes" : "no"); 146 if(age > cutoff) { 792 147 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 793 148 if(t) return t; 794 149 } 795 150 } 796 797 for(READYQ_SHARD_FACTOR) { 798 unsigned i = proc->rdq.id + (proc->rdq.itr++ % READYQ_SHARD_FACTOR); 799 if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t; 800 } 801 return 0p; 802 } 803 804 __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { 805 unsigned i = __tls_rand() % lanes.count; 806 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal)); 807 } 808 809 __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) with (cltr->ready_queue) { 810 return search(cltr); 811 } 812 #endif 151 proc->rdq.target = MAX; 152 } 153 154 for(__shard_factor.readyq) { 155 unsigned i = this + (proc->rdq.itr++ % __shard_factor.readyq); 156 if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t; 157 } 158 159 // All lanes where empty return 0p 160 return 0p; 161 162 } 163 __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) { 164 unsigned i = __tls_rand() % (cltr->sched.readyQ.count); 165 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal)); 166 } 167 __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) { 168 return search(cltr); 169 } 813 170 814 171 //======================================================================= … … 820 177 //----------------------------------------------------------------------- 821 178 // try to pop from a lane given by index w 822 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr-> ready_queue) {823 /* paranoid */ verify( w < lanes.count );179 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->sched) { 180 /* paranoid */ verify( w < readyQ.count ); 824 181 __STATS( stats.attempt++; ) 825 182 826 183 // Get relevant elements locally 827 __intrusive_lane_t & lane = lanes.data[w];184 __intrusive_lane_t & lane = readyQ.data[w]; 828 185 829 186 // If list looks empty retry … … 845 202 // Actually pop the list 846 203 struct thread$ * thrd; 847 #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING) 848 unsigned long long tsc_before = ts(lane); 849 #endif 204 unsigned long long tsc_before = ts(lane); 850 205 unsigned long long tsv; 851 206 [thrd, tsv] = pop(lane); … … 861 216 __STATS( stats.success++; ) 862 217 863 #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING) 864 if (tsv != MAX) { 865 unsigned long long now = rdtscl(); 866 unsigned long long pma = __atomic_load_n(&lanes.tscs[w].ma, __ATOMIC_RELAXED); 867 __atomic_store_n(&lanes.tscs[w].tv, tsv, __ATOMIC_RELAXED); 868 __atomic_store_n(&lanes.tscs[w].ma, moving_average(now, tsc_before, pma), __ATOMIC_RELAXED); 869 } 870 #endif 871 872 #if defined(USE_AWARE_STEALING) || defined(USE_CPU_WORK_STEALING) 873 thrd->preferred = w / READYQ_SHARD_FACTOR; 874 #else 875 thrd->preferred = w; 876 #endif 218 if (tsv != MAX) { 219 unsigned long long now = rdtscl(); 220 unsigned long long pma = __atomic_load_n(&readyQ.tscs[w].ma, __ATOMIC_RELAXED); 221 __atomic_store_n(&readyQ.tscs[w].tv, tsv, __ATOMIC_RELAXED); 222 __atomic_store_n(&readyQ.tscs[w].ma, moving_average(now, tsc_before, pma), __ATOMIC_RELAXED); 223 } 224 225 thrd->preferred = w / 
__shard_factor.readyq; 877 226 878 227 // return the popped thread … … 883 232 // try to pop from any lanes making sure you don't miss any threads push 884 233 // before the start of the function 885 static inline struct thread$ * search(struct cluster * cltr) with (cltr->ready_queue) { 886 /* paranoid */ verify( lanes.count > 0 ); 887 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED ); 234 static inline struct thread$ * search(struct cluster * cltr) { 235 const size_t lanes_count = cltr->sched.readyQ.count; 236 /* paranoid */ verify( lanes_count > 0 ); 237 unsigned count = __atomic_load_n( &lanes_count, __ATOMIC_RELAXED ); 888 238 unsigned offset = __tls_rand(); 889 239 for(i; count) { … … 902 252 // get preferred ready for new thread 903 253 unsigned ready_queue_new_preferred() { 904 unsigned pref = 0;254 unsigned pref = MAX; 905 255 if(struct thread$ * thrd = publicTLS_get( this_thread )) { 906 256 pref = thrd->preferred; 907 257 } 908 else {909 #if defined(USE_CPU_WORK_STEALING)910 pref = __kernel_getcpu();911 #endif912 }913 914 #if defined(USE_CPU_WORK_STEALING)915 /* paranoid */ verify(pref >= 0);916 /* paranoid */ verify(pref < cpu_info.hthrd_count);917 #endif918 258 919 259 return pref; … … 921 261 922 262 //----------------------------------------------------------------------- 923 // Check that all the intrusive queues in the data structure are still consistent924 static void check( __ready_queue_t & q ) with (q) {925 #if defined(__CFA_WITH_VERIFY__)926 {927 for( idx ; lanes.count ) {928 __intrusive_lane_t & sl = lanes.data[idx];929 assert(!lanes.data[idx].lock);930 931 if(is_empty(sl)) {932 assert( sl.anchor.next == 0p );933 assert( sl.anchor.ts == -1llu );934 assert( mock_head(sl) == sl.prev );935 } else {936 assert( sl.anchor.next != 0p );937 assert( sl.anchor.ts != -1llu );938 assert( mock_head(sl) != sl.prev );939 }940 }941 }942 #endif943 }944 945 //-----------------------------------------------------------------------946 263 // Given 2 indexes, pick the list with the oldest push an try to pop from it 947 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)) with (cltr-> ready_queue) {264 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->sched) { 948 265 // Pick the bet list 949 266 int w = i; 950 if( __builtin_expect(!is_empty( lanes.data[j]), true) ) {951 w = (ts( lanes.data[i]) < ts(lanes.data[j])) ? i : j;267 if( __builtin_expect(!is_empty(readyQ.data[j]), true) ) { 268 w = (ts(readyQ.data[i]) < ts(readyQ.data[j])) ? 
i : j; 952 269 } 953 270 954 271 return try_pop(cltr, w __STATS(, stats)); 955 272 } 956 957 // Call this function of the intrusive list was moved using memcpy958 // fixes the list so that the pointers back to anchors aren't left dangling959 static inline void fix(__intrusive_lane_t & ll) {960 if(is_empty(ll)) {961 verify(ll.anchor.next == 0p);962 ll.prev = mock_head(ll);963 }964 }965 966 static void assign_list(unsigned & value, dlist(processor) & list, unsigned count) {967 processor * it = &list`first;968 for(unsigned i = 0; i < count; i++) {969 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);970 it->rdq.id = value;971 it->rdq.target = MAX;972 value += READYQ_SHARD_FACTOR;973 it = &(*it)`next;974 }975 }976 977 static void reassign_cltr_id(struct cluster * cltr) {978 unsigned preferred = 0;979 assign_list(preferred, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);980 assign_list(preferred, cltr->procs.idles , cltr->procs.idle );981 }982 983 static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) {984 #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING)985 lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);986 for(i; lanes.count) {987 lanes.tscs[i].tv = rdtscl();988 lanes.tscs[i].ma = 0;989 }990 #endif991 }992 993 #if defined(USE_CPU_WORK_STEALING)994 // ready_queue size is fixed in this case995 void ready_queue_grow(struct cluster * cltr) {}996 void ready_queue_shrink(struct cluster * cltr) {}997 #else998 // Grow the ready queue999 void ready_queue_grow(struct cluster * cltr) {1000 size_t ncount;1001 int target = cltr->procs.total;1002 1003 /* paranoid */ verify( ready_mutate_islocked() );1004 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");1005 1006 // Make sure that everything is consistent1007 /* paranoid */ check( cltr->ready_queue );1008 1009 // grow the ready queue1010 with( cltr->ready_queue ) {1011 // Find new count1012 // Make sure we always have atleast 1 list1013 if(target >= 2) {1014 ncount = target * READYQ_SHARD_FACTOR;1015 } else {1016 ncount = SEQUENTIAL_SHARD;1017 }1018 1019 // Allocate new array (uses realloc and memcpies the data)1020 lanes.data = alloc( ncount, lanes.data`realloc );1021 1022 // Fix the moved data1023 for( idx; (size_t)lanes.count ) {1024 fix(lanes.data[idx]);1025 }1026 1027 // Construct new data1028 for( idx; (size_t)lanes.count ~ ncount) {1029 (lanes.data[idx]){};1030 }1031 1032 // Update original1033 lanes.count = ncount;1034 1035 lanes.caches = alloc( target, lanes.caches`realloc );1036 }1037 1038 fix_times(cltr);1039 1040 reassign_cltr_id(cltr);1041 1042 // Make sure that everything is consistent1043 /* paranoid */ check( cltr->ready_queue );1044 1045 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");1046 1047 /* paranoid */ verify( ready_mutate_islocked() );1048 }1049 1050 // Shrink the ready queue1051 void ready_queue_shrink(struct cluster * cltr) {1052 /* paranoid */ verify( ready_mutate_islocked() );1053 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");1054 1055 // Make sure that everything is consistent1056 /* paranoid */ check( cltr->ready_queue );1057 1058 int target = cltr->procs.total;1059 1060 with( cltr->ready_queue ) {1061 // Remember old count1062 size_t ocount = lanes.count;1063 1064 // Find new count1065 // Make sure we always have atleast 1 list1066 lanes.count = target >= 2 ? 
target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;1067 /* paranoid */ verify( ocount >= lanes.count );1068 /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );1069 1070 // for printing count the number of displaced threads1071 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)1072 __attribute__((unused)) size_t displaced = 0;1073 #endif1074 1075 // redistribute old data1076 for( idx; (size_t)lanes.count ~ ocount) {1077 // Lock is not strictly needed but makes checking invariants much easier1078 __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);1079 verify(locked);1080 1081 // As long as we can pop from this lane to push the threads somewhere else in the queue1082 while(!is_empty(lanes.data[idx])) {1083 struct thread$ * thrd;1084 unsigned long long _;1085 [thrd, _] = pop(lanes.data[idx]);1086 1087 push(cltr, thrd, true);1088 1089 // for printing count the number of displaced threads1090 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)1091 displaced++;1092 #endif1093 }1094 1095 // Unlock the lane1096 __atomic_unlock(&lanes.data[idx].lock);1097 1098 // TODO print the queue statistics here1099 1100 ^(lanes.data[idx]){};1101 }1102 1103 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);1104 1105 // Allocate new array (uses realloc and memcpies the data)1106 lanes.data = alloc( lanes.count, lanes.data`realloc );1107 1108 // Fix the moved data1109 for( idx; (size_t)lanes.count ) {1110 fix(lanes.data[idx]);1111 }1112 1113 lanes.caches = alloc( target, lanes.caches`realloc );1114 }1115 1116 fix_times(cltr);1117 1118 1119 reassign_cltr_id(cltr);1120 1121 // Make sure that everything is consistent1122 /* paranoid */ check( cltr->ready_queue );1123 1124 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");1125 /* paranoid */ verify( ready_mutate_islocked() );1126 }1127 #endif1128 1129 #if !defined(__CFA_NO_STATISTICS__)1130 unsigned cnt(const __ready_queue_t & this, unsigned idx) {1131 /* paranoid */ verify(this.lanes.count > idx);1132 return this.lanes.data[idx].cnt;1133 }1134 #endif1135 1136 1137 #if defined(CFA_HAVE_LINUX_LIBRSEQ)1138 // No definition needed1139 #elif defined(CFA_HAVE_LINUX_RSEQ_H)1140 1141 #if defined( __x86_64 ) || defined( __i386 )1142 #define RSEQ_SIG 0x530530531143 #elif defined( __ARM_ARCH )1144 #ifdef __ARMEB__1145 #define RSEQ_SIG 0xf3def5e7 /* udf #24035 ; 0x5de3 (ARMv6+) */1146 #else1147 #define RSEQ_SIG 0xe7f5def3 /* udf #24035 ; 0x5de3 */1148 #endif1149 #endif1150 1151 extern void __disable_interrupts_hard();1152 extern void __enable_interrupts_hard();1153 1154 static void __kernel_raw_rseq_register (void) {1155 /* paranoid */ verify( __cfaabi_rseq.cpu_id == RSEQ_CPU_ID_UNINITIALIZED );1156 1157 // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, (sigset_t *)0p, _NSIG / 8);1158 int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, RSEQ_SIG);1159 if(ret != 0) {1160 int e = errno;1161 switch(e) {1162 case EINVAL: abort("KERNEL ERROR: rseq register invalid argument");1163 case ENOSYS: abort("KERNEL ERROR: rseq register no supported");1164 case EFAULT: abort("KERNEL ERROR: rseq register with invalid argument");1165 case EBUSY : abort("KERNEL ERROR: rseq register already registered");1166 case EPERM : abort("KERNEL ERROR: rseq register sig argument on unregistration does not match the signature received on registration");1167 default: abort("KERNEL ERROR: 
rseq register unexpected return %d", e);1168 }1169 }1170 }1171 1172 static void __kernel_raw_rseq_unregister(void) {1173 /* paranoid */ verify( __cfaabi_rseq.cpu_id >= 0 );1174 1175 // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, (sigset_t *)0p, _NSIG / 8);1176 int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);1177 if(ret != 0) {1178 int e = errno;1179 switch(e) {1180 case EINVAL: abort("KERNEL ERROR: rseq unregister invalid argument");1181 case ENOSYS: abort("KERNEL ERROR: rseq unregister no supported");1182 case EFAULT: abort("KERNEL ERROR: rseq unregister with invalid argument");1183 case EBUSY : abort("KERNEL ERROR: rseq unregister already registered");1184 case EPERM : abort("KERNEL ERROR: rseq unregister sig argument on unregistration does not match the signature received on registration");1185 default: abort("KERNEL ERROR: rseq unregisteunexpected return %d", e);1186 }1187 }1188 }1189 #else1190 // No definition needed1191 #endif -
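Editorial note on the ready_queue.cfa changes above: the reworked pop_fast tracks, per lane, the timestamp of the oldest queued thread together with a moving average of how long threads wait there, and a processor only "helps" a lane outside its own shard when that lane's smoothed age exceeds a locally computed cutoff (the old code used roughly 1.5 times the worst local average; the new code delegates to calc_cutoff, whose body is not part of this diff). Since only the call sites of moving_average and calc_cutoff are visible here, the following is a minimal C sketch of the idea with assumed formulas and hypothetical names, not the library's implementation.

    #include <stdbool.h>

    typedef unsigned long long u64;

    // Assumed stand-in for moving_average( now, ts, ma ): an exponential moving
    // average of the age of a lane's oldest element; the smoothing factor is a guess.
    static u64 ema_age_sketch( u64 now, u64 oldest_ts, u64 prev_ma ) {
        u64 age = now > oldest_ts ? now - oldest_ts : 0;
        return ( 7 * prev_ma + age ) / 8;
    }

    // Assumed helping rule: raid a remote lane only when its smoothed age looks
    // worse than the cutoff derived from the lanes this processor already serves.
    static bool should_help_sketch( u64 now, u64 target_ts, u64 target_ma, u64 cutoff ) {
        return ema_age_sketch( now, target_ts, target_ma ) > cutoff;
    }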
libcfa/src/concurrency/ready_subqueue.hfa
ref3c383 rd672350 25 25 ); 26 26 return rhead; 27 }28 29 // Ctor30 void ?{}( __intrusive_lane_t & this ) {31 this.lock = false;32 this.prev = mock_head(this);33 this.anchor.next = 0p;34 this.anchor.ts = -1llu;35 #if !defined(__CFA_NO_STATISTICS__)36 this.cnt = 0;37 #endif38 39 // We add a boat-load of assertions here because the anchor code is very fragile40 /* paranoid */ _Static_assert( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );41 /* paranoid */ verify( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );42 /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.anchor) );43 /* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next );44 /* paranoid */ verify( &mock_head(this)->link.ts == &this.anchor.ts );45 /* paranoid */ verify( mock_head(this)->link.next == 0p );46 /* paranoid */ verify( mock_head(this)->link.ts == -1llu );47 /* paranoid */ verify( mock_head(this) == this.prev );48 /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 );49 /* paranoid */ verify( __alignof__(this) == 128 );50 /* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) );51 }52 53 // Dtor is trivial54 void ^?{}( __intrusive_lane_t & this ) {55 // Make sure the list is empty56 /* paranoid */ verify( this.anchor.next == 0p );57 /* paranoid */ verify( this.anchor.ts == -1llu );58 /* paranoid */ verify( mock_head(this) == this.prev );59 27 } 60 28 -
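Note on the constructor and destructor removed from ready_subqueue.hfa above: their assertions record the layout trick the subqueue depends on, namely that the lane's inline anchor overlays the link field of a thread descriptor, so mock_head() can treat the lane itself as a fake first node and push/pop avoid an empty-list special case. A generic C sketch of that sentinel-anchor pattern, with hypothetical types rather than the libcfa ones:

    #include <stddef.h>

    struct link { struct link * next; };
    struct lane {
        struct link * last;     // tail pointer; aliases &anchor while the lane is empty
        struct link   anchor;   // inline sentinel standing in for a fake head node
    };

    static void lane_init( struct lane * l ) { l->anchor.next = NULL; l->last = &l->anchor; }
    static int  lane_empty( const struct lane * l ) { return l->last == &l->anchor; }

    static void lane_push( struct lane * l, struct link * n ) {
        n->next = NULL;
        l->last->next = n;      // no empty-case branch: last points at the anchor when empty
        l->last = n;
    }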
libcfa/src/concurrency/thread.cfa
ref3c383 rd672350 19 19 #include "thread.hfa" 20 20 21 #include "kernel _private.hfa"21 #include "kernel/private.hfa" 22 22 #include "exception.hfa" 23 23 -
libcfa/src/containers/string.cfa
ref3c383 rd672350 92 92 } 93 93 94 string ?=?(string & this, string other) {94 string & ?=?(string & this, string & other) { //// <---- straw man change 95 95 (*this.inner) = (*other.inner); 96 96 return this; … … 235 235 int find(const string &s, const char* search, size_t searchsize) { 236 236 return find( *s.inner, search, searchsize); 237 } 238 239 int findFrom(const string &s, size_t fromPos, char search) { 240 return findFrom( *s.inner, fromPos, search ); 241 } 242 243 int findFrom(const string &s, size_t fromPos, const string &search) { 244 return findFrom( *s.inner, fromPos, *search.inner ); 245 } 246 247 int findFrom(const string &s, size_t fromPos, const char* search) { 248 return findFrom( *s.inner, fromPos, search ); 249 } 250 251 int findFrom(const string &s, size_t fromPos, const char* search, size_t searchsize) { 252 return findFrom( *s.inner, fromPos, search, searchsize ); 237 253 } 238 254 -
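On the assignment operator rewritten above (the line the diff itself flags as a straw man change): returning string & instead of string hands back the left operand, matching the header comment that the by-reference return "seems to help avoid calls to autogen". A side effect worth noting is that chained assignment then flows entirely through the user-defined operator; a hedged Cforall usage sketch:

    // Sketch: each ?=? call returns its left operand by reference, so the chain
    // below resolves with the user-defined operators only (assuming the usual
    // char* assignment overload from string.hfa).
    string a, b, c;
    a = "example";
    c = b = a;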
libcfa/src/containers/string.hfa
ref3c383 rd672350 41 41 void ?=?(string &s, const string &other); 42 42 void ?=?(string &s, char other); 43 string ?=?(string &s, string other); // string tolerates memcpys; still saw calls to autogen44 43 string & ?=?(string &s, string &other); // surprising ret seems to help avoid calls to autogen 44 //string ?=?( string &, string ) = void; 45 45 void ^?{}(string &s); 46 46 … … 93 93 int find(const string &s, const char* search, size_t searchsize); 94 94 95 int findFrom(const string &s, size_t fromPos, char search); 96 int findFrom(const string &s, size_t fromPos, const string &search); 97 int findFrom(const string &s, size_t fromPos, const char* search); 98 int findFrom(const string &s, size_t fromPos, const char* search, size_t searchsize); 99 95 100 bool includes(const string &s, const string &search); 96 101 bool includes(const string &s, const char* search); -
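The findFrom overloads declared above mirror the existing find family but start the scan at fromPos. Going by the string_res implementation later in this changeset, a failed search returns the string's length rather than -1, so size(s) works as a loop bound. A small usage sketch under those assumptions:

    // Sketch: visit every 'a' in s, assuming the existing size() accessor and the
    // "not found returns the length" convention seen in string_res.cfa below.
    string s = "abracadabra";
    for ( int i = find( s, 'a' ); i < size( s ); i = findFrom( s, i + 1, 'a' ) ) {
        sout | "'a' at index" | i;
    }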
libcfa/src/containers/string_res.cfa
ref3c383 rd672350 15 15 16 16 #include "string_res.hfa" 17 #include <stdlib.hfa> // e.g. malloc 18 #include <string.h> // e.g. strlen 17 #include "string_sharectx.hfa" 18 #include "stdlib.hfa" 19 20 // Workaround for observed performance penalty from calling CFA's alloc. 21 // Workaround is: EndVbyte = TEMP_ALLOC(char, CurrSize) 22 // Should be: EndVbyte = alloc(CurrSize) 23 #define TEMP_ALLOC(T, n) (( T* ) malloc( n * sizeof( T ) )) 24 25 #include <assert.h> 19 26 20 27 //######################### VbyteHeap "header" ######################### 21 22 23 24 25 26 27 28 29 // DON'T COMMIT:30 // #define VbyteDebug31 32 33 34 35 28 36 29 #ifdef VbyteDebug … … 54 47 55 48 56 static inlinevoid compaction( VbyteHeap & ); // compaction of the byte area57 static inline void garbage( VbyteHeap &); // garbage collect the byte area58 static inlinevoid extend( VbyteHeap &, int ); // extend the size of the byte area59 static inlinevoid reduce( VbyteHeap &, int ); // reduce the size of the byte area60 61 static inline void ?{}( VbyteHeap &, int = 1000 );62 static inlinevoid ^?{}( VbyteHeap & );63 static inline void ByteCopy( VbyteHeap &, char *, int, int, char *, int, int ); // copy a block of bytes from one location in the heap to another 64 static in line int ByteCmp( VbyteHeap &,char *, int, int, char *, int, int ); // compare 2 blocks of bytes65 static inlinechar *VbyteAlloc( VbyteHeap &, int ); // allocate a block bytes in the heap66 67 68 static inlinevoid AddThisAfter( HandleNode &, HandleNode & );69 static inlinevoid DeleteNode( HandleNode & );70 static inlinevoid MoveThisAfter( HandleNode &, const HandleNode & ); // move current handle after parameter handle49 static void compaction( VbyteHeap & ); // compaction of the byte area 50 static void garbage( VbyteHeap &, int ); // garbage collect the byte area 51 static void extend( VbyteHeap &, int ); // extend the size of the byte area 52 static void reduce( VbyteHeap &, int ); // reduce the size of the byte area 53 54 static void ?{}( VbyteHeap &, size_t = 1000 ); 55 static void ^?{}( VbyteHeap & ); 56 57 static int ByteCmp( char *, int, int, char *, int, int ); // compare 2 blocks of bytes 58 static char *VbyteAlloc( VbyteHeap &, int ); // allocate a block bytes in the heap 59 static char *VbyteTryAdjustLast( VbyteHeap &, int ); 60 61 static void AddThisAfter( HandleNode &, HandleNode & ); 62 static void DeleteNode( HandleNode & ); 63 static void MoveThisAfter( HandleNode &, const HandleNode & ); // move current handle after parameter handle 71 64 72 65 73 66 // Allocate the storage for the variable sized area and intialize the heap variables. 74 67 75 static inline void ?{}( VbyteHeap & this, int Size ) with(this) {68 static void ?{}( VbyteHeap & this, size_t Size ) with(this) { 76 69 #ifdef VbyteDebug 77 70 serr | "enter:VbyteHeap::VbyteHeap, this:" | &this | " Size:" | Size; … … 79 72 NoOfCompactions = NoOfExtensions = NoOfReductions = 0; 80 73 InitSize = CurrSize = Size; 81 StartVbyte = EndVbyte = alloc(CurrSize);74 StartVbyte = EndVbyte = TEMP_ALLOC(char, CurrSize); 82 75 ExtVbyte = (void *)( StartVbyte + CurrSize ); 83 76 Header.flink = Header.blink = &Header; 77 Header.ulink = & this; 84 78 #ifdef VbyteDebug 85 79 HeaderPtr = &Header; … … 91 85 // Release the dynamically allocated storage for the byte area. 92 86 93 static inlinevoid ^?{}( VbyteHeap & this ) with(this) {87 static void ^?{}( VbyteHeap & this ) with(this) { 94 88 free( StartVbyte ); 95 89 } // ~VbyteHeap … … 102 96 // creator. 
103 97 104 void ?{}( HandleNode & this ) with(this) {98 static void ?{}( HandleNode & this ) with(this) { 105 99 #ifdef VbyteDebug 106 100 serr | "enter:HandleNode::HandleNode, this:" | &this; … … 117 111 // collection. 118 112 119 void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) {113 static void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) { 120 114 #ifdef VbyteDebug 121 115 serr | "enter:HandleNode::HandleNode, this:" | &this; … … 123 117 s = 0; 124 118 lnth = 0; 119 ulink = &vh; 125 120 AddThisAfter( this, *vh.Header.blink ); 126 121 #ifdef VbyteDebug … … 133 128 // is the responsibility of the creator to destroy it. 134 129 135 void ^?{}( HandleNode & this ) with(this) {130 static void ^?{}( HandleNode & this ) with(this) { 136 131 #ifdef VbyteDebug 137 132 serr | "enter:HandleNode::~HandleNode, this:" | & this; … … 149 144 } // ~HandleNode 150 145 146 147 //######################### String Sharing Context ######################### 148 149 static string_sharectx * ambient_string_sharectx; // fickle top of stack 150 static string_sharectx default_string_sharectx = {NEW_SHARING}; // stable bottom of stack 151 152 void ?{}( string_sharectx & this, StringSharectx_Mode mode ) with( this ) { 153 (older){ ambient_string_sharectx }; 154 if ( mode == NEW_SHARING ) { 155 (activeHeap){ new( (size_t) 1000 ) }; 156 } else { 157 verify( mode == NO_SHARING ); 158 (activeHeap){ 0p }; 159 } 160 ambient_string_sharectx = & this; 161 } 162 163 void ^?{}( string_sharectx & this ) with( this ) { 164 if ( activeHeap ) delete( activeHeap ); 165 166 // unlink this from older-list starting from ambient_string_sharectx 167 // usually, this==ambient_string_sharectx and the loop runs zero times 168 string_sharectx *& c = ambient_string_sharectx; 169 while ( c != &this ) &c = &c->older; // find this 170 c = this.older; // unlink 171 } 172 151 173 //######################### String Resource ######################### 152 174 153 175 154 VbyteHeap HeapArea; 155 156 VbyteHeap * DEBUG_string_heap = & HeapArea; 176 VbyteHeap * DEBUG_string_heap() { 177 assert( ambient_string_sharectx->activeHeap && "No sharing context is active" ); 178 return ambient_string_sharectx->activeHeap; 179 } 157 180 158 181 size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ) { … … 160 183 } 161 184 185 size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ) { 186 return heap->CurrSize; 187 } 188 162 189 const char * DEBUG_string_heap_start( VbyteHeap * heap ) { 163 190 return heap->StartVbyte; 164 191 } 165 166 192 167 193 // Returns the size of the string in bytes … … 187 213 // Store auto-newline state so it can be restored 188 214 bool anl = getANL$(out); 189 nlOff(out); 190 for (size_t i = 0; i < s.Handle.lnth; i++) { 191 // Need to re-apply on the last output operator, for whole-statement version 192 if (anl && i == s.Handle.lnth-1) nlOn(out); 193 out | s[i]; 194 } 195 return out; 215 if( s.Handle.lnth == 0 ) { 216 sout | ""; 217 } else { 218 nlOff(out); 219 for (size_t i = 0; i < s.Handle.lnth; i++) { 220 // Need to re-apply on the last output operator, for whole-statement version 221 if (anl && i == s.Handle.lnth-1) nlOn(out); 222 out | s[i]; 223 } 224 } 196 225 } 197 226 198 227 // Empty constructor 199 228 void ?{}(string_res &s) with(s) { 200 (Handle){ HeapArea }; 229 if( ambient_string_sharectx->activeHeap ) { 230 (Handle){ * ambient_string_sharectx->activeHeap }; 231 (shareEditSet_owns_ulink){ false }; 232 verify( Handle.s == 0p && Handle.lnth == 0 ); 233 } else { 234 (Handle){ * new( (size_t) 10 ) }; // TODO: 
can I lazily avoid allocating for empty string 235 (shareEditSet_owns_ulink){ true }; 236 Handle.s = Handle.ulink->StartVbyte; 237 verify( Handle.lnth == 0 ); 238 } 201 239 s.shareEditSet_prev = &s; 202 240 s.shareEditSet_next = &s; 203 241 } 204 242 243 static void eagerCopyCtorHelper(string_res &s, const char* rhs, size_t rhslnth) with(s) { 244 if( ambient_string_sharectx->activeHeap ) { 245 (Handle){ * ambient_string_sharectx->activeHeap }; 246 (shareEditSet_owns_ulink){ false }; 247 } else { 248 (Handle){ * new( rhslnth ) }; 249 (shareEditSet_owns_ulink){ true }; 250 } 251 Handle.s = VbyteAlloc(*Handle.ulink, rhslnth); 252 Handle.lnth = rhslnth; 253 memmove( Handle.s, rhs, rhslnth ); 254 s.shareEditSet_prev = &s; 255 s.shareEditSet_next = &s; 256 } 257 205 258 // Constructor from a raw buffer and size 206 259 void ?{}(string_res &s, const char* rhs, size_t rhslnth) with(s) { 207 (Handle){ HeapArea }; 208 Handle.s = VbyteAlloc(HeapArea, rhslnth); 260 eagerCopyCtorHelper(s, rhs, rhslnth); 261 } 262 263 // private ctor (not in header): use specified heap (ignore ambient) and copy chars in 264 void ?{}( string_res &s, VbyteHeap & heap, const char* rhs, size_t rhslnth ) with(s) { 265 (Handle){ heap }; 266 Handle.s = VbyteAlloc(*Handle.ulink, rhslnth); 209 267 Handle.lnth = rhslnth; 210 for ( int i = 0; i < rhslnth; i += 1 ) { // copy characters 211 Handle.s[i] = rhs[i]; 212 } // for 268 (s.shareEditSet_owns_ulink){ false }; 269 memmove( Handle.s, rhs, rhslnth ); 213 270 s.shareEditSet_prev = &s; 214 271 s.shareEditSet_next = &s; 215 272 } 216 273 217 // String literal constructor218 void ?{}(string_res &s, const char* rhs) {219 (s){ rhs, strlen(rhs) };220 }221 222 274 // General copy constructor 223 275 void ?{}(string_res &s, const string_res & s2, StrResInitMode mode, size_t start, size_t end ) { 224 276 225 (s.Handle){ HeapArea }; 226 s.Handle.s = s2.Handle.s + start; 227 s.Handle.lnth = end - start; 228 MoveThisAfter(s.Handle, s2.Handle ); // insert this handle after rhs handle 229 // ^ bug? 
skip others at early point in string 230 231 if (mode == COPY_VALUE) { 232 // make s alone in its shareEditSet 233 s.shareEditSet_prev = &s; 234 s.shareEditSet_next = &s; 277 verify( start <= end && end <= s2.Handle.lnth ); 278 279 if (s2.Handle.ulink != ambient_string_sharectx->activeHeap && mode == COPY_VALUE) { 280 // crossing heaps (including private): copy eagerly 281 eagerCopyCtorHelper(s, s2.Handle.s + start, end - start); 282 verify(s.shareEditSet_prev == &s); 283 verify(s.shareEditSet_next == &s); 235 284 } else { 236 assert( mode == SHARE_EDITS ); 237 238 // s2 is logically const but not implementation const 239 string_res & s2mod = (string_res &) s2; 240 241 // insert s after s2 on shareEditSet 242 s.shareEditSet_next = s2mod.shareEditSet_next; 243 s.shareEditSet_prev = &s2mod; 244 s.shareEditSet_next->shareEditSet_prev = &s; 245 s.shareEditSet_prev->shareEditSet_next = &s; 246 } 247 } 248 249 void assign(string_res &this, const char* buffer, size_t bsize) { 250 251 // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs 252 string_res * shareEditSetStartPeer = & this; 253 string_res * shareEditSetEndPeer = & this; 254 for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) { 255 if ( editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) { 256 shareEditSetStartPeer = editPeer; 285 (s.Handle){}; 286 s.Handle.s = s2.Handle.s + start; 287 s.Handle.lnth = end - start; 288 s.Handle.ulink = s2.Handle.ulink; 289 290 AddThisAfter(s.Handle, s2.Handle ); // insert this handle after rhs handle 291 // ^ bug? skip others at early point in string 292 293 if (mode == COPY_VALUE) { 294 verify(s2.Handle.ulink == ambient_string_sharectx->activeHeap); 295 // requested logical copy in same heap: defer copy until write 296 297 (s.shareEditSet_owns_ulink){ false }; 298 299 // make s alone in its shareEditSet 300 s.shareEditSet_prev = &s; 301 s.shareEditSet_next = &s; 302 } else { 303 verify( mode == SHARE_EDITS ); 304 // sharing edits with source forces same heap as source (ignore context) 305 306 (s.shareEditSet_owns_ulink){ s2.shareEditSet_owns_ulink }; 307 308 // s2 is logically const but not implementation const 309 string_res & s2mod = (string_res &) s2; 310 311 // insert s after s2 on shareEditSet 312 s.shareEditSet_next = s2mod.shareEditSet_next; 313 s.shareEditSet_prev = &s2mod; 314 s.shareEditSet_next->shareEditSet_prev = &s; 315 s.shareEditSet_prev->shareEditSet_next = &s; 257 316 } 258 if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) { 259 shareEditSetEndPeer = editPeer; 260 } 261 } 262 263 // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer 264 // `this` occurs in the middle of it, to be replaced 265 // build up the new text in `pasting` 266 267 string_res pasting = { 268 shareEditSetStartPeer->Handle.s, // start of SES 269 this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this 270 append( pasting, 271 buffer, // start of replacement for this 272 bsize ); // length of replacement for this 273 append( pasting, 274 this.Handle.s + this.Handle.lnth, // start of SES after this 275 shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - 276 (this.Handle.s + this.Handle.lnth) ); // length of SES, after this 277 278 // The above string building can trigger compaction. 
279 // The reference points (that are arguments of the string building) may move during that building. 280 // From this point on, they are stable. 281 // So now, capture their values for use in the overlap cases, below. 282 // Do not factor these definitions with the arguments used above. 317 } 318 } 319 320 static void assignEditSet(string_res & this, string_res * shareEditSetStartPeer, string_res * shareEditSetEndPeer, 321 char * resultSesStart, 322 size_t resultSesLnth, 323 HandleNode * resultPadPosition, size_t bsize ) { 283 324 284 325 char * beforeBegin = shareEditSetStartPeer->Handle.s; … … 290 331 size_t oldLnth = this.Handle.lnth; 291 332 292 this.Handle.s = pasting.Handle.s+ beforeLen;333 this.Handle.s = resultSesStart + beforeLen; 293 334 this.Handle.lnth = bsize; 294 MoveThisAfter( this.Handle, pasting.Handle ); 335 if (resultPadPosition) 336 MoveThisAfter( this.Handle, *resultPadPosition ); 295 337 296 338 // adjust all substring string and handle locations, and check if any substring strings are outside the new base string 297 char *limit = pasting.Handle.s + pasting.Handle.lnth;339 char *limit = resultSesStart + resultSesLnth; 298 340 for (string_res * p = this.shareEditSet_next; p != &this; p = p->shareEditSet_next) { 299 assert(p->Handle.s >= beforeBegin);341 verify (p->Handle.s >= beforeBegin); 300 342 if ( p->Handle.s >= afterBegin ) { 301 assert( p->Handle.s <= afterBegin + afterLen );302 assert( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );343 verify ( p->Handle.s <= afterBegin + afterLen ); 344 verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen ); 303 345 // p starts after the edit 304 346 // take start and end as end-anchored … … 318 360 } else { 319 361 // p ends after the edit 320 assert( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );362 verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen ); 321 363 // take end as end-anchored 322 364 // stretch-shrink p according to the edit … … 326 368 // take start as start-anchored 327 369 size_t startOffsetFromStart = p->Handle.s - beforeBegin; 328 p->Handle.s = pasting.Handle.s+ startOffsetFromStart;370 p->Handle.s = resultSesStart + startOffsetFromStart; 329 371 } else { 330 assert( p->Handle.s < afterBegin );372 verify ( p->Handle.s < afterBegin ); 331 373 // p starts during the edit 332 assert( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );374 verify( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen ); 333 375 if ( p->Handle.s + p->Handle.lnth < afterBegin ) { 334 376 // p ends during the edit; p does not include the last character replaced … … 344 386 } 345 387 } 346 MoveThisAfter( p->Handle, pasting.Handle ); // move substring handle to maintain sorted order by string position 347 } 348 } 349 350 void ?=?(string_res &s, const char* other) { 351 assign(s, other, strlen(other)); 352 } 353 354 void ?=?(string_res &s, char other) { 355 assign(s, &other, 1); 388 if (resultPadPosition) 389 MoveThisAfter( p->Handle, *resultPadPosition ); // move substring handle to maintain sorted order by string position 390 } 391 } 392 393 static string_res & assign_(string_res &this, const char* buffer, size_t bsize, const string_res & valSrc) { 394 395 // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs 396 string_res * shareEditSetStartPeer = & this; 397 string_res * shareEditSetEndPeer = & this; 398 for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) { 399 if ( 
editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) { 400 shareEditSetStartPeer = editPeer; 401 } 402 if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) { 403 shareEditSetEndPeer = editPeer; 404 } 405 } 406 407 verify( shareEditSetEndPeer->Handle.s >= shareEditSetStartPeer->Handle.s ); 408 size_t origEditSetLength = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - shareEditSetStartPeer->Handle.s; 409 verify( origEditSetLength >= this.Handle.lnth ); 410 411 if ( this.shareEditSet_owns_ulink ) { // assigning to private context 412 // ok to overwrite old value within LHS 413 char * prefixStartOrig = shareEditSetStartPeer->Handle.s; 414 int prefixLen = this.Handle.s - prefixStartOrig; 415 char * suffixStartOrig = this.Handle.s + this.Handle.lnth; 416 int suffixLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - suffixStartOrig; 417 418 int delta = bsize - this.Handle.lnth; 419 if ( char * oldBytes = VbyteTryAdjustLast( *this.Handle.ulink, delta ) ) { 420 // growing: copy from old to new 421 char * dest = VbyteAlloc( *this.Handle.ulink, origEditSetLength + delta ); 422 char *destCursor = dest; memcpy(destCursor, prefixStartOrig, prefixLen); 423 destCursor += prefixLen; memcpy(destCursor, buffer , bsize ); 424 destCursor += bsize; memcpy(destCursor, suffixStartOrig, suffixLen); 425 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer, 426 dest, 427 origEditSetLength + delta, 428 0p, bsize); 429 free( oldBytes ); 430 } else { 431 // room is already allocated in-place: bubble suffix and overwite middle 432 memmove( suffixStartOrig + delta, suffixStartOrig, suffixLen ); 433 memcpy( this.Handle.s, buffer, bsize ); 434 435 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer, 436 shareEditSetStartPeer->Handle.s, 437 origEditSetLength + delta, 438 0p, bsize); 439 } 440 441 } else if ( // assigning to shared context 442 this.Handle.lnth == origEditSetLength && // overwriting entire run of SES 443 & valSrc && // sourcing from a managed string 444 valSrc.Handle.ulink == this.Handle.ulink ) { // sourcing from same heap 445 446 // SES's result will only use characters from the source string => reuse source 447 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer, 448 valSrc.Handle.s, 449 valSrc.Handle.lnth, 450 &((string_res&)valSrc).Handle, bsize); 451 452 } else { 453 // overwriting a proper substring of some string: mash characters from old and new together (copy on write) 454 // OR we are importing characters: need to copy eagerly (can't refer to source) 455 456 // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer 457 // `this` occurs in the middle of it, to be replaced 458 // build up the new text in `pasting` 459 460 string_res pasting = { 461 * this.Handle.ulink, // maintain same heap, regardless of context 462 shareEditSetStartPeer->Handle.s, // start of SES 463 this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this 464 append( pasting, 465 buffer, // start of replacement for this 466 bsize ); // length of replacement for this 467 append( pasting, 468 this.Handle.s + this.Handle.lnth, // start of SES after this 469 shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - 470 (this.Handle.s + this.Handle.lnth) ); // length of SES, after this 471 472 // The above string building can trigger compaction. 
473 // The reference points (that are arguments of the string building) may move during that building. 474 // From this point on, they are stable. 475 476 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer, 477 pasting.Handle.s, 478 pasting.Handle.lnth, 479 &pasting.Handle, bsize); 480 } 481 482 return this; 483 } 484 485 string_res & assign(string_res &this, const char* buffer, size_t bsize) { 486 return assign_(this, buffer, bsize, *0p); 487 } 488 489 string_res & ?=?(string_res &s, char other) { 490 return assign(s, &other, 1); 356 491 } 357 492 358 493 // Copy assignment operator 359 void?=?(string_res & this, const string_res & rhs) with( this ) {360 assign(this, rhs.Handle.s, rhs.Handle.lnth);361 } 362 363 void?=?(string_res & this, string_res & rhs) with( this ) {494 string_res & ?=?(string_res & this, const string_res & rhs) with( this ) { 495 return assign_(this, rhs.Handle.s, rhs.Handle.lnth, rhs); 496 } 497 498 string_res & ?=?(string_res & this, string_res & rhs) with( this ) { 364 499 const string_res & rhs2 = rhs; 365 this = rhs2;500 return this = rhs2; 366 501 } 367 502 … … 374 509 s.shareEditSet_prev->shareEditSet_next = s.shareEditSet_next; 375 510 s.shareEditSet_next->shareEditSet_prev = s.shareEditSet_prev; 376 s.shareEditSet_next = &s; 377 s.shareEditSet_prev = &s; 511 // s.shareEditSet_next = &s; 512 // s.shareEditSet_prev = &s; 513 514 if (shareEditSet_owns_ulink && s.shareEditSet_next == &s) { // last one out 515 delete( s.Handle.ulink ); 516 } 378 517 } 379 518 … … 387 526 } 388 527 528 void assignAt(const string_res &s, size_t index, char val) { 529 string_res editZone = { s, SHARE_EDITS, index, index+1 }; 530 assign(editZone, &val, 1); 531 } 532 389 533 390 534 /////////////////////////////////////////////////////////////////// … … 392 536 393 537 void append(string_res &str1, const char * buffer, size_t bsize) { 394 size_t clnth = s ize(str1)+ bsize;395 if ( str1.Handle.s + s ize(str1)== buffer ) { // already juxtapose ?538 size_t clnth = str1.Handle.lnth + bsize; 539 if ( str1.Handle.s + str1.Handle.lnth == buffer ) { // already juxtapose ? 396 540 // no-op 397 541 } else { // must copy some text 398 if ( str1.Handle.s + s ize(str1) == VbyteAlloc(HeapArea, 0) ) { // str1 at end of string area ?399 VbyteAlloc( HeapArea, bsize); // create room for 2nd part at the end of string area542 if ( str1.Handle.s + str1.Handle.lnth == VbyteAlloc(*str1.Handle.ulink, 0) ) { // str1 at end of string area ? 
543 VbyteAlloc( *str1.Handle.ulink, bsize ); // create room for 2nd part at the end of string area 400 544 } else { // copy the two parts 401 char * str1oldBuf = str1.Handle.s; 402 str1.Handle.s = VbyteAlloc( HeapArea, clnth ); 403 ByteCopy( HeapArea, str1.Handle.s, 0, str1.Handle.lnth, str1oldBuf, 0, str1.Handle.lnth); 545 char * str1newBuf = VbyteAlloc( *str1.Handle.ulink, clnth ); 546 char * str1oldBuf = str1.Handle.s; // must read after VbyteAlloc call in case it gs's 547 str1.Handle.s = str1newBuf; 548 memcpy( str1.Handle.s, str1oldBuf, str1.Handle.lnth ); 404 549 } // if 405 ByteCopy( HeapArea, str1.Handle.s, str1.Handle.lnth, bsize, (char*)buffer, 0, (int)bsize); 406 // VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth 550 memcpy( str1.Handle.s + str1.Handle.lnth, buffer, bsize ); 407 551 } // if 408 552 str1.Handle.lnth = clnth; … … 417 561 } 418 562 419 void ?+=?(string_res &s, const char* other) {420 append( s, other, strlen(other) );421 }422 563 423 564 … … 429 570 430 571 bool ?==?(const string_res &s1, const string_res &s2) { 431 return ByteCmp( HeapArea,s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0;572 return ByteCmp( s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0; 432 573 } 433 574 … … 455 596 456 597 int find(const string_res &s, char search) { 457 for (i; size(s)) { 458 if (s[i] == search) return i; 459 } 460 return size(s); 461 } 598 return findFrom(s, 0, search); 599 } 600 601 int findFrom(const string_res &s, size_t fromPos, char search) { 602 // FIXME: This paricular overload (find of single char) is optimized to use memchr. 603 // The general overload (find of string, memchr applying to its first character) and `contains` should be adjusted to match. 604 char * searchFrom = s.Handle.s + fromPos; 605 size_t searchLnth = s.Handle.lnth - fromPos; 606 int searchVal = search; 607 char * foundAt = (char *) memchr(searchFrom, searchVal, searchLnth); 608 if (foundAt == 0p) return s.Handle.lnth; 609 else return foundAt - s.Handle.s; 610 } 611 612 int find(const string_res &s, const string_res &search) { 613 return findFrom(s, 0, search); 614 } 615 616 int findFrom(const string_res &s, size_t fromPos, const string_res &search) { 617 return findFrom(s, fromPos, search.Handle.s, search.Handle.lnth); 618 } 619 620 int find(const string_res &s, const char* search) { 621 return findFrom(s, 0, search); 622 } 623 int findFrom(const string_res &s, size_t fromPos, const char* search) { 624 return findFrom(s, fromPos, search, strlen(search)); 625 } 626 627 int find(const string_res &s, const char* search, size_t searchsize) { 628 return findFrom(s, 0, search, searchsize); 629 } 630 631 int findFrom(const string_res &s, size_t fromPos, const char* search, size_t searchsize) { 462 632 463 633 /* Remaining implementations essentially ported from Sunjay's work */ 464 634 465 int find(const string_res &s, const string_res &search) { 466 return find(s, search.Handle.s, search.Handle.lnth); 467 } 468 469 int find(const string_res &s, const char* search) { 470 return find(s, search, strlen(search)); 471 } 472 473 int find(const string_res &s, const char* search, size_t searchsize) { 635 474 636 // FIXME: This is a naive algorithm. We probably want to switch to someting 475 637 // like Boyer-Moore in the future. 
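The FIXME above already points the way: the single-character findFrom now leans on memchr, and the comment notes that the general (string-needle) overload and contains should be adjusted to match, short of a full Boyer-Moore. A self-contained C sketch of that intermediate step, using memchr to jump between candidate positions; this is a hypothetical helper, not the libcfa code, but it keeps the "not found returns the length" convention:

    #include <string.h>
    #include <stddef.h>

    static int find_from_sketch( const char * hay, size_t hlen, size_t from,
                                 const char * needle, size_t nlen ) {
        if ( nlen == 0 ) return (int)( from < hlen ? from : hlen );     // sketch choice for empty needle
        for ( size_t i = from; i + nlen <= hlen; ) {
            // jump to the next occurrence of the needle's first byte
            const char * hit = memchr( hay + i, needle[0], hlen - nlen - i + 1 );
            if ( ! hit ) break;
            i = (size_t)( hit - hay );
            if ( memcmp( hay + i, needle, nlen ) == 0 ) return (int)i;  // full match
            i += 1;                                                     // keep scanning
        }
        return (int)hlen;                                               // not found
    }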
… … 481 643 } 482 644 483 for (size_t i = 0; i < s.Handle.lnth; i++) {645 for (size_t i = fromPos; i < s.Handle.lnth; i++) { 484 646 size_t remaining = s.Handle.lnth - i; 485 647 // Never going to find the search string if the remaining string is … … 596 758 // Add a new HandleNode node n after the current HandleNode node. 597 759 598 static inlinevoid AddThisAfter( HandleNode & this, HandleNode & n ) with(this) {760 static void AddThisAfter( HandleNode & this, HandleNode & n ) with(this) { 599 761 #ifdef VbyteDebug 600 762 serr | "enter:AddThisAfter, this:" | &this | " n:" | &n; 601 763 #endif // VbyteDebug 764 // Performance note: we are on the critical path here. MB has ensured that the verifies don't contribute to runtime (are compiled away, like they're supposed to be). 765 verify( n.ulink != 0p ); 766 verify( this.ulink == n.ulink ); 602 767 flink = n.flink; 603 768 blink = &n; … … 624 789 // Delete the current HandleNode node. 625 790 626 static inlinevoid DeleteNode( HandleNode & this ) with(this) {791 static void DeleteNode( HandleNode & this ) with(this) { 627 792 #ifdef VbyteDebug 628 793 serr | "enter:DeleteNode, this:" | &this; … … 638 803 639 804 // Allocates specified storage for a string from byte-string area. If not enough space remains to perform the 640 // allocation, the garbage collection routine is called and a second attempt is made to allocate the space. If the 641 // second attempt fails, a further attempt is made to create a new, larger byte-string area. 642 643 static inline char * VbyteAlloc( VbyteHeap & this, int size ) with(this) { 805 // allocation, the garbage collection routine is called. 806 807 static char * VbyteAlloc( VbyteHeap & this, int size ) with(this) { 644 808 #ifdef VbyteDebug 645 809 serr | "enter:VbyteAlloc, size:" | size; … … 650 814 NoBytes = ( uintptr_t )EndVbyte + size; 651 815 if ( NoBytes > ( uintptr_t )ExtVbyte ) { // enough room for new byte-string ? 652 garbage( this ); // firer up the garbage collector 653 NoBytes = ( uintptr_t )EndVbyte + size; // try again 654 if ( NoBytes > ( uintptr_t )ExtVbyte ) { // enough room for new byte-string ? 655 assert( 0 && "need to implement actual growth" ); 656 // extend( size ); // extend the byte-string area 657 } // if 816 garbage( this, size ); // firer up the garbage collector 817 verify( (( uintptr_t )EndVbyte + size) <= ( uintptr_t )ExtVbyte && "garbage run did not free up required space" ); 658 818 } // if 659 819 r = EndVbyte; … … 666 826 667 827 828 // Adjusts the last allocation in this heap by delta bytes, or resets this heap to be able to offer 829 // new allocations of its original size + delta bytes. Positive delta means bigger; 830 // negative means smaller. A null return indicates that the original heap location has room for 831 // the requested growth. A non-null return indicates that copying to a new location is required 832 // but has not been done; the returned value is the old heap storage location; `this` heap is 833 // modified to reference the new location. In the copy-requred case, the caller should use 834 // VbyteAlloc to claim the new space, while doing optimal copying from old to new, then free old. 
835 836 static char * VbyteTryAdjustLast( VbyteHeap & this, int delta ) with(this) { 837 838 if ( ( uintptr_t )EndVbyte + delta <= ( uintptr_t )ExtVbyte ) { 839 // room available 840 EndVbyte += delta; 841 return 0p; 842 } 843 844 char *oldBytes = StartVbyte; 845 846 NoOfExtensions += 1; 847 CurrSize *= 2; 848 StartVbyte = EndVbyte = TEMP_ALLOC(char, CurrSize); 849 ExtVbyte = StartVbyte + CurrSize; 850 851 return oldBytes; 852 } 853 854 668 855 // Move an existing HandleNode node h somewhere after the current HandleNode node so that it is in ascending order by 669 856 // the address in the byte string area. 670 857 671 static inlinevoid MoveThisAfter( HandleNode & this, const HandleNode & h ) with(this) {858 static void MoveThisAfter( HandleNode & this, const HandleNode & h ) with(this) { 672 859 #ifdef VbyteDebug 673 860 serr | "enter:MoveThisAfter, this:" | & this | " h:" | & h; 674 861 #endif // VbyteDebug 862 verify( h.ulink != 0p ); 863 verify( this.ulink == h.ulink ); 675 864 if ( s < h.s ) { // check argument values 676 865 // serr | "VbyteSM: Error - Cannot move byte string starting at:" | s | " after byte string starting at:" 677 866 // | ( h->s ) | " and keep handles in ascending order"; 678 867 // exit(-1 ); 679 assert( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");868 verify( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order"); 680 869 } // if 681 870 … … 709 898 //######################### VbyteHeap ######################### 710 899 711 // Move characters from one location in the byte-string area to another. The routine handles the following situations:712 //713 // if the |Src| > |Dst| => truncate714 // if the |Dst| > |Src| => pad Dst with blanks715 716 void ByteCopy( VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth ) {717 for ( int i = 0; i < DstLnth; i += 1 ) {718 if ( i == SrcLnth ) { // |Dst| > |Src|719 for ( ; i < DstLnth; i += 1 ) { // pad Dst with blanks720 Dst[DstStart + i] = ' ';721 } // for722 break;723 } // exit724 Dst[DstStart + i] = Src[SrcStart + i];725 } // for726 } // ByteCopy727 728 900 // Compare two byte strings in the byte-string area. The routine returns the following values: 729 901 // … … 732 904 // -1 => Src1-byte-string < Src2-byte-string 733 905 734 int ByteCmp( VbyteHeap & this, char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth ) with(this){906 int ByteCmp( char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth ) { 735 907 #ifdef VbyteDebug 736 908 serr | "enter:ByteCmp, Src1Start:" | Src1Start | " Src1Lnth:" | Src1Lnth | " Src2Start:" | Src2Start | " Src2Lnth:" | Src2Lnth; … … 789 961 h = Header.flink; // ignore header node 790 962 for (;;) { 791 ByteCopy( this, EndVbyte, 0, h->lnth, h->s, 0, h->lnth );963 memmove( EndVbyte, h->s, h->lnth ); 792 964 obase = h->s; 793 965 h->s = EndVbyte; … … 810 982 811 983 984 static double heap_expansion_freespace_threshold = 0.1; // default inherited from prior work: expand heap when less than 10% "free" (i.e. garbage) 985 // probably an unreasonable default, but need to assess early-round tests on changing it 986 987 void TUNING_set_string_heap_liveness_threshold( double val ) { 988 heap_expansion_freespace_threshold = 1.0 - val; 989 } 990 991 812 992 // Garbage determines the amount of free space left in the heap and then reduces, leave the same, or extends the size of 813 993 // the heap. 
The heap is then compacted in the existing heap or into the newly allocated heap. 814 994 815 void garbage(VbyteHeap & this ) with(this) {995 void garbage(VbyteHeap & this, int minreq ) with(this) { 816 996 #ifdef VbyteDebug 817 997 serr | "enter:garbage"; … … 837 1017 AmountFree = ( uintptr_t )ExtVbyte - ( uintptr_t )StartVbyte - AmountUsed; 838 1018 839 if ( AmountFree < ( int )( CurrSize * 0.1 )) { // free space less than 10% ? 840 841 assert( 0 && "need to implement actual growth" ); 842 // extend( CurrSize ); // extend the heap 1019 if ( ( double ) AmountFree < ( CurrSize * heap_expansion_freespace_threshold ) || AmountFree < minreq ) { // free space less than threshold or not enough to serve cur request 1020 1021 extend( this, max( CurrSize, minreq ) ); // extend the heap 843 1022 844 1023 // Peter says, "This needs work before it should be used." … … 846 1025 // reduce(( AmountFree / CurrSize - 3 ) * CurrSize ); // reduce the memory 847 1026 848 } // if 849 compaction(this); // compact the byte area, in the same or new heap area 1027 // `extend` implies a `compaction` during the copy 1028 1029 } else { 1030 compaction(this); // in-place 1031 }// if 850 1032 #ifdef VbyteDebug 851 1033 { … … 867 1049 #undef VbyteDebug 868 1050 869 //WIP870 #if 0871 1051 872 1052 … … 874 1054 // area is deleted. 875 1055 876 void VbyteHeap::extend( int size) {1056 void extend( VbyteHeap & this, int size ) with (this) { 877 1057 #ifdef VbyteDebug 878 1058 serr | "enter:extend, size:" | size; … … 884 1064 885 1065 CurrSize += size > InitSize ? size : InitSize; // minimum extension, initial size 886 StartVbyte = EndVbyte = new char[CurrSize];1066 StartVbyte = EndVbyte = TEMP_ALLOC(char, CurrSize); 887 1067 ExtVbyte = (void *)( StartVbyte + CurrSize ); 888 compaction( ); // copy from old heap to new & adjust pointers to new heap889 delete OldStartVbyte; // release old heap1068 compaction(this); // copy from old heap to new & adjust pointers to new heap 1069 free( OldStartVbyte ); // release old heap 890 1070 #ifdef VbyteDebug 891 1071 serr | "exit:extend, CurrSize:" | CurrSize; … … 893 1073 } // extend 894 1074 1075 //WIP 1076 #if 0 895 1077 896 1078 // Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string -
libcfa/src/containers/string_res.hfa
ref3c383 rd672350 17 17 18 18 #include <fstream.hfa> 19 #include <string.h> // e.g. strlen 19 20 20 21 … … 27 28 HandleNode *flink; // forward link 28 29 HandleNode *blink; // backward link 30 VbyteHeap *ulink; // upward link 29 31 30 32 char *s; // pointer to byte string … … 32 34 }; // HandleNode 33 35 34 void ?{}( HandleNode & ); // constructor for header node 35 36 void ?{}( HandleNode &, VbyteHeap & ); // constructor for nodes in the handle list 37 void ^?{}( HandleNode & ); // destructor for handle nodes 38 39 extern VbyteHeap * DEBUG_string_heap; 36 VbyteHeap * DEBUG_string_heap(); 37 size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ); 40 38 size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ); 41 39 const char * DEBUG_string_heap_start( VbyteHeap * heap ); 42 40 41 void TUNING_set_string_heap_liveness_threshold( double val ); 43 42 44 43 //######################### String ######################### … … 47 46 struct string_res { 48 47 HandleNode Handle; // chars, start, end, global neighbours 48 bool shareEditSet_owns_ulink; 49 49 string_res * shareEditSet_prev; 50 50 string_res * shareEditSet_next; … … 74 74 // Constructors, Assignment Operators, Destructor 75 75 void ?{}(string_res &s); // empty string 76 void ?{}(string_res &s, const char* initial); // copy from string literal (NULL-terminated)77 76 void ?{}(string_res &s, const char* buffer, size_t bsize); // copy specific length from buffer 77 static inline void ?{}(string_res &s, const char* rhs) { // copy from string literal (NULL-terminated) 78 (s){ rhs, strlen(rhs) }; 79 } 78 80 79 81 void ?{}(string_res &s, const string_res & s2) = void; … … 86 88 } 87 89 88 void assign(string_res &s, const char* buffer, size_t bsize); // copy specific length from buffer 89 void ?=?(string_res &s, const char* other); // copy from string literal (NULL-terminated) 90 void ?=?(string_res &s, const string_res &other); 91 void ?=?(string_res &s, string_res &other); 92 void ?=?(string_res &s, char other); 90 string_res & assign(string_res &s, const char* buffer, size_t bsize); // copy specific length from buffer 91 static inline string_res & ?=?(string_res &s, const char* other) { // copy from string literal (NULL-terminated) 92 return assign(s, other, strlen(other)); 93 } 94 string_res & ?=?(string_res &s, const string_res &other); 95 string_res & ?=?(string_res &s, string_res &other); 96 string_res & ?=?(string_res &s, char other); 93 97 94 98 void ^?{}(string_res &s); … … 99 103 100 104 // Concatenation 105 void append(string_res &s, const char* buffer, size_t bsize); 101 106 void ?+=?(string_res &s, char other); // append a character 102 107 void ?+=?(string_res &s, const string_res &s2); // append-concatenate to first string 103 void ?+=?(string_res &s, const char* other); 104 void append(string_res &s, const char* buffer, size_t bsize); 108 static inline void ?+=?(string_res &s, const char* other) { 109 append( s, other, strlen(other) ); 110 } 105 111 106 112 // Character access 113 void assignAt(const string_res &s, size_t index, char val); 107 114 char ?[?](const string_res &s, size_t index); // Mike changed to ret by val from Sunjay's ref, to match Peter's 108 115 //char codePointAt(const string_res &s, size_t index); // revisit under Unicode … … 121 128 int find(const string_res &s, const char* search); 122 129 int find(const string_res &s, const char* search, size_t searchsize); 130 131 int findFrom(const string_res &s, size_t fromPos, char search); 132 int findFrom(const string_res &s, size_t fromPos, const string_res 
&search); 133 int findFrom(const string_res &s, size_t fromPos, const char* search); 134 int findFrom(const string_res &s, size_t fromPos, const char* search, size_t searchsize); 123 135 124 136 bool includes(const string_res &s, const string_res &search); -
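Several of the char*-taking operations above are now thin inline wrappers that measure the argument once with strlen and forward to the (buffer, size) primitives, and the assignment operators return string_res & instead of void. A hedged usage sketch of what that enables; the include path is an assumption:

#include <containers/string_res.hfa>   // assumed install path for this header

void demo() {
    string_res a, b;
    a = b = "hello";    // ?=?( string_res &, const char * ) now returns string_res &, so assignments chain
    a += ", world";     // ?+=?( string_res &, const char * ) forwards to append( a, "...", strlen( "..." ) )
}

The new findFrom overloads mirror the existing find family but take an explicit starting position, and assignAt adds indexed character writes to complement the read-only ?[?].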
libcfa/src/math.trait.hfa
ref3c383 rd672350 16 16 #pragma once 17 17 18 trait Not( T) {19 void ?{}( T&, zero_t );20 int !?( T);18 trait Not( U ) { 19 void ?{}( U &, zero_t ); 20 int !?( U ); 21 21 }; // Not 22 22 … … 26 26 }; // Equality 27 27 28 trait Relational( T | Equality( T) ) {29 int ?<?( T, T);30 int ?<=?( T, T);31 int ?>?( T, T);32 int ?>=?( T, T);28 trait Relational( U | Equality( U ) ) { 29 int ?<?( U, U ); 30 int ?<=?( U, U ); 31 int ?>?( U, U ); 32 int ?>=?( U, U ); 33 33 }; // Relational 34 34 … … 39 39 }; // Signed 40 40 41 trait Additive( T | Signed( T) ) {42 T ?+?( T, T);43 T ?-?( T, T);44 T ?+=?( T &, T);45 T ?-=?( T &, T);41 trait Additive( U | Signed( U ) ) { 42 U ?+?( U, U ); 43 U ?-?( U, U ); 44 U ?+=?( U &, U ); 45 U ?-=?( U &, U ); 46 46 }; // Additive 47 47 … … 49 49 void ?{}( T &, one_t ); 50 50 // T ?++( T & ); 51 // T ++?( T & );51 // T ++?( T & ); 52 52 // T ?--( T & ); 53 53 // T --?( T & ); 54 54 }; // Incdec 55 55 56 trait Multiplicative( T | Incdec( T) ) {57 T ?*?( T, T);58 T ?/?( T, T);59 T ?%?( T, T);60 T ?/=?( T &, T);56 trait Multiplicative( U | Incdec( U ) ) { 57 U ?*?( U, U ); 58 U ?/?( U, U ); 59 U ?%?( U, U ); 60 U ?/=?( U &, U ); 61 61 }; // Multiplicative 62 62
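The change in this header is a rename of the trait parameter from T to U on most traits, presumably so the parameter cannot collide with a T introduced by an enclosing forall at the point of use; the operator requirements themselves are untouched. A hedged sketch of a consumer of one of these traits, with min2 as an illustrative name rather than a library routine:

// Hedged sketch: a generic minimum whose only requirement is the
// Relational trait from this header.
forall( T | Relational( T ) )
T min2( T a, T b ) {
    return a < b ? a : b;
}

Because Relational is declared as Relational( U | Equality( U ) ), instantiating it for T pulls in the equality operators as well.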