Changeset d672350 for libcfa


Timestamp: Mar 21, 2022, 1:44:06 PM (4 years ago)
Author: Thierry Delisle <tdelisle@…>
Branches: ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
Children: a76202d
Parents: ef3c383 (diff), dbe2533 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.

Message: Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location: libcfa/src
Files: 3 added, 24 edited, 1 moved

  • libcfa/src/Makefile.am

    ref3c383 rd672350  
    6363        containers/queueLockFree.hfa \
    6464        containers/stackLockFree.hfa \
     65        containers/string_sharectx.hfa \
    6566        containers/vector2.hfa \
    6667        vec/vec.hfa \
     
    118119        concurrency/exception.hfa \
    119120        concurrency/kernel.hfa \
     121        concurrency/kernel/cluster.hfa \
    120122        concurrency/locks.hfa \
    121123        concurrency/monitor.hfa \
     
    133135        concurrency/io/call.cfa \
    134136        concurrency/iofwd.hfa \
    135         concurrency/kernel_private.hfa \
     137        concurrency/kernel/private.hfa \
    136138        concurrency/kernel/startup.cfa \
    137139        concurrency/preemption.cfa \
  • libcfa/src/concurrency/coroutine.cfa

    ref3c383 rd672350  
    2727#include <unwind.h>
    2828
    29 #include "kernel_private.hfa"
     29#include "kernel/private.hfa"
    3030#include "exception.hfa"
    3131#include "math.hfa"
  • libcfa/src/concurrency/io.cfa

    ref3c383 rd672350  
    4141        #include "kernel.hfa"
    4242        #include "kernel/fwd.hfa"
    43         #include "kernel_private.hfa"
     43        #include "kernel/private.hfa"
    4444        #include "io/types.hfa"
    4545
     
    9393        extern void __kernel_unpark( thread$ * thrd, unpark_hint );
    9494
    95         bool __cfa_io_drain( processor * proc ) {
     95        bool __cfa_io_drain( $io_context * ctx ) {
    9696                /* paranoid */ verify( ! __preemption_enabled() );
    9797                /* paranoid */ verify( ready_schedule_islocked() );
    98                 /* paranoid */ verify( proc );
    99                 /* paranoid */ verify( proc->io.ctx );
     98                /* paranoid */ verify( ctx );
    10099
    101100                // Drain the queue
    102                 $io_context * ctx = proc->io.ctx;
    103101                unsigned head = *ctx->cq.head;
    104102                unsigned tail = *ctx->cq.tail;
     
    110108                if(count == 0) return false;
    111109
     110                if(!__atomic_try_acquire(&ctx->cq.lock)) {
     111                        return false;
     112                }
     113
    112114                for(i; count) {
    113115                        unsigned idx = (head + i) & mask;
     
    130132                /* paranoid */ verify( ready_schedule_islocked() );
    131133                /* paranoid */ verify( ! __preemption_enabled() );
     134
     135                __atomic_unlock(&ctx->cq.lock);
    132136
    133137                return true;
     
    175179                        /* paranoid */ verify( ! __preemption_enabled() );
    176180
    177                         ctx.proc->io.pending = false;
     181                        __atomic_store_n(&ctx.proc->io.pending, false, __ATOMIC_RELAXED);
    178182                }
    179183
    180184                ready_schedule_lock();
    181                 bool ret = __cfa_io_drain( proc );
     185                bool ret = __cfa_io_drain( &ctx );
    182186                ready_schedule_unlock();
    183187                return ret;
     
    287291        //=============================================================================================
    288292        // submission
    289         static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {
     293        static inline void __submit_only( struct $io_context * ctx, __u32 idxs[], __u32 have) {
    290294                // We can proceed to the fast path
    291295                // Get the right objects
     
    304308                sq.to_submit += have;
    305309
    306                 ctx->proc->io.pending = true;
    307                 ctx->proc->io.dirty   = true;
     310                __atomic_store_n(&ctx->proc->io.pending, true, __ATOMIC_RELAXED);
     311                __atomic_store_n(&ctx->proc->io.dirty  , true, __ATOMIC_RELAXED);
     312        }
     313
     314        static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {
     315                __sub_ring_t & sq = ctx->sq;
     316                __submit_only(ctx, idxs, have);
     317
    308318                if(sq.to_submit > 30) {
    309319                        __tls_stats()->io.flush.full++;
     
    402412// I/O Arbiter
    403413//=============================================================================================
    404         static inline void block(__outstanding_io_queue & queue, __outstanding_io & item) {
     414        static inline bool enqueue(__outstanding_io_queue & queue, __outstanding_io & item) {
     415                bool was_empty;
     416
    405417                // Lock the list, it's not thread safe
    406418                lock( queue.lock __cfaabi_dbg_ctx2 );
    407419                {
     420                        was_empty = empty(queue.queue);
     421
    408422                        // Add our request to the list
    409423                        add( queue.queue, item );
     
    414428                unlock( queue.lock );
    415429
    416                 wait( item.sem );
     430                return was_empty;
    417431        }
    418432
     
    432446                pa.want = want;
    433447
    434                 block(this.pending, (__outstanding_io&)pa);
     448                enqueue(this.pending, (__outstanding_io&)pa);
     449
     450                wait( pa.sem );
    435451
    436452                return pa.ctx;
     
    485501                ei.lazy = lazy;
    486502
    487                 block(ctx->ext_sq, (__outstanding_io&)ei);
     503                bool we = enqueue(ctx->ext_sq, (__outstanding_io&)ei);
     504
     505                __atomic_store_n(&ctx->proc->io.pending, true, __ATOMIC_SEQ_CST);
     506
     507                if( we ) {
     508                        sigval_t value = { PREEMPT_IO };
     509                        pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value);
     510                }
     511
     512                wait( ei.sem );
    488513
    489514                __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have);
     
    501526                                        __external_io & ei = (__external_io&)drop( ctx.ext_sq.queue );
    502527
    503                                         __submit(&ctx, ei.idxs, ei.have, ei.lazy);
     528                                        __submit_only(&ctx, ei.idxs, ei.have);
    504529
    505530                                        post( ei.sem );
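
The io.cfa changes above boil down to two patterns. First, draining the completion queue now takes a per-context cq.lock with a try-acquire, so a processor that loses the race simply skips the drain instead of blocking. Second, the external-submission path records whether the pending queue was empty and, only on that transition, pokes the owning processor with pthread_sigqueue(SIGUSR1, PREEMPT_IO) so it stops spinning and flushes. A minimal sketch of the drain shape, assuming a hypothetical consume_entry helper for the per-entry work (index masking elided):

    // Sketch only: mirrors the locking shape of the new __cfa_io_drain, not its completion handling.
    bool drain_sketch( $io_context * ctx ) {
        unsigned head  = *ctx->cq.head;
        unsigned tail  = *ctx->cq.tail;
        unsigned count = tail - head;
        if( count == 0 ) return false;                    // nothing completed

        if( !__atomic_try_acquire( &ctx->cq.lock ) )      // another processor is already draining
            return false;

        for( i; count ) consume_entry( ctx, head + i );   // hypothetical per-entry helper

        __atomic_unlock( &ctx->cq.lock );
        return true;
    }
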
  • libcfa/src/concurrency/io/setup.cfa

    ref3c383 rd672350  
    3939
    4040#else
     41#pragma GCC diagnostic push
     42#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
    4143        #include <errno.h>
    4244        #include <stdint.h>
     
    5658
    5759        #include "bitmanip.hfa"
    58         #include "kernel_private.hfa"
     60        #include "fstream.hfa"
     61        #include "kernel/private.hfa"
    5962        #include "thread.hfa"
     63#pragma GCC diagnostic pop
    6064
    6165        void ?{}(io_context_params & this) {
     
    111115                this.ext_sq.empty = true;
    112116                (this.ext_sq.queue){};
    113                 __io_uring_setup( this, cl.io.params, proc->idle_fd );
     117                __io_uring_setup( this, cl.io.params, proc->idle_wctx.evfd );
    114118                __cfadbg_print_safe(io_core, "Kernel I/O : Created ring for io_context %u (%p)\n", this.fd, &this);
    115119        }
     
    121125                __cfadbg_print_safe(io_core, "Kernel I/O : Destroyed ring for io_context %u\n", this.fd);
    122126        }
    123 
    124         extern void __disable_interrupts_hard();
    125         extern void __enable_interrupts_hard();
    126127
    127128        static void __io_uring_setup( $io_context & this, const io_context_params & params_in, int procfd ) {
     
    213214
    214215                // completion queue
     216                cq.lock      = 0;
    215217                cq.head      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.head);
    216218                cq.tail      = (volatile __u32 *)(((intptr_t)cq.ring_ptr) + params.cq_off.tail);
     
    226228                        __cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
    227229
    228                         __disable_interrupts_hard();
    229 
    230230                        int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
    231231                        if (ret < 0) {
    232232                                abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
    233233                        }
    234 
    235                         __enable_interrupts_hard();
    236234
    237235                        __cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
     
    258256                struct __sub_ring_t & sq = this.sq;
    259257                struct __cmp_ring_t & cq = this.cq;
     258                {
     259                        __u32 fhead = sq.free_ring.head;
     260                        __u32 ftail = sq.free_ring.tail;
     261
     262                        __u32 total = *sq.num;
     263                        __u32 avail = ftail - fhead;
     264
     265                        if(avail != total) abort | "Processor (" | (void*)this.proc | ") tearing down ring with" | (total - avail) | "entries allocated but not submitted, out of" | total;
     266                }
    260267
    261268                // unmap the submit queue entries
  • libcfa/src/concurrency/io/types.hfa

    ref3c383 rd672350  
    2323#include "bits/locks.hfa"
    2424#include "bits/queue.hfa"
     25#include "iofwd.hfa"
    2526#include "kernel/fwd.hfa"
    2627
     
    7778
    7879        struct __cmp_ring_t {
     80                volatile bool lock;
     81
    7982                // Head and tail of the ring
    8083                volatile __u32 * head;
     
    170173        // void __ioctx_prepare_block($io_context & ctx);
    171174#endif
    172 
    173 //-----------------------------------------------------------------------
    174 // IO user data
    175 struct io_future_t {
    176         future_t self;
    177         __s32 result;
    178 };
    179 
    180 static inline {
    181         thread$ * fulfil( io_future_t & this, __s32 result, bool do_unpark = true ) {
    182                 this.result = result;
    183                 return fulfil(this.self, do_unpark);
    184         }
    185 
    186         // Wait for the future to be fulfilled
    187         bool wait     ( io_future_t & this ) { return wait     (this.self); }
    188         void reset    ( io_future_t & this ) { return reset    (this.self); }
    189         bool available( io_future_t & this ) { return available(this.self); }
    190 }
  • libcfa/src/concurrency/iofwd.hfa

    ref3c383 rd672350  
    1919extern "C" {
    2020        #include <asm/types.h>
     21        #include <sys/stat.h> // needed for mode_t
    2122        #if CFA_HAVE_LINUX_IO_URING_H
    2223                #include <linux/io_uring.h>
     
    2425}
    2526#include "bits/defs.hfa"
     27#include "kernel/fwd.hfa"
    2628#include "time.hfa"
    2729
     
    4749
    4850struct cluster;
    49 struct io_future_t;
    5051struct $io_context;
    5152
     
    5758
    5859struct io_uring_sqe;
     60
     61//-----------------------------------------------------------------------
     62// IO user data
     63struct io_future_t {
     64        future_t self;
     65        __s32 result;
     66};
     67
     68static inline {
     69        thread$ * fulfil( io_future_t & this, __s32 result, bool do_unpark = true ) {
     70                this.result = result;
     71                return fulfil(this.self, do_unpark);
     72        }
     73
     74        // Wait for the future to be fulfilled
     75        bool wait     ( io_future_t & this ) { return wait     (this.self); }
     76        void reset    ( io_future_t & this ) { return reset    (this.self); }
     77        bool available( io_future_t & this ) { return available(this.self); }
     78}
    5979
    6080//----------
     
    133153// Check if a function blocks only the user thread
    134154bool has_user_level_blocking( fptr_t func );
     155
     156#if CFA_HAVE_LINUX_IO_URING_H
     157        static inline void zero_sqe(struct io_uring_sqe * sqe) {
     158                sqe->flags = 0;
     159                sqe->ioprio = 0;
     160                sqe->fd = 0;
     161                sqe->off = 0;
     162                sqe->addr = 0;
     163                sqe->len = 0;
     164                sqe->fsync_flags = 0;
     165                sqe->__pad2[0] = 0;
     166                sqe->__pad2[1] = 0;
     167                sqe->__pad2[2] = 0;
     168                sqe->fd = 0;
     169                sqe->off = 0;
     170                sqe->addr = 0;
     171                sqe->len = 0;
     172        }
     173#endif
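
Since io_future_t and its fulfil/wait/reset/available wrappers now live in iofwd.hfa, any submitter can express "park until the completion arrives" without pulling in io/types.hfa. A hedged usage sketch; submit_read_sketch is illustrative only, standing in for whatever code arranges the eventual fulfil() call:

    io_future_t f;
    reset( f );                     // mark the future as not yet fulfilled
    submit_read_sketch( f );        // hypothetical: completion side will call fulfil( f, result )
    wait( f );                      // blocks this thread until fulfil() runs
    __s32 res = f.result;           // result value stored by fulfil()
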
  • libcfa/src/concurrency/kernel.cfa

    ref3c383 rd672350  
    1919// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    2020
     21#pragma GCC diagnostic push
     22#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
     23
    2124//C Includes
    2225#include <errno.h>
     
    2528#include <signal.h>
    2629#include <unistd.h>
     30
    2731extern "C" {
    2832        #include <sys/eventfd.h>
     
    3135
    3236//CFA Includes
    33 #include "kernel_private.hfa"
     37#include "kernel/private.hfa"
    3438#include "preemption.hfa"
    3539#include "strstream.hfa"
     
    4044#define __CFA_INVOKE_PRIVATE__
    4145#include "invoke.h"
     46#pragma GCC diagnostic pop
    4247
    4348#if !defined(__CFA_NO_STATISTICS__)
     
    131136static void mark_awake(__cluster_proc_list & idles, processor & proc);
    132137
    133 extern void __cfa_io_start( processor * );
    134 extern bool __cfa_io_drain( processor * );
     138extern bool __cfa_io_drain( $io_context * );
    135139extern bool __cfa_io_flush( processor *, int min_comp );
    136 extern void __cfa_io_stop ( processor * );
    137140static inline bool __maybe_io_drain( processor * );
    138141
     
    159162        verify(this);
    160163
    161         io_future_t future; // used for idle sleep when io_uring is present
    162         future.self.ptr = 1p;  // mark it as already fulfilled so we know if there is a pending request or not
    163         eventfd_t idle_val;
    164         iovec idle_iovec = { &idle_val, sizeof(idle_val) };
    165 
    166         __cfa_io_start( this );
     164        /* paranoid */ verify( this->idle_wctx.ftr   != 0p );
     165        /* paranoid */ verify( this->idle_wctx.rdbuf != 0p );
     166
     167        // used for idle sleep when io_uring is present
     168        // mark it as already fulfilled so we know if there is a pending request or not
     169        this->idle_wctx.ftr->self.ptr = 1p;
     170        iovec idle_iovec = { this->idle_wctx.rdbuf, sizeof(eventfd_t) };
    167171
    168172        __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
     
    231235                                }
    232236
    233                                 idle_sleep( this, future, idle_iovec );
     237                                idle_sleep( this, *this->idle_wctx.ftr, idle_iovec );
    234238
    235239                                // We were woken up, remove self from idle
     
    251255                        if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
    252256
    253                         if(this->io.pending && !this->io.dirty) {
     257                        if(__atomic_load_n(&this->io.pending, __ATOMIC_RELAXED) && !__atomic_load_n(&this->io.dirty, __ATOMIC_RELAXED)) {
    254258                                __IO_STATS__(true, io.flush.dirty++; )
    255259                                __cfa_io_flush( this, 0 );
     
    259263                __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
    260264        }
    261 
    262         for(int i = 0; !available(future); i++) {
    263                 if(i > 1000) __cfaabi_dbg_write( "ERROR: kernel has bin spinning on a flush after exit loop.\n", 60);
    264                 __cfa_io_flush( this, 1 );
    265         }
    266 
    267         __cfa_io_stop( this );
    268265
    269266        post( this->terminated );
     
    634631
    635632        int fd = 1;
    636         if( __atomic_load_n(&fdp->fd, __ATOMIC_SEQ_CST) != 1 ) {
    637                 fd = __atomic_exchange_n(&fdp->fd, 1, __ATOMIC_RELAXED);
     633        if( __atomic_load_n(&fdp->sem, __ATOMIC_SEQ_CST) != 1 ) {
     634                fd = __atomic_exchange_n(&fdp->sem, 1, __ATOMIC_RELAXED);
    638635        }
    639636
     
    677674        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
    678675
    679         this->idle_wctx.fd = 1;
     676        this->idle_wctx.sem = 1;
    680677
    681678        eventfd_t val;
    682679        val = 1;
    683         eventfd_write( this->idle_fd, val );
     680        eventfd_write( this->idle_wctx.evfd, val );
    684681
    685682        /* paranoid */ verify( ! __preemption_enabled() );
     
    689686        // Tell everyone we are ready to go do sleep
    690687        for() {
    691                 int expected = this->idle_wctx.fd;
     688                int expected = this->idle_wctx.sem;
    692689
    693690                // Someone already told us to wake-up! No time for a nap.
     
    695692
    696693                // Try to mark that we are going to sleep
    697                 if(__atomic_compare_exchange_n(&this->idle_wctx.fd, &expected, this->idle_fd, false,  __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) {
     694                if(__atomic_compare_exchange_n(&this->idle_wctx.sem, &expected, this->idle_wctx.evfd, false,  __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) {
    698695                        // Every one agreed, taking a nap
    699696                        break;
     
    713710                {
    714711                        eventfd_t val;
    715                         ssize_t ret = read( this->idle_fd, &val, sizeof(val) );
     712                        ssize_t ret = read( this->idle_wctx.evfd, &val, sizeof(val) );
    716713                        if(ret < 0) {
    717714                                switch((int)errno) {
     
    740737                        reset(future);
    741738
    742                         __kernel_read(this, future, iov, this->idle_fd );
     739                        __kernel_read(this, future, iov, this->idle_wctx.evfd );
    743740                }
    744741
     
    750747        __STATS__(true, ready.sleep.halts++; )
    751748
    752         proc.idle_wctx.fd = 0;
     749        proc.idle_wctx.sem = 0;
    753750
    754751        /* paranoid */ verify( ! __preemption_enabled() );
     
    842839                if(head == tail) return false;
    843840                ready_schedule_lock();
    844                 ret = __cfa_io_drain( proc );
     841                ret = __cfa_io_drain( ctx );
    845842                ready_schedule_unlock();
    846843        #endif
  • libcfa/src/concurrency/kernel.hfa

    ref3c383 rd672350  
    4848extern struct cluster * mainCluster;
    4949
    50 // Processor id, required for scheduling threads
    51 
    52 
     50// Coroutine used by processors for the 2-step context switch
    5351coroutine processorCtx_t {
    5452        struct processor * proc;
    5553};
    5654
    57 
     55struct io_future_t;
     56
     57// Information needed for idle sleep
    5858struct __fd_waitctx {
    59         volatile int fd;
     59        // semaphore/future like object
     60        // values can be 0, 1 or some file descriptor.
     61        // 0 - is the default state
     62        // 1 - means the proc should wake up immediately
     63        // FD - means the proc is going to sleep and should be woken by writing to the FD.
     64        volatile int sem;
     65
     66        // The event FD that corresponds to this processor
     67        int evfd;
     68
     69        // buffer into which the proc will read from evfd
     70        // unused if not using io_uring for idle sleep
     71        void * rdbuf;
     72
     73        // future used to track the read of the eventfd
     74        // unused if not using io_uring for idle sleep
     75        io_future_t * ftr;
    6076};
    6177
     
    92108        struct {
    93109                $io_context * ctx;
    94                 bool pending;
    95                 bool dirty;
     110                unsigned id;
     111                unsigned target;
     112                volatile bool pending;
     113                volatile bool dirty;
    96114        } io;
    97115
     
    103121        bool pending_preemption;
    104122
    105         // Idle lock (kernel semaphore)
    106         int idle_fd;
    107 
    108         // Idle waitctx
     123        // context for idle sleep
    109124        struct __fd_waitctx idle_wctx;
    110125
     
    155170void ^?{}(__intrusive_lane_t & this);
    156171
    157 // Aligned timestamps which are used by the relaxed ready queue
     172// Aligned timestamps which are used by the ready queue and io subsystem
    158173struct __attribute__((aligned(128))) __timestamp_t {
    159174        volatile unsigned long long tv;
     
    161176};
    162177
     178static inline void  ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; }
     179static inline void ^?{}(__timestamp_t &) {}
     180
     181
    163182struct __attribute__((aligned(16))) __cache_id_t {
    164183        volatile unsigned id;
    165184};
    166 
    167 // Aligned timestamps which are used by the relaxed ready queue
    168 struct __attribute__((aligned(128))) __help_cnts_t {
    169         volatile unsigned long long src;
    170         volatile unsigned long long dst;
    171         volatile unsigned long long tri;
    172 };
    173 
    174 static inline void  ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; }
    175 static inline void ^?{}(__timestamp_t &) {}
    176 
    177 struct __attribute__((aligned(128))) __ready_queue_caches_t;
    178 void  ?{}(__ready_queue_caches_t & this);
    179 void ^?{}(__ready_queue_caches_t & this);
    180 
    181 //TODO adjust cache size to ARCHITECTURE
    182 // Structure holding the ready queue
    183 struct __ready_queue_t {
    184         // Data tracking the actual lanes
    185         // On a seperate cacheline from the used struct since
    186         // used can change on each push/pop but this data
    187         // only changes on shrink/grow
    188         struct {
    189                 // Arary of lanes
    190                 __intrusive_lane_t * volatile data;
    191 
    192                 // Array of times
    193                 __timestamp_t * volatile tscs;
    194 
    195                 __cache_id_t * volatile caches;
    196 
    197                 // Array of stats
    198                 __help_cnts_t * volatile help;
    199 
    200                 // Number of lanes (empty or not)
    201                 volatile size_t count;
    202         } lanes;
    203 };
    204 
    205 void  ?{}(__ready_queue_t & this);
    206 void ^?{}(__ready_queue_t & this);
    207 #if !defined(__CFA_NO_STATISTICS__)
    208         unsigned cnt(const __ready_queue_t & this, unsigned idx);
    209 #endif
    210185
    211186// Idle Sleep
     
    233208// Cluster
    234209struct __attribute__((aligned(128))) cluster {
    235         // Ready queue for threads
    236         __ready_queue_t ready_queue;
     210        struct {
     211                struct {
     212                        // Array of subqueues
     213                        __intrusive_lane_t * data;
     214
     215                        // Time since subqueues were processed
     216                        __timestamp_t * tscs;
     217
     218                        // Number of subqueue / timestamps
     219                        size_t count;
     220                } readyQ;
     221
     222                struct {
     223                        // Array of $io_context
     224                        $io_context ** data;
     225
     226                        // Time since subqueues were processed
     227                        __timestamp_t * tscs;
     228
     229                        // Number of I/O subqueues
     230                        size_t count;
     231                } io;
     232
     233                // Cache each kernel thread belongs to
     234                __cache_id_t * caches;
     235        } sched;
     236
     237        // // Ready queue for threads
     238        // __ready_queue_t ready_queue;
    237239
    238240        // Name of the cluster
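
The new __fd_waitctx gathers everything idle sleep needs: sem is a tiny state machine (0 by default, 1 when a wake-up is already pending, otherwise the eventfd the sleeper is blocked on), evfd is that eventfd, and ftr/rdbuf support the io_uring-based read. A condensed sketch of the handshake as it appears in the kernel.cfa diff above (retry loops and error handling elided):

    // Sleeper side: publish the eventfd in sem, after which it is safe to block on it.
    int expected = this->idle_wctx.sem;
    if( expected != 1                                     // 1 means a wake-up already arrived
        && __atomic_compare_exchange_n( &this->idle_wctx.sem, &expected, this->idle_wctx.evfd,
                                        false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) {
        eventfd_t val;
        read( this->idle_wctx.evfd, &val, sizeof(val) );  // nap until someone writes the eventfd
    }

    // Waker side: cancel a nap that has not started yet, and interrupt one that has.
    this->idle_wctx.sem = 1;
    eventfd_t val = 1;
    eventfd_write( this->idle_wctx.evfd, val );
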
  • libcfa/src/concurrency/kernel/fwd.hfa

    ref3c383 rd672350  
    347347                                        struct oneshot * want = expected == 0p ? 1p : 2p;
    348348                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    349                                                 if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return 0p; }
     349                                                if( expected == 0p ) { return 0p; }
    350350                                                thread$ * ret = post( *expected, do_unpark );
    351351                                                __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
  • libcfa/src/concurrency/kernel/private.hfa

    ref3c383 rd672350  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // kernel_private.hfa --
     7// kernel/private.hfa --
    88//
    99// Author           : Thierry Delisle
     
    1717
    1818#if !defined(__cforall_thread__)
    19         #error kernel_private.hfa should only be included in libcfathread source
     19        #error kernel/private.hfa should only be included in libcfathread source
    2020#endif
    2121
     
    3333#else
    3434        #ifndef _GNU_SOURCE
    35         #error kernel_private requires gnu_source
     35        #error kernel/private requires gnu_source
    3636        #endif
    3737        #include <sched.h>
     
    5959
    6060extern bool __preemption_enabled();
     61
     62enum {
     63        PREEMPT_NORMAL    = 0,
     64        PREEMPT_TERMINATE = 1,
     65        PREEMPT_IO = 2,
     66};
    6167
    6268static inline void __disable_interrupts_checked() {
     
    359365void ready_queue_shrink(struct cluster * cltr);
    360366
     367//-----------------------------------------------------------------------
     368// Deallocate the ready queue data structures when the cluster is destroyed
     369void ready_queue_close(struct cluster * cltr);
    361370
    362371// Local Variables: //
  • libcfa/src/concurrency/kernel/startup.cfa

    ref3c383 rd672350  
    1818
    1919// C Includes
    20 #include <errno.h>                                                                              // errno
     20#include <errno.h>                                      // errno
    2121#include <signal.h>
    22 #include <string.h>                                                                             // strerror
    23 #include <unistd.h>                                                                             // sysconf
     22#include <string.h>                                     // strerror
     23#include <unistd.h>                                     // sysconf
    2424
    2525extern "C" {
    26         #include <limits.h>                                                                     // PTHREAD_STACK_MIN
    27         #include <unistd.h>                                                                     // syscall
    28         #include <sys/eventfd.h>                                                        // eventfd
    29         #include <sys/mman.h>                                                           // mprotect
    30         #include <sys/resource.h>                                                       // getrlimit
     26        #include <limits.h>                             // PTHREAD_STACK_MIN
     27        #include <unistd.h>                             // syscall
     28        #include <sys/eventfd.h>                        // eventfd
     29        #include <sys/mman.h>                           // mprotect
     30        #include <sys/resource.h>                       // getrlimit
    3131}
    3232
    3333// CFA Includes
    34 #include "kernel_private.hfa"
    35 #include "startup.hfa"                                                                  // STARTUP_PRIORITY_XXX
     34#include "kernel/private.hfa"
     35#include "iofwd.hfa"
     36#include "startup.hfa"                                  // STARTUP_PRIORITY_XXX
    3637#include "limits.hfa"
    3738#include "math.hfa"
     
    9798extern void __kernel_alarm_startup(void);
    9899extern void __kernel_alarm_shutdown(void);
     100extern void __cfa_io_start( processor * );
     101extern void __cfa_io_stop ( processor * );
    99102
    100103//-----------------------------------------------------------------------------
     
    111114KERNEL_STORAGE(__stack_t,            mainThreadCtx);
    112115KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
     116KERNEL_STORAGE(eventfd_t,            mainIdleEventFd);
     117KERNEL_STORAGE(io_future_t,          mainIdleFuture);
    113118#if !defined(__CFA_NO_STATISTICS__)
    114119KERNEL_STORAGE(__stats_t, mainProcStats);
     
    224229        (*mainProcessor){};
    225230
     231        mainProcessor->idle_wctx.rdbuf = &storage_mainIdleEventFd;
     232        mainProcessor->idle_wctx.ftr   = (io_future_t*)&storage_mainIdleFuture;
     233        /* paranoid */ verify( sizeof(storage_mainIdleEventFd) == sizeof(eventfd_t) );
     234
    226235        register_tls( mainProcessor );
     236        __cfa_io_start( mainProcessor );
    227237
    228238        // Start by initializing the main thread
     
    304314        mainProcessor->local_data = 0p;
    305315
     316        __cfa_io_stop( mainProcessor );
    306317        unregister_tls( mainProcessor );
    307318
     
    355366        register_tls( proc );
    356367
     368        __cfa_io_start( proc );
     369
     370        // used for idle sleep when io_uring is present
     371        io_future_t future;
     372        eventfd_t idle_buf;
     373        proc->idle_wctx.ftr = &future;
     374        proc->idle_wctx.rdbuf = &idle_buf;
     375
     376
    357377        // SKULLDUGGERY: We want to create a context for the processor coroutine
    358378        // which is needed for the 2-step context switch. However, there is no reason
     
    381401        // Main routine of the core returned, the core is now fully terminated
    382402        __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner);
     403
     404        __cfa_io_stop( proc );
    383405
    384406        #if !defined(__CFA_NO_STATISTICS__)
     
    515537        this.rdq.its = 0;
    516538        this.rdq.itr = 0;
    517         this.rdq.id  = MAX;
     539        this.rdq.id  = 0;
    518540        this.rdq.target = MAX;
    519541        this.rdq.last = MAX;
     
    532554        this.local_data = 0p;
    533555
    534         this.idle_fd = eventfd(0, 0);
    535         if (idle_fd < 0) {
     556        idle_wctx.evfd = eventfd(0, 0);
     557        if (idle_wctx.evfd < 0) {
    536558                abort("KERNEL ERROR: PROCESSOR EVENTFD - %s\n", strerror(errno));
    537559        }
    538560
    539         this.idle_wctx.fd = 0;
     561        idle_wctx.sem = 0;
    540562
    541563        // I'm assuming these two are reserved for standard input and output
    542564        // so I'm using them as sentinels with idle_wctx.
    543         /* paranoid */ verify( this.idle_fd != 0 );
    544         /* paranoid */ verify( this.idle_fd != 1 );
     565        /* paranoid */ verify( idle_wctx.evfd != 0 );
     566        /* paranoid */ verify( idle_wctx.evfd != 1 );
    545567
    546568        #if !defined(__CFA_NO_STATISTICS__)
     
    554576// Not a ctor, it just preps the destruction but should not destroy members
    555577static void deinit(processor & this) {
    556         close(this.idle_fd);
     578        close(this.idle_wctx.evfd);
    557579}
    558580
     
    605627        this.name = name;
    606628        this.preemption_rate = preemption_rate;
    607         ready_queue{};
     629        this.sched.readyQ.data = 0p;
     630        this.sched.readyQ.tscs = 0p;
     631        this.sched.readyQ.count = 0;
     632        this.sched.io.tscs = 0p;
     633        this.sched.caches = 0p;
    608634
    609635        #if !defined(__CFA_NO_STATISTICS__)
     
    644670        // Unlock the RWlock
    645671        ready_mutate_unlock( last_size );
     672
     673        ready_queue_close( &this );
     674        /* paranoid */ verify( this.sched.readyQ.data == 0p );
     675        /* paranoid */ verify( this.sched.readyQ.tscs == 0p );
     676        /* paranoid */ verify( this.sched.readyQ.count == 0 );
     677        /* paranoid */ verify( this.sched.io.tscs == 0p );
     678        /* paranoid */ verify( this.sched.caches == 0p );
     679
    646680        enable_interrupts( false ); // Don't poll, could be in main cluster
     681
    647682
    648683        #if !defined(__CFA_NO_STATISTICS__)
     
    736771        check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
    737772
    738         size_t stacksize = DEFAULT_STACK_SIZE;
     773        size_t stacksize = max( PTHREAD_STACK_MIN, DEFAULT_STACK_SIZE );
    739774
    740775        void * stack;
  • libcfa/src/concurrency/locks.cfa

    ref3c383 rd672350  
    1919
    2020#include "locks.hfa"
    21 #include "kernel_private.hfa"
     21#include "kernel/private.hfa"
    2222
    2323#include <kernel.hfa>
  • libcfa/src/concurrency/locks.hfa

    ref3c383 rd672350  
    164164}
    165165
    166 static inline bool lock(linear_backoff_then_block_lock & this) with(this) {
     166static inline void lock(linear_backoff_then_block_lock & this) with(this) {
    167167        // if owner just return
    168         if (active_thread() == owner) return true;
     168        if (active_thread() == owner) return;
    169169        size_t compare_val = 0;
    170170        int spin = spin_start;
     
    172172        for( ;; ) {
    173173                compare_val = 0;
    174                 if (internal_try_lock(this, compare_val)) return true;
     174                if (internal_try_lock(this, compare_val)) return;
    175175                if (2 == compare_val) break;
    176176                for (int i = 0; i < spin; i++) Pause();
     
    179179        }
    180180
    181         if(2 != compare_val && try_lock_contention(this)) return true;
     181        if(2 != compare_val && try_lock_contention(this)) return;
    182182        // block until signalled
    183         while (block(this)) if(try_lock_contention(this)) return true;
    184 
    185         // this should never be reached as block(this) always returns true
    186         return false;
     183        while (block(this)) if(try_lock_contention(this)) return;
    187184}
    188185
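
Since lock(linear_backoff_then_block_lock&) can only ever succeed, it now returns void and the unreachable "return false" path is gone; call sites have nothing to check. A minimal usage sketch, assuming the default constructor and the matching unlock() already provided by locks.hfa:

    linear_backoff_then_block_lock l;
    lock( l );       // spins with linear backoff, then blocks until the lock is acquired
    // ... critical section ...
    unlock( l );
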
  • libcfa/src/concurrency/monitor.cfa

    ref3c383 rd672350  
    2222#include <inttypes.h>
    2323
    24 #include "kernel_private.hfa"
     24#include "kernel/private.hfa"
    2525
    2626#include "bits/algorithm.hfa"
  • libcfa/src/concurrency/mutex.cfa

    ref3c383 rd672350  
    2121#include "mutex.hfa"
    2222
    23 #include "kernel_private.hfa"
     23#include "kernel/private.hfa"
    2424
    2525//-----------------------------------------------------------------------------
  • libcfa/src/concurrency/mutex_stmt.hfa

    ref3c383 rd672350  
    1212};
    1313
     14
     15struct __mutex_stmt_lock_guard {
     16    void ** lockarr;
     17    __lock_size_t count;
     18};
     19
     20static inline void ?{}( __mutex_stmt_lock_guard & this, void * lockarr [], __lock_size_t count  ) {
     21    this.lockarr = lockarr;
     22    this.count = count;
     23
     24    // Sort locks based on address
     25    __libcfa_small_sort(this.lockarr, count);
     26
     27    // acquire locks in order
     28    // for ( size_t i = 0; i < count; i++ ) {
     29    //     lock(*this.lockarr[i]);
     30    // }
     31}
     32
     33static inline void ^?{}( __mutex_stmt_lock_guard & this ) with(this) {
     34    // for ( size_t i = count; i > 0; i-- ) {
     35    //     unlock(*lockarr[i - 1]);
     36    // }
     37}
     38
    1439forall(L & | is_lock(L)) {
    15 
    16     struct __mutex_stmt_lock_guard {
    17         L ** lockarr;
    18         __lock_size_t count;
    19     };
    20    
    21     static inline void ?{}( __mutex_stmt_lock_guard(L) & this, L * lockarr [], __lock_size_t count  ) {
    22         this.lockarr = lockarr;
    23         this.count = count;
    24 
    25         // Sort locks based on address
    26         __libcfa_small_sort(this.lockarr, count);
    27 
    28         // acquire locks in order
    29         for ( size_t i = 0; i < count; i++ ) {
    30             lock(*this.lockarr[i]);
    31         }
    32     }
    33    
    34     static inline void ^?{}( __mutex_stmt_lock_guard(L) & this ) with(this) {
    35         for ( size_t i = count; i > 0; i-- ) {
    36             unlock(*lockarr[i - 1]);
    37         }
    38     }
    3940
    4041    struct scoped_lock {
     
    5152    }
    5253
    53     static inline L * __get_ptr( L & this ) {
     54    static inline void * __get_mutexstmt_lock_ptr( L & this ) {
    5455        return &this;
    5556    }
    5657
    57     static inline L __get_type( L & this );
     58    static inline L __get_mutexstmt_lock_type( L & this );
    5859
    59     static inline L __get_type( L * this );
     60    static inline L __get_mutexstmt_lock_type( L * this );
    6061}
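
The mutex-statement guard is no longer polymorphic over the lock type: it stores type-erased void * pointers and only establishes a canonical, address-sorted order, while the acquire/release loops are commented out (presumably the code generated for the mutex statement now drives them through __get_mutexstmt_lock_ptr). A hedged sketch of constructing the guard by hand, reusing a lock type from elsewhere in this changeset:

    linear_backoff_then_block_lock a, b;
    void * locks[] = { &a, &b };                      // lock identity is erased to void *
    __mutex_stmt_lock_guard guard = { locks, 2 };     // constructor sorts by address for a global lock order
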
  • libcfa/src/concurrency/preemption.cfa

    ref3c383 rd672350  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Nov  6 07:42:13 2020
    13 // Update Count     : 54
     12// Last Modified On : Thu Feb 17 11:18:57 2022
     13// Update Count     : 59
    1414//
    1515
     
    3131#include "bits/debug.hfa"
    3232#include "bits/signal.hfa"
    33 #include "kernel_private.hfa"
     33#include "kernel/private.hfa"
    3434
    3535
     
    9797}
    9898
    99 enum {
    100         PREEMPT_NORMAL    = 0,
    101         PREEMPT_TERMINATE = 1,
    102 };
    103 
    10499//=============================================================================================
    105100// Kernel Preemption logic
     
    243238//----------
    244239// special case for preemption since used often
    245 bool __preemption_enabled() {
     240__attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() {
    246241        // create a assembler label before
    247242        // marked as clobber all to avoid movement
     
    664659        choose(sfp->si_value.sival_int) {
    665660                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
     661                case PREEMPT_IO       : ;// I/O asked to stop spinning, nothing to do here
    666662                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
    667663                default:
  • libcfa/src/concurrency/ready_queue.cfa

    ref3c383 rd672350  
    2020
    2121
    22 // #define USE_RELAXED_FIFO
    23 // #define USE_WORK_STEALING
    24 // #define USE_CPU_WORK_STEALING
    2522#define USE_AWARE_STEALING
    2623
    2724#include "bits/defs.hfa"
    2825#include "device/cpu.hfa"
    29 #include "kernel_private.hfa"
    30 
    31 #include "stdlib.hfa"
     26#include "kernel/cluster.hfa"
     27#include "kernel/private.hfa"
     28
    3229#include "limits.hfa"
    33 #include "math.hfa"
    34 
    35 #include <errno.h>
    36 #include <unistd.h>
    37 
    38 extern "C" {
    39         #include <sys/syscall.h>  // __NR_xxx
    40 }
     30
     31// #include <errno.h>
     32// #include <unistd.h>
    4133
    4234#include "ready_subqueue.hfa"
     
    5042#endif
    5143
    52 // No overriden function, no environment variable, no define
    53 // fall back to a magic number
    54 #ifndef __CFA_MAX_PROCESSORS__
    55         #define __CFA_MAX_PROCESSORS__ 1024
    56 #endif
    57 
    58 #if   defined(USE_AWARE_STEALING)
    59         #define READYQ_SHARD_FACTOR 2
    60         #define SEQUENTIAL_SHARD 2
    61 #elif defined(USE_CPU_WORK_STEALING)
    62         #define READYQ_SHARD_FACTOR 2
    63 #elif defined(USE_RELAXED_FIFO)
    64         #define BIAS 4
    65         #define READYQ_SHARD_FACTOR 4
    66         #define SEQUENTIAL_SHARD 1
    67 #elif defined(USE_WORK_STEALING)
    68         #define READYQ_SHARD_FACTOR 2
    69         #define SEQUENTIAL_SHARD 2
    70 #else
    71         #error no scheduling strategy selected
    72 #endif
    73 
    7444static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats));
    7545static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats));
    7646static inline struct thread$ * search(struct cluster * cltr);
    77 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred);
    78 
    79 
    80 // returns the maximum number of processors the RWLock support
    81 __attribute__((weak)) unsigned __max_processors() {
    82         const char * max_cores_s = getenv("CFA_MAX_PROCESSORS");
    83         if(!max_cores_s) {
    84                 __cfadbg_print_nolock(ready_queue, "No CFA_MAX_PROCESSORS in ENV\n");
    85                 return __CFA_MAX_PROCESSORS__;
    86         }
    87 
    88         char * endptr = 0p;
    89         long int max_cores_l = strtol(max_cores_s, &endptr, 10);
    90         if(max_cores_l < 1 || max_cores_l > 65535) {
    91                 __cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS out of range : %ld\n", max_cores_l);
    92                 return __CFA_MAX_PROCESSORS__;
    93         }
    94         if('\0' != *endptr) {
    95                 __cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS not a decimal number : %s\n", max_cores_s);
    96                 return __CFA_MAX_PROCESSORS__;
    97         }
    98 
    99         return max_cores_l;
    100 }
    101 
    102 #if   defined(CFA_HAVE_LINUX_LIBRSEQ)
    103         // No forward declaration needed
    104         #define __kernel_rseq_register rseq_register_current_thread
    105         #define __kernel_rseq_unregister rseq_unregister_current_thread
    106 #elif defined(CFA_HAVE_LINUX_RSEQ_H)
    107         static void __kernel_raw_rseq_register  (void);
    108         static void __kernel_raw_rseq_unregister(void);
    109 
    110         #define __kernel_rseq_register __kernel_raw_rseq_register
    111         #define __kernel_rseq_unregister __kernel_raw_rseq_unregister
    112 #else
    113         // No forward declaration needed
    114         // No initialization needed
    115         static inline void noop(void) {}
    116 
    117         #define __kernel_rseq_register noop
    118         #define __kernel_rseq_unregister noop
    119 #endif
    120 
    121 //=======================================================================
    122 // Cluster wide reader-writer lock
    123 //=======================================================================
    124 void  ?{}(__scheduler_RWLock_t & this) {
    125         this.max   = __max_processors();
    126         this.alloc = 0;
    127         this.ready = 0;
    128         this.data  = alloc(this.max);
    129         this.write_lock  = false;
    130 
    131         /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc));
    132         /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready));
    133 
    134 }
    135 void ^?{}(__scheduler_RWLock_t & this) {
    136         free(this.data);
    137 }
    138 
    139 
    140 //=======================================================================
    141 // Lock-Free registering/unregistering of threads
    142 unsigned register_proc_id( void ) with(*__scheduler_lock) {
    143         __kernel_rseq_register();
    144 
    145         bool * handle = (bool *)&kernelTLS().sched_lock;
    146 
    147         // Step - 1 : check if there is already space in the data
    148         uint_fast32_t s = ready;
    149 
    150         // Check among all the ready
    151         for(uint_fast32_t i = 0; i < s; i++) {
    152                 bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems
    153                 /* paranoid */ verify( handle != *cell );
    154 
    155                 bool * null = 0p; // Re-write every loop since compare thrashes it
    156                 if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null
    157                         && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    158                         /* paranoid */ verify(i < ready);
    159                         /* paranoid */ verify( (kernelTLS().sched_id = i, true) );
    160                         return i;
    161                 }
    162         }
    163 
    164         if(max <= alloc) abort("Trying to create more than %ud processors", __scheduler_lock->max);
    165 
    166         // Step - 2 : F&A to get a new spot in the array.
    167         uint_fast32_t n = __atomic_fetch_add(&alloc, 1, __ATOMIC_SEQ_CST);
    168         if(max <= n) abort("Trying to create more than %ud processors", __scheduler_lock->max);
    169 
    170         // Step - 3 : Mark space as used and then publish it.
    171         data[n] = handle;
    172         while() {
    173                 unsigned copy = n;
    174                 if( __atomic_load_n(&ready, __ATOMIC_RELAXED) == n
    175                         && __atomic_compare_exchange_n(&ready, &copy, n + 1, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
    176                         break;
    177                 Pause();
    178         }
    179 
    180         // Return new spot.
    181         /* paranoid */ verify(n < ready);
    182         /* paranoid */ verify( (kernelTLS().sched_id = n, true) );
    183         return n;
    184 }
    185 
    186 void unregister_proc_id( unsigned id ) with(*__scheduler_lock) {
    187         /* paranoid */ verify(id < ready);
    188         /* paranoid */ verify(id == kernelTLS().sched_id);
    189         /* paranoid */ verify(data[id] == &kernelTLS().sched_lock);
    190 
    191         bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems
    192 
    193         __atomic_store_n(cell, 0p, __ATOMIC_RELEASE);
    194 
    195         __kernel_rseq_unregister();
    196 }
    197 
    198 //-----------------------------------------------------------------------
    199 // Writer side : acquire when changing the ready queue, e.g. adding more
    200 //  queues or removing them.
    201 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
    202         /* paranoid */ verify( ! __preemption_enabled() );
    203 
    204         // Step 1 : lock global lock
    205         // It is needed to avoid processors that register mid Critical-Section
    206         //   to simply lock their own lock and enter.
    207         __atomic_acquire( &write_lock );
    208 
    209         // Make sure we won't deadlock ourself
    210         // Checking before acquiring the writer lock isn't safe
    211         // because someone else could have locked us.
    212         /* paranoid */ verify( ! kernelTLS().sched_lock );
    213 
    214         // Step 2 : lock per-proc lock
    215         // Processors that are currently being registered aren't counted
    216         //   but can't be in read_lock or in the critical section.
    217         // All other processors are counted
    218         uint_fast32_t s = ready;
    219         for(uint_fast32_t i = 0; i < s; i++) {
    220                 volatile bool * llock = data[i];
    221                 if(llock) __atomic_acquire( llock );
    222         }
    223 
    224         /* paranoid */ verify( ! __preemption_enabled() );
    225         return s;
    226 }
    227 
    228 void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {
    229         /* paranoid */ verify( ! __preemption_enabled() );
    230 
    231         // Step 1 : release local locks
    232         // This must be done while the global lock is held to avoid
    233         //   threads that where created mid critical section
    234         //   to race to lock their local locks and have the writer
    235         //   immidiately unlock them
    236         // Alternative solution : return s in write_lock and pass it to write_unlock
    237         for(uint_fast32_t i = 0; i < last_s; i++) {
    238                 volatile bool * llock = data[i];
    239                 if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);
    240         }
    241 
    242         // Step 2 : release global lock
    243         /*paranoid*/ assert(true == write_lock);
    244         __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);
    245 
    246         /* paranoid */ verify( ! __preemption_enabled() );
    247 }
    248 
    249 //=======================================================================
    250 // caches handling
    251 
    252 struct __attribute__((aligned(128))) __ready_queue_caches_t {
    253         // Count States:
    254         // - 0  : No one is looking after this cache
    255         // - 1  : No one is looking after this cache, BUT it's not empty
    256         // - 2+ : At least one processor is looking after this cache
    257         volatile unsigned count;
    258 };
    259 
    260 void  ?{}(__ready_queue_caches_t & this) { this.count = 0; }
    261 void ^?{}(__ready_queue_caches_t & this) {}
    262 
    263 static inline void depart(__ready_queue_caches_t & cache) {
    264         /* paranoid */ verify( cache.count > 1);
    265         __atomic_fetch_add(&cache.count, -1, __ATOMIC_SEQ_CST);
    266         /* paranoid */ verify( cache.count != 0);
    267         /* paranoid */ verify( cache.count < 65536 ); // This verify assumes no cluster will have more than 65000 kernel threads mapped to a single cache, which could be correct but is super weird.
    268 }
    269 
    270 static inline void arrive(__ready_queue_caches_t & cache) {
    271         // for() {
    272         //      unsigned expected = cache.count;
    273         //      unsigned desired  = 0 == expected ? 2 : expected + 1;
    274         // }
    275 }
    27647
    27748//=======================================================================
    27849// Cforall Ready Queue used for scheduling
    27950//=======================================================================
    280 unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) {
    281         /* paranoid */ verifyf( currtsc < 45000000000000000, "Suspiciously large current time: %'llu (%llx)\n", currtsc, currtsc );
    282         /* paranoid */ verifyf( instsc  < 45000000000000000, "Suspiciously large insert time: %'llu (%llx)\n", instsc, instsc );
    283         /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg );
    284 
    285         const unsigned long long new_val = currtsc > instsc ? currtsc - instsc : 0;
    286         const unsigned long long total_weight = 16;
    287         const unsigned long long new_weight   = 4;
    288         const unsigned long long old_weight = total_weight - new_weight;
    289         const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight;
    290         return ret;
    291 }
    292 
    293 void ?{}(__ready_queue_t & this) with (this) {
    294         #if defined(USE_CPU_WORK_STEALING)
    295                 lanes.count = cpu_info.hthrd_count * READYQ_SHARD_FACTOR;
    296                 lanes.data = alloc( lanes.count );
    297                 lanes.tscs = alloc( lanes.count );
    298                 lanes.help = alloc( cpu_info.hthrd_count );
    299 
    300                 for( idx; (size_t)lanes.count ) {
    301                         (lanes.data[idx]){};
    302                         lanes.tscs[idx].tv = rdtscl();
    303                         lanes.tscs[idx].ma = rdtscl();
    304                 }
    305                 for( idx; (size_t)cpu_info.hthrd_count ) {
    306                         lanes.help[idx].src = 0;
    307                         lanes.help[idx].dst = 0;
    308                         lanes.help[idx].tri = 0;
    309                 }
    310         #else
    311                 lanes.data   = 0p;
    312                 lanes.tscs   = 0p;
    313                 lanes.caches = 0p;
    314                 lanes.help   = 0p;
    315                 lanes.count  = 0;
    316         #endif
    317 }
    318 
    319 void ^?{}(__ready_queue_t & this) with (this) {
    320         #if !defined(USE_CPU_WORK_STEALING)
    321                 verify( SEQUENTIAL_SHARD == lanes.count );
    322         #endif
    323 
    324         free(lanes.data);
    325         free(lanes.tscs);
    326         free(lanes.caches);
    327         free(lanes.help);
    328 }
    329 
    330 //-----------------------------------------------------------------------
    331 #if defined(USE_AWARE_STEALING)
    332         __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
    333                 processor * const proc = kernelTLS().this_processor;
    334                 const bool external = (!proc) || (cltr != proc->cltr);
    335                 const bool remote   = hint == UNPARK_REMOTE;
    336 
    337                 unsigned i;
    338                 if( external || remote ) {
    339                         // Figure out where thread was last time and make sure it's valid
    340                         /* paranoid */ verify(thrd->preferred >= 0);
    341                         if(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count) {
    342                                 /* paranoid */ verify(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count);
    343                                 unsigned start = thrd->preferred * READYQ_SHARD_FACTOR;
    344                                 do {
    345                                         unsigned r = __tls_rand();
    346                                         i = start + (r % READYQ_SHARD_FACTOR);
    347                                         /* paranoid */ verify( i < lanes.count );
    348                                         // If we can't lock it retry
    349                                 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    350                         } else {
    351                                 do {
    352                                         i = __tls_rand() % lanes.count;
    353                                 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    354                         }
     51// void ?{}(__ready_queue_t & this) with (this) {
     52//      lanes.data   = 0p;
     53//      lanes.tscs   = 0p;
     54//      lanes.caches = 0p;
     55//      lanes.count  = 0;
     56// }
     57
     58// void ^?{}(__ready_queue_t & this) with (this) {
     59//      free(lanes.data);
     60//      free(lanes.tscs);
     61//      free(lanes.caches);
     62// }
     63
     64//-----------------------------------------------------------------------
     65__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->sched) {
     66        processor * const proc = kernelTLS().this_processor;
     67        const bool external = (!proc) || (cltr != proc->cltr);
     68        const bool remote   = hint == UNPARK_REMOTE;
     69        const size_t lanes_count = readyQ.count;
     70
     71        /* paranoid */ verify( __shard_factor.readyq > 0 );
     72        /* paranoid */ verify( lanes_count > 0 );
     73
     74        unsigned i;
     75        if( external || remote ) {
     76                // Figure out where thread was last time and make sure it's valid
     77                /* paranoid */ verify(thrd->preferred >= 0);
     78                unsigned start = thrd->preferred * __shard_factor.readyq;
     79                if(start < lanes_count) {
     80                        do {
     81                                unsigned r = __tls_rand();
     82                                i = start + (r % __shard_factor.readyq);
     83                                /* paranoid */ verify( i < lanes_count );
     84                                // If we can't lock it retry
     85                        } while( !__atomic_try_acquire( &readyQ.data[i].lock ) );
    35586                } else {
    35687                        do {
    357                                 unsigned r = proc->rdq.its++;
    358                                 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR);
    359                                 /* paranoid */ verify( i < lanes.count );
    360                                 // If we can't lock it retry
    361                         } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    362                 }
    363 
    364                 // Actually push it
    365                 push(lanes.data[i], thrd);
    366 
    367                 // Unlock and return
    368                 __atomic_unlock( &lanes.data[i].lock );
    369 
    370                 #if !defined(__CFA_NO_STATISTICS__)
    371                         if(unlikely(external || remote)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
    372                         else __tls_stats()->ready.push.local.success++;
    373                 #endif
    374         }
    375 
    376         static inline unsigned long long calc_cutoff(const unsigned long long ctsc, const processor * proc, __ready_queue_t & rdq) {
    377                 unsigned start = proc->rdq.id;
    378                 unsigned long long max = 0;
    379                 for(i; READYQ_SHARD_FACTOR) {
    380                         unsigned long long ptsc = ts(rdq.lanes.data[start + i]);
    381                         if(ptsc != -1ull) {
    382                                 /* paranoid */ verify( start + i < rdq.lanes.count );
    383                                 unsigned long long tsc = moving_average(ctsc, ptsc, rdq.lanes.tscs[start + i].ma);
    384                                 if(tsc > max) max = tsc;
    385                         }
    386                 }
    387                 return (max + 2 * max) / 2;
    388         }
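Note that the cutoff returned above, (max + 2 * max) / 2, simplifies to 1.5 * max: a remote lane is only treated as old enough to help when its moving-average age exceeds the oldest of the processor's own shards by 50%. A hedged standalone equivalent (hypothetical helper, not from this changeset):

    // e.g. a local maximum age of 8000 cycles gives a cutoff of (8000 + 16000) / 2 = 12000.
    static inline unsigned long long cutoff_from_max( unsigned long long max ) {
            return (max + 2 * max) / 2;   // == (3 * max) / 2
    }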
    389 
    390         __attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
    391                 /* paranoid */ verify( lanes.count > 0 );
    392                 /* paranoid */ verify( kernelTLS().this_processor );
    393                 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count );
    394 
    395                 processor * const proc = kernelTLS().this_processor;
    396                 unsigned this = proc->rdq.id;
    397                 /* paranoid */ verify( this < lanes.count );
    398                 __cfadbg_print_safe(ready_queue, "Kernel : pop from %u\n", this);
    399 
    400                 // Figure out the current cpu and make sure it is valid
    401                 const int cpu = __kernel_getcpu();
    402                 /* paranoid */ verify(cpu >= 0);
    403                 /* paranoid */ verify(cpu < cpu_info.hthrd_count);
    404                 unsigned this_cache = cpu_info.llc_map[cpu].cache;
    405 
    406                 // Super important: don't write the same value over and over again
    407                 // We want to maximise our chances that this particular value stays in cache
    408                 if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache)
    409                         __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);
    410 
    411                 const unsigned long long ctsc = rdtscl();
    412 
    413                 if(proc->rdq.target == MAX) {
    414                         uint64_t chaos = __tls_rand();
    415                         unsigned ext = chaos & 0xff;
    416                         unsigned other  = (chaos >> 8) % (lanes.count);
    417 
    418                         if(ext < 3 || __atomic_load_n(&lanes.caches[other / READYQ_SHARD_FACTOR].id, __ATOMIC_RELAXED) == this_cache) {
    419                                 proc->rdq.target = other;
    420                         }
    421                 }
    422                 else {
    423                         const unsigned target = proc->rdq.target;
    424                 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tsc %llu\n", this, target, lanes.tscs[target].tv);
    425                         /* paranoid */ verify( lanes.tscs[target].tv != MAX );
    426                         if(target < lanes.count) {
    427                                 const unsigned long long cutoff = calc_cutoff(ctsc, proc, cltr->ready_queue);
    428                                 const unsigned long long age = moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma);
    429                                 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");
    430                                 if(age > cutoff) {
    431                                         thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
    432                                         if(t) return t;
    433                                 }
    434                         }
    435                         proc->rdq.target = MAX;
    436                 }
    437 
    438                 for(READYQ_SHARD_FACTOR) {
    439                         unsigned i = this + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
    440                         if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
    441                 }
    442 
    443                 // All lanes were empty, return 0p
    444                 return 0p;
    445 
    446         }
    447         __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
    448                 unsigned i = __tls_rand() % lanes.count;
    449                 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
    450         }
    451         __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {
    452                 return search(cltr);
    453         }
    454 #endif
    455 #if defined(USE_CPU_WORK_STEALING)
    456         __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
    457                 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
    458 
    459                 processor * const proc = kernelTLS().this_processor;
    460                 const bool external = (!proc) || (cltr != proc->cltr);
    461 
    462                 // Figure out the current cpu and make sure it is valid
    463                 const int cpu = __kernel_getcpu();
    464                 /* paranoid */ verify(cpu >= 0);
    465                 /* paranoid */ verify(cpu < cpu_info.hthrd_count);
    466                 /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
    467 
    468                 // Figure out where thread was last time and make sure it's valid
    469                 /* paranoid */ verify(thrd->preferred >= 0);
    470                 /* paranoid */ verify(thrd->preferred < cpu_info.hthrd_count);
    471                 /* paranoid */ verify(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count);
    472                 const int prf = thrd->preferred * READYQ_SHARD_FACTOR;
    473 
    474                 const cpu_map_entry_t & map;
    475                 choose(hint) {
    476                         case UNPARK_LOCAL : &map = &cpu_info.llc_map[cpu];
    477                         case UNPARK_REMOTE: &map = &cpu_info.llc_map[prf];
    478                 }
    479                 /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
    480                 /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
    481                 /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
    482 
    483                 const int start = map.self * READYQ_SHARD_FACTOR;
    484                 unsigned i;
     88                                i = __tls_rand() % lanes_count;
     89                        } while( !__atomic_try_acquire( &readyQ.data[i].lock ) );
     90                }
     91        } else {
    48592                do {
    486                         unsigned r;
    487                         if(unlikely(external)) { r = __tls_rand(); }
    488                         else { r = proc->rdq.its++; }
    489                         choose(hint) {
    490                                 case UNPARK_LOCAL : i = start + (r % READYQ_SHARD_FACTOR);
    491                                 case UNPARK_REMOTE: i = prf   + (r % READYQ_SHARD_FACTOR);
    492                         }
     93                        unsigned r = proc->rdq.its++;
     94                        i = proc->rdq.id + (r % __shard_factor.readyq);
     95                        /* paranoid */ verify( i < lanes_count );
    49396                        // If we can't lock it retry
    494                 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    495 
    496                 // Actually push it
    497                 push(lanes.data[i], thrd);
    498 
    499                 // Unlock and return
    500                 __atomic_unlock( &lanes.data[i].lock );
    501 
    502                 #if !defined(__CFA_NO_STATISTICS__)
    503                         if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
    504                         else __tls_stats()->ready.push.local.success++;
    505                 #endif
    506 
    507                 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
    508 
    509         }
    510 
    511         // Pop from the ready queue from a given cluster
    512         __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
    513                 /* paranoid */ verify( lanes.count > 0 );
    514                 /* paranoid */ verify( kernelTLS().this_processor );
    515 
    516                 processor * const proc = kernelTLS().this_processor;
    517                 const int cpu = __kernel_getcpu();
    518                 /* paranoid */ verify(cpu >= 0);
    519                 /* paranoid */ verify(cpu < cpu_info.hthrd_count);
    520                 /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
    521 
    522                 const cpu_map_entry_t & map = cpu_info.llc_map[cpu];
    523                 /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
    524                 /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
    525                 /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
    526 
    527                 const int start = map.self * READYQ_SHARD_FACTOR;
    528                 const unsigned long long ctsc = rdtscl();
    529 
    530                 // Did we already have a help target
    531                 if(proc->rdq.target == MAX) {
    532                         unsigned long long max = 0;
    533                         for(i; READYQ_SHARD_FACTOR) {
    534                                 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
    535                                 if(tsc > max) max = tsc;
    536                         }
    537                         //  proc->rdq.cutoff = (max + 2 * max) / 2;
    538                         /* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores.
    539                         /* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores.
    540 
    541                         if(0 == (__tls_rand() % 100)) {
    542                                 proc->rdq.target = __tls_rand() % lanes.count;
    543                         } else {
    544                                 unsigned cpu_chaos = map.start + (__tls_rand() % map.count);
    545                                 proc->rdq.target = (cpu_chaos * READYQ_SHARD_FACTOR) + (__tls_rand() % READYQ_SHARD_FACTOR);
    546                                 /* paranoid */ verify(proc->rdq.target >= (map.start * READYQ_SHARD_FACTOR));
    547                                 /* paranoid */ verify(proc->rdq.target <  ((map.start + map.count) * READYQ_SHARD_FACTOR));
    548                         }
    549 
    550                         /* paranoid */ verify(proc->rdq.target != MAX);
    551                 }
    552                 else {
    553                         unsigned long long max = 0;
    554                         for(i; READYQ_SHARD_FACTOR) {
    555                                 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
    556                                 if(tsc > max) max = tsc;
    557                         }
    558                         const unsigned long long cutoff = (max + 2 * max) / 2;
    559                         {
    560                                 unsigned target = proc->rdq.target;
    561                                 proc->rdq.target = MAX;
    562                                 lanes.help[target / READYQ_SHARD_FACTOR].tri++;
    563                                 if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
    564                                         thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
    565                                         proc->rdq.last = target;
    566                                         if(t) return t;
    567                                 }
    568                                 proc->rdq.target = MAX;
    569                         }
    570 
    571                         unsigned last = proc->rdq.last;
    572                         if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
    573                                 thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
    574                                 if(t) return t;
    575                         }
    576                         else {
    577                                 proc->rdq.last = MAX;
    578                         }
    579                 }
    580 
    581                 for(READYQ_SHARD_FACTOR) {
    582                         unsigned i = start + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
    583                         if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
    584                 }
    585 
    586                 // All lanes were empty, return 0p
    587                 return 0p;
    588         }
    589 
    590         __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
    591                 processor * const proc = kernelTLS().this_processor;
    592                 unsigned last = proc->rdq.last;
    593                 if(last != MAX) {
    594                         struct thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal));
    595                         if(t) return t;
    596                         proc->rdq.last = MAX;
    597                 }
    598 
    599                 unsigned i = __tls_rand() % lanes.count;
    600                 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
    601         }
    602         __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {
    603                 return search(cltr);
    604         }
    605 #endif
    606 #if defined(USE_RELAXED_FIFO)
    607         //-----------------------------------------------------------------------
    608         // get index from random number with or without bias towards queues
    609         static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) {
    610                 unsigned i;
    611                 bool local;
    612                 unsigned rlow  = r % BIAS;
    613                 unsigned rhigh = r / BIAS;
    614                 if((0 != rlow) && preferred >= 0) {
    615                         // (BIAS - 1) out of BIAS chances
    616                         // Use preferred queues
    617                         i = preferred + (rhigh % READYQ_SHARD_FACTOR);
    618                         local = true;
    619                 }
    620                 else {
    621                         // 1 out of BIAS chances
    622                         // Use all queues
    623                         i = rhigh;
    624                         local = false;
    625                 }
    626                 return [i, local];
    627         }
    628 
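The idx_from_r helper above splits one random draw so that (BIAS - 1) out of BIAS pushes land on the processor's READYQ_SHARD_FACTOR preferred lanes, and 1 out of BIAS falls through to an arbitrary lane. A plain-C sketch of the same split; the BIAS and SHARD constants here are illustrative assumptions, not values from this changeset:

    enum { BIAS = 64, SHARD = 2 };                    // assumed values, for illustration only
    // Returns the chosen lane index; *local reports whether the preferred path was taken.
    static unsigned biased_index( unsigned r, unsigned preferred, int * local ) {
            unsigned rlow  = r % BIAS;
            unsigned rhigh = r / BIAS;
            if( rlow != 0 ) {                         // (BIAS - 1) out of BIAS chances
                    *local = 1;
                    return preferred + (rhigh % SHARD);
            }
            *local = 0;                               // 1 out of BIAS: any lane (caller reduces modulo lane count)
            return rhigh;
    }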
    629         __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
    630                 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
    631 
    632                 const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
    633                 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
    634 
    635                 bool local;
    636                 int preferred = external ? -1 : kernelTLS().this_processor->rdq.id;
    637 
    638                 // Try to pick a lane and lock it
    639                 unsigned i;
    640                 do {
    641                         // Pick the index of a lane
    642                         unsigned r = __tls_rand_fwd();
    643                         [i, local] = idx_from_r(r, preferred);
    644 
    645                         i %= __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
    646 
    647                         #if !defined(__CFA_NO_STATISTICS__)
    648                                 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.attempt, 1, __ATOMIC_RELAXED);
    649                                 else if(local) __tls_stats()->ready.push.local.attempt++;
    650                                 else __tls_stats()->ready.push.share.attempt++;
    651                         #endif
    652 
    653                         // If we can't lock it retry
    654                 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    655 
    656                 // Actually push it
    657                 push(lanes.data[i], thrd);
    658 
    659                 // Unlock and return
    660                 __atomic_unlock( &lanes.data[i].lock );
    661 
    662                 // Mark the current index in the tls rng instance as having an item
    663                 __tls_rand_advance_bck();
    664 
    665                 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
    666 
    667                 // Update statistics
    668                 #if !defined(__CFA_NO_STATISTICS__)
    669                         if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
    670                         else if(local) __tls_stats()->ready.push.local.success++;
    671                         else __tls_stats()->ready.push.share.success++;
    672                 #endif
    673         }
    674 
    675         // Pop from the ready queue from a given cluster
    676         __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
    677                 /* paranoid */ verify( lanes.count > 0 );
    678                 /* paranoid */ verify( kernelTLS().this_processor );
    679                 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count );
    680 
    681                 unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
    682                 int preferred = kernelTLS().this_processor->rdq.id;
    683 
    684 
    685                 // As long as the list is not empty, try finding a lane that isn't empty and pop from it
    686                 for(25) {
    687                         // Pick two lists at random
    688                         unsigned ri = __tls_rand_bck();
    689                         unsigned rj = __tls_rand_bck();
    690 
    691                         unsigned i, j;
    692                         __attribute__((unused)) bool locali, localj;
    693                         [i, locali] = idx_from_r(ri, preferred);
    694                         [j, localj] = idx_from_r(rj, preferred);
    695 
    696                         i %= count;
    697                         j %= count;
    698 
    699                         // try popping from the 2 picked lists
    700                         struct thread$ * thrd = try_pop(cltr, i, j __STATS(, *(locali || localj ? &__tls_stats()->ready.pop.local : &__tls_stats()->ready.pop.help)));
    701                         if(thrd) {
    702                                 return thrd;
    703                         }
    704                 }
    705 
    706                 // All lanes were empty, return 0p
    707                 return 0p;
    708         }
    709 
    710         __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) { return pop_fast(cltr); }
    711         __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {
    712                 return search(cltr);
    713         }
    714 #endif
    715 #if defined(USE_WORK_STEALING)
    716         __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
    717                 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
    718 
    719                 // #define USE_PREFERRED
    720                 #if !defined(USE_PREFERRED)
    721                 const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
    722                 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
    723                 #else
    724                         unsigned preferred = thrd->preferred;
    725                         const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || preferred == MAX || thrd->curr_cluster != cltr;
    726                         /* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count );
    727 
    728                         unsigned r = preferred % READYQ_SHARD_FACTOR;
    729                         const unsigned start = preferred - r;
    730                 #endif
    731 
    732                 // Try to pick a lane and lock it
    733                 unsigned i;
    734                 do {
    735                         #if !defined(__CFA_NO_STATISTICS__)
    736                                 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.attempt, 1, __ATOMIC_RELAXED);
    737                                 else __tls_stats()->ready.push.local.attempt++;
    738                         #endif
    739 
    740                         if(unlikely(external)) {
    741                                 i = __tls_rand() % lanes.count;
    742                         }
    743                         else {
    744                                 #if !defined(USE_PREFERRED)
    745                                         processor * proc = kernelTLS().this_processor;
    746                                         unsigned r = proc->rdq.its++;
    747                                         i =  proc->rdq.id + (r % READYQ_SHARD_FACTOR);
    748                                 #else
    749                                         i = start + (r++ % READYQ_SHARD_FACTOR);
    750                                 #endif
    751                         }
    752                         // If we can't lock it retry
    753                 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    754 
    755                 // Actually push it
    756                 push(lanes.data[i], thrd);
    757 
    758                 // Unlock and return
    759                 __atomic_unlock( &lanes.data[i].lock );
    760 
    761                 #if !defined(__CFA_NO_STATISTICS__)
    762                         if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
    763                         else __tls_stats()->ready.push.local.success++;
    764                 #endif
    765 
    766                 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
    767         }
    768 
    769         // Pop from the ready queue from a given cluster
    770         __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
    771                 /* paranoid */ verify( lanes.count > 0 );
    772                 /* paranoid */ verify( kernelTLS().this_processor );
    773                 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count );
    774 
    775                 processor * proc = kernelTLS().this_processor;
    776 
    777                 if(proc->rdq.target == MAX) {
    778                         unsigned long long min = ts(lanes.data[proc->rdq.id]);
    779                         for(int i = 0; i < READYQ_SHARD_FACTOR; i++) {
    780                                 unsigned long long tsc = ts(lanes.data[proc->rdq.id + i]);
    781                                 if(tsc < min) min = tsc;
    782                         }
    783                         proc->rdq.cutoff = min;
    784                         proc->rdq.target = __tls_rand() % lanes.count;
    785                 }
    786                 else {
    787                         unsigned target = proc->rdq.target;
    788                         proc->rdq.target = MAX;
    789                         const unsigned long long bias = 0; //2_500_000_000;
    790                         const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff;
    791                         if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) {
     97                } while( !__atomic_try_acquire( &readyQ.data[i].lock ) );
     98        }
     99
     100        // Actually push it
     101        push(readyQ.data[i], thrd);
     102
     103        // Unlock and return
     104        __atomic_unlock( &readyQ.data[i].lock );
     105
     106        #if !defined(__CFA_NO_STATISTICS__)
     107                if(unlikely(external || remote)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
     108                else __tls_stats()->ready.push.local.success++;
     109        #endif
     110}
     111
     112__attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr) with (cltr->sched) {
     113        const size_t lanes_count = readyQ.count;
     114
     115        /* paranoid */ verify( __shard_factor.readyq > 0 );
     116        /* paranoid */ verify( lanes_count > 0 );
     117        /* paranoid */ verify( kernelTLS().this_processor );
     118        /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes_count );
     119
     120        processor * const proc = kernelTLS().this_processor;
     121        unsigned this = proc->rdq.id;
     122        /* paranoid */ verify( this < lanes_count );
     123        __cfadbg_print_safe(ready_queue, "Kernel : pop from %u\n", this);
     124
      125        // Figure out what the current cache is
     126        const unsigned this_cache = cache_id(cltr, this / __shard_factor.readyq);
     127        const unsigned long long ctsc = rdtscl();
     128
     129        if(proc->rdq.target == MAX) {
     130                uint64_t chaos = __tls_rand();
     131                unsigned ext = chaos & 0xff;
     132                unsigned other  = (chaos >> 8) % (lanes_count);
     133
     134                if(ext < 3 || __atomic_load_n(&caches[other / __shard_factor.readyq].id, __ATOMIC_RELAXED) == this_cache) {
     135                        proc->rdq.target = other;
     136                }
     137        }
     138        else {
     139                const unsigned target = proc->rdq.target;
      140                __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tsc %llu\n", this, target, readyQ.tscs[target].tv);
     141                /* paranoid */ verify( readyQ.tscs[target].tv != MAX );
     142                if(target < lanes_count) {
     143                        const unsigned long long cutoff = calc_cutoff(ctsc, proc, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);
     144                        const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].tv, readyQ.tscs[target].ma);
     145                        __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");
     146                        if(age > cutoff) {
    792147                                thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
    793148                                if(t) return t;
    794149                        }
    795150                }
    796 
    797                 for(READYQ_SHARD_FACTOR) {
    798                         unsigned i = proc->rdq.id + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
    799                         if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
    800                 }
    801                 return 0p;
    802         }
    803 
    804         __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
    805                 unsigned i = __tls_rand() % lanes.count;
    806                 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
    807         }
    808 
    809         __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) with (cltr->ready_queue) {
    810                 return search(cltr);
    811         }
    812 #endif
     151                proc->rdq.target = MAX;
     152        }
     153
     154        for(__shard_factor.readyq) {
     155                unsigned i = this + (proc->rdq.itr++ % __shard_factor.readyq);
     156                if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
     157        }
     158
      159        // All lanes were empty, return 0p
     160        return 0p;
     161
     162}
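One reading of the target-selection branch in pop_fast above (hedged, since only the code speaks for the changeset): the low byte of a single random draw gates cross-cache helping, so roughly 3/256, about 1.2%, of attempts may pick a lane on another cache, while the rest only accept a candidate mapped to the same cache. A standalone plain-C sketch with hypothetical names:

    #include <stdint.h>
    #include <stdbool.h>

    // lane_cache[i] stands in for the per-shard cache id tracked by the scheduler above.
    static bool pick_help_target( uint64_t chaos, unsigned lanes_count,
                                  const unsigned * lane_cache, unsigned this_cache,
                                  unsigned * target ) {
            unsigned ext   = chaos & 0xff;            // ~3/256 chance to ignore cache affinity
            unsigned other = (chaos >> 8) % lanes_count;
            if( ext < 3 || lane_cache[other] == this_cache ) {
                    *target = other;
                    return true;
            }
            return false;
    }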
     163__attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) {
     164        unsigned i = __tls_rand() % (cltr->sched.readyQ.count);
     165        return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
     166}
     167__attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {
     168        return search(cltr);
     169}
    813170
    814171//=======================================================================
     
    820177//-----------------------------------------------------------------------
    821178// try to pop from a lane given by index w
    822 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) {
    823         /* paranoid */ verify( w < lanes.count );
     179static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->sched) {
     180        /* paranoid */ verify( w < readyQ.count );
    824181        __STATS( stats.attempt++; )
    825182
    826183        // Get relevant elements locally
    827         __intrusive_lane_t & lane = lanes.data[w];
     184        __intrusive_lane_t & lane = readyQ.data[w];
    828185
    829186        // If list looks empty retry
     
    845202        // Actually pop the list
    846203        struct thread$ * thrd;
    847         #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING)
    848                 unsigned long long tsc_before = ts(lane);
    849         #endif
     204        unsigned long long tsc_before = ts(lane);
    850205        unsigned long long tsv;
    851206        [thrd, tsv] = pop(lane);
     
    861216        __STATS( stats.success++; )
    862217
    863         #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING)
    864                 if (tsv != MAX) {
    865                         unsigned long long now = rdtscl();
    866                         unsigned long long pma = __atomic_load_n(&lanes.tscs[w].ma, __ATOMIC_RELAXED);
    867                         __atomic_store_n(&lanes.tscs[w].tv, tsv, __ATOMIC_RELAXED);
    868                         __atomic_store_n(&lanes.tscs[w].ma, moving_average(now, tsc_before, pma), __ATOMIC_RELAXED);
    869                 }
    870         #endif
    871 
    872         #if defined(USE_AWARE_STEALING) || defined(USE_CPU_WORK_STEALING)
    873                 thrd->preferred = w / READYQ_SHARD_FACTOR;
    874         #else
    875                 thrd->preferred = w;
    876         #endif
     218        if (tsv != MAX) {
     219                unsigned long long now = rdtscl();
     220                unsigned long long pma = __atomic_load_n(&readyQ.tscs[w].ma, __ATOMIC_RELAXED);
     221                __atomic_store_n(&readyQ.tscs[w].tv, tsv, __ATOMIC_RELAXED);
     222                __atomic_store_n(&readyQ.tscs[w].ma, moving_average(now, tsc_before, pma), __ATOMIC_RELAXED);
     223        }
     224
     225        thrd->preferred = w / __shard_factor.readyq;
    877226
    878227        // return the popped thread
     
    883232// try to pop from any lanes, making sure you don't miss any threads pushed
    884233// before the start of the function
    885 static inline struct thread$ * search(struct cluster * cltr) with (cltr->ready_queue) {
    886         /* paranoid */ verify( lanes.count > 0 );
    887         unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
     234static inline struct thread$ * search(struct cluster * cltr) {
     235        const size_t lanes_count = cltr->sched.readyQ.count;
     236        /* paranoid */ verify( lanes_count > 0 );
     237        unsigned count = __atomic_load_n( &lanes_count, __ATOMIC_RELAXED );
    888238        unsigned offset = __tls_rand();
    889239        for(i; count) {
     
    902252// get preferred ready queue for new thread
    903253unsigned ready_queue_new_preferred() {
    904         unsigned pref = 0;
     254        unsigned pref = MAX;
    905255        if(struct thread$ * thrd = publicTLS_get( this_thread )) {
    906256                pref = thrd->preferred;
    907257        }
    908         else {
    909                 #if defined(USE_CPU_WORK_STEALING)
    910                         pref = __kernel_getcpu();
    911                 #endif
    912         }
    913 
    914         #if defined(USE_CPU_WORK_STEALING)
    915                 /* paranoid */ verify(pref >= 0);
    916                 /* paranoid */ verify(pref < cpu_info.hthrd_count);
    917         #endif
    918258
    919259        return pref;
     
    921261
    922262//-----------------------------------------------------------------------
    923 // Check that all the intrusive queues in the data structure are still consistent
    924 static void check( __ready_queue_t & q ) with (q) {
    925         #if defined(__CFA_WITH_VERIFY__)
    926                 {
    927                         for( idx ; lanes.count ) {
    928                                 __intrusive_lane_t & sl = lanes.data[idx];
    929                                 assert(!lanes.data[idx].lock);
    930 
    931                                         if(is_empty(sl)) {
    932                                                 assert( sl.anchor.next == 0p );
    933                                                 assert( sl.anchor.ts   == -1llu );
    934                                                 assert( mock_head(sl)  == sl.prev );
    935                                         } else {
    936                                                 assert( sl.anchor.next != 0p );
    937                                                 assert( sl.anchor.ts   != -1llu );
    938                                                 assert( mock_head(sl)  != sl.prev );
    939                                         }
    940                         }
    941                 }
    942         #endif
    943 }
    944 
    945 //-----------------------------------------------------------------------
    946263// Given 2 indexes, pick the list with the oldest push and try to pop from it
    947 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) {
     264static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->sched) {
    948265        // Pick the best list
    949266        int w = i;
    950         if( __builtin_expect(!is_empty(lanes.data[j]), true) ) {
    951                 w = (ts(lanes.data[i]) < ts(lanes.data[j])) ? i : j;
     267        if( __builtin_expect(!is_empty(readyQ.data[j]), true) ) {
     268                w = (ts(readyQ.data[i]) < ts(readyQ.data[j])) ? i : j;
    952269        }
    953270
    954271        return try_pop(cltr, w __STATS(, stats));
    955272}
    956 
    957 // Call this function if the intrusive list was moved using memcpy
    958 // fixes the list so that the pointers back to anchors aren't left dangling
    959 static inline void fix(__intrusive_lane_t & ll) {
    960                         if(is_empty(ll)) {
    961                                 verify(ll.anchor.next == 0p);
    962                                 ll.prev = mock_head(ll);
    963                         }
    964 }
    965 
    966 static void assign_list(unsigned & value, dlist(processor) & list, unsigned count) {
    967         processor * it = &list`first;
    968         for(unsigned i = 0; i < count; i++) {
    969                 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
    970                 it->rdq.id = value;
    971                 it->rdq.target = MAX;
    972                 value += READYQ_SHARD_FACTOR;
    973                 it = &(*it)`next;
    974         }
    975 }
    976 
    977 static void reassign_cltr_id(struct cluster * cltr) {
    978         unsigned preferred = 0;
    979         assign_list(preferred, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
    980         assign_list(preferred, cltr->procs.idles  , cltr->procs.idle );
    981 }
    982 
    983 static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) {
    984         #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING)
    985                 lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
    986                 for(i; lanes.count) {
    987                         lanes.tscs[i].tv = rdtscl();
    988                         lanes.tscs[i].ma = 0;
    989                 }
    990         #endif
    991 }
    992 
    993 #if defined(USE_CPU_WORK_STEALING)
    994         // ready_queue size is fixed in this case
    995         void ready_queue_grow(struct cluster * cltr) {}
    996         void ready_queue_shrink(struct cluster * cltr) {}
    997 #else
    998         // Grow the ready queue
    999         void ready_queue_grow(struct cluster * cltr) {
    1000                 size_t ncount;
    1001                 int target = cltr->procs.total;
    1002 
    1003                 /* paranoid */ verify( ready_mutate_islocked() );
    1004                 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
    1005 
    1006                 // Make sure that everything is consistent
    1007                 /* paranoid */ check( cltr->ready_queue );
    1008 
    1009                 // grow the ready queue
    1010                 with( cltr->ready_queue ) {
    1011                         // Find new count
    1012                         // Make sure we always have at least 1 list
    1013                         if(target >= 2) {
    1014                                 ncount = target * READYQ_SHARD_FACTOR;
    1015                         } else {
    1016                                 ncount = SEQUENTIAL_SHARD;
    1017                         }
    1018 
    1019                         // Allocate new array (uses realloc and memcpies the data)
    1020                         lanes.data = alloc( ncount, lanes.data`realloc );
    1021 
    1022                         // Fix the moved data
    1023                         for( idx; (size_t)lanes.count ) {
    1024                                 fix(lanes.data[idx]);
    1025                         }
    1026 
    1027                         // Construct new data
    1028                         for( idx; (size_t)lanes.count ~ ncount) {
    1029                                 (lanes.data[idx]){};
    1030                         }
    1031 
    1032                         // Update original
    1033                         lanes.count = ncount;
    1034 
    1035                         lanes.caches = alloc( target, lanes.caches`realloc );
    1036                 }
    1037 
    1038                 fix_times(cltr);
    1039 
    1040                 reassign_cltr_id(cltr);
    1041 
    1042                 // Make sure that everything is consistent
    1043                 /* paranoid */ check( cltr->ready_queue );
    1044 
    1045                 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
    1046 
    1047                 /* paranoid */ verify( ready_mutate_islocked() );
    1048         }
    1049 
    1050         // Shrink the ready queue
    1051         void ready_queue_shrink(struct cluster * cltr) {
    1052                 /* paranoid */ verify( ready_mutate_islocked() );
    1053                 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
    1054 
    1055                 // Make sure that everything is consistent
    1056                 /* paranoid */ check( cltr->ready_queue );
    1057 
    1058                 int target = cltr->procs.total;
    1059 
    1060                 with( cltr->ready_queue ) {
    1061                         // Remember old count
    1062                         size_t ocount = lanes.count;
    1063 
    1064                         // Find new count
    1065                         // Make sure we always have at least 1 list
    1066                         lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;
    1067                         /* paranoid */ verify( ocount >= lanes.count );
    1068                         /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
    1069 
    1070                         // for printing, count the number of displaced threads
    1071                         #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
    1072                                 __attribute__((unused)) size_t displaced = 0;
    1073                         #endif
    1074 
    1075                         // redistribute old data
    1076                         for( idx; (size_t)lanes.count ~ ocount) {
    1077                                 // Lock is not strictly needed but makes checking invariants much easier
    1078                                 __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);
    1079                                 verify(locked);
    1080 
    1081                                 // As long as we can pop from this lane, push the threads somewhere else in the queue
    1082                                 while(!is_empty(lanes.data[idx])) {
    1083                                         struct thread$ * thrd;
    1084                                         unsigned long long _;
    1085                                         [thrd, _] = pop(lanes.data[idx]);
    1086 
    1087                                         push(cltr, thrd, true);
    1088 
    1089                                         // for printing, count the number of displaced threads
    1090                                         #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
    1091                                                 displaced++;
    1092                                         #endif
    1093                                 }
    1094 
    1095                                 // Unlock the lane
    1096                                 __atomic_unlock(&lanes.data[idx].lock);
    1097 
    1098                                 // TODO print the queue statistics here
    1099 
    1100                                 ^(lanes.data[idx]){};
    1101                         }
    1102 
    1103                         __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
    1104 
    1105                         // Allocate new array (uses realloc and memcpies the data)
    1106                         lanes.data = alloc( lanes.count, lanes.data`realloc );
    1107 
    1108                         // Fix the moved data
    1109                         for( idx; (size_t)lanes.count ) {
    1110                                 fix(lanes.data[idx]);
    1111                         }
    1112 
    1113                         lanes.caches = alloc( target, lanes.caches`realloc );
    1114                 }
    1115 
    1116                 fix_times(cltr);
    1117 
    1118 
    1119                 reassign_cltr_id(cltr);
    1120 
    1121                 // Make sure that everything is consistent
    1122                 /* paranoid */ check( cltr->ready_queue );
    1123 
    1124                 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
    1125                 /* paranoid */ verify( ready_mutate_islocked() );
    1126         }
    1127 #endif
    1128 
    1129 #if !defined(__CFA_NO_STATISTICS__)
    1130         unsigned cnt(const __ready_queue_t & this, unsigned idx) {
    1131                 /* paranoid */ verify(this.lanes.count > idx);
    1132                 return this.lanes.data[idx].cnt;
    1133         }
    1134 #endif
    1135 
    1136 
    1137 #if   defined(CFA_HAVE_LINUX_LIBRSEQ)
    1138         // No definition needed
    1139 #elif defined(CFA_HAVE_LINUX_RSEQ_H)
    1140 
    1141         #if defined( __x86_64 ) || defined( __i386 )
    1142                 #define RSEQ_SIG        0x53053053
    1143         #elif defined( __ARM_ARCH )
    1144                 #ifdef __ARMEB__
    1145                 #define RSEQ_SIG    0xf3def5e7      /* udf    #24035    ; 0x5de3 (ARMv6+) */
    1146                 #else
    1147                 #define RSEQ_SIG    0xe7f5def3      /* udf    #24035    ; 0x5de3 */
    1148                 #endif
    1149         #endif
    1150 
    1151         extern void __disable_interrupts_hard();
    1152         extern void __enable_interrupts_hard();
    1153 
    1154         static void __kernel_raw_rseq_register  (void) {
    1155                 /* paranoid */ verify( __cfaabi_rseq.cpu_id == RSEQ_CPU_ID_UNINITIALIZED );
    1156 
    1157                 // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, (sigset_t *)0p, _NSIG / 8);
    1158                 int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, RSEQ_SIG);
    1159                 if(ret != 0) {
    1160                         int e = errno;
    1161                         switch(e) {
    1162                         case EINVAL: abort("KERNEL ERROR: rseq register invalid argument");
    1163                         case ENOSYS: abort("KERNEL ERROR: rseq register not supported");
    1164                         case EFAULT: abort("KERNEL ERROR: rseq register with invalid argument");
    1165                         case EBUSY : abort("KERNEL ERROR: rseq register already registered");
    1166                         case EPERM : abort("KERNEL ERROR: rseq register sig argument on unregistration does not match the signature received on registration");
    1167                         default: abort("KERNEL ERROR: rseq register unexpected return %d", e);
    1168                         }
    1169                 }
    1170         }
    1171 
    1172         static void __kernel_raw_rseq_unregister(void) {
    1173                 /* paranoid */ verify( __cfaabi_rseq.cpu_id >= 0 );
    1174 
    1175                 // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, (sigset_t *)0p, _NSIG / 8);
    1176                 int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
    1177                 if(ret != 0) {
    1178                         int e = errno;
    1179                         switch(e) {
    1180                         case EINVAL: abort("KERNEL ERROR: rseq unregister invalid argument");
    1181                         case ENOSYS: abort("KERNEL ERROR: rseq unregister not supported");
    1182                         case EFAULT: abort("KERNEL ERROR: rseq unregister with invalid argument");
    1183                         case EBUSY : abort("KERNEL ERROR: rseq unregister already registered");
    1184                         case EPERM : abort("KERNEL ERROR: rseq unregister sig argument on unregistration does not match the signature received on registration");
    1185                         default: abort("KERNEL ERROR: rseq unregister unexpected return %d", e);
    1186                         }
    1187                 }
    1188         }
    1189 #else
    1190         // No definition needed
    1191 #endif
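For readers unfamiliar with the raw rseq(2) interface used above, a minimal standalone registration sketch follows. It assumes a Linux system that ships <linux/rseq.h> and a thread with no prior registration; recent glibc versions register rseq themselves, in which case this call fails with EBUSY, the same case handled above.

    #define _GNU_SOURCE
    #include <linux/rseq.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdio.h>

    #define RSEQ_SIG 0x53053053                       // x86 signature, as in the code above

    static __thread struct rseq rs __attribute__((aligned(32)));

    int main(void) {
            // Same call shape as __kernel_raw_rseq_register: rseq(ptr, len, flags, sig).
            if( syscall( __NR_rseq, &rs, sizeof(rs), 0, RSEQ_SIG ) != 0 ) {
                    perror( "rseq register" );
                    return 1;
            }
            printf( "registered, cpu_id = %u\n", rs.cpu_id );
            return 0;
    }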
  • libcfa/src/concurrency/ready_subqueue.hfa

    ref3c383 rd672350  
    2525        );
    2626        return rhead;
    27 }
    28 
    29 // Ctor
    30 void ?{}( __intrusive_lane_t & this ) {
    31         this.lock = false;
    32         this.prev = mock_head(this);
    33         this.anchor.next = 0p;
    34         this.anchor.ts   = -1llu;
    35         #if !defined(__CFA_NO_STATISTICS__)
    36                 this.cnt  = 0;
    37         #endif
    38 
    39         // We add a boat-load of assertions here because the anchor code is very fragile
    40         /* paranoid */ _Static_assert( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );
    41         /* paranoid */ verify( offsetof( thread$, link ) == offsetof(__intrusive_lane_t, anchor) );
    42         /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.anchor) );
    43         /* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next );
    44         /* paranoid */ verify( &mock_head(this)->link.ts   == &this.anchor.ts   );
    45         /* paranoid */ verify( mock_head(this)->link.next == 0p );
    46         /* paranoid */ verify( mock_head(this)->link.ts   == -1llu  );
    47         /* paranoid */ verify( mock_head(this) == this.prev );
    48         /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 );
    49         /* paranoid */ verify( __alignof__(this) == 128 );
    50         /* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) );
    51 }
    52 
    53 // Dtor is trivial
    54 void ^?{}( __intrusive_lane_t & this ) {
    55         // Make sure the list is empty
    56         /* paranoid */ verify( this.anchor.next == 0p );
    57         /* paranoid */ verify( this.anchor.ts   == -1llu );
    58         /* paranoid */ verify( mock_head(this)  == this.prev );
    5927}
    6028
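The removed constructor above leans on the lane's anchor field overlapping a thread$'s link field, so mock_head can act as a sentinel node without allocating one; the assertions pin that layout down. A hedged plain-C sketch of the same sentinel-anchor pattern, with hypothetical types rather than the changeset's:

    #include <stddef.h>
    #include <assert.h>

    struct node;
    struct link_t { struct node * next; unsigned long long ts; };
    struct node   { struct link_t link; };
    struct lane   { struct node * prev; struct link_t anchor; };

    // View the lane itself as a fake node whose link field is the lane's anchor.
    static inline struct node * mock_head( struct lane * l ) {
            return (struct node *)((char *)&l->anchor - offsetof(struct node, link));
    }

    int main(void) {
            struct lane l = { 0 };
            l.prev = mock_head(&l);                   // empty list: prev points at the sentinel
            assert( &mock_head(&l)->link.next == &l.anchor.next );
            return 0;
    }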
  • libcfa/src/concurrency/thread.cfa

    ref3c383 rd672350  
    1919#include "thread.hfa"
    2020
    21 #include "kernel_private.hfa"
     21#include "kernel/private.hfa"
    2222#include "exception.hfa"
    2323
  • libcfa/src/containers/string.cfa

    ref3c383 rd672350  
    9292}
    9393
    94 string ?=?(string & this, string other) {
     94string & ?=?(string & this, string & other) { //// <---- straw man change
    9595    (*this.inner) = (*other.inner);
    9696    return this;
     
    235235int find(const string &s, const char* search, size_t searchsize) {
    236236    return find( *s.inner, search, searchsize);
     237}
     238
     239int findFrom(const string &s, size_t fromPos, char search) {
     240    return findFrom( *s.inner, fromPos, search );
     241}
     242
     243int findFrom(const string &s, size_t fromPos, const string &search) {
     244    return findFrom( *s.inner, fromPos, *search.inner );
     245}
     246
     247int findFrom(const string &s, size_t fromPos, const char* search) {
     248    return findFrom( *s.inner, fromPos, search );
     249}
     250
     251int findFrom(const string &s, size_t fromPos, const char* search, size_t searchsize) {
     252    return findFrom( *s.inner, fromPos, search, searchsize );
    237253}
    238254
  • libcfa/src/containers/string.hfa

    ref3c383 rd672350  
    4141void ?=?(string &s, const string &other);
    4242void ?=?(string &s, char other);
    43 string ?=?(string &s, string other);  // string tolerates memcpys; still saw calls to autogen
    44 
     43string & ?=?(string &s, string &other);  // surprising ret seems to help avoid calls to autogen
     44//string ?=?( string &, string ) = void;
    4545void ^?{}(string &s);
    4646
     
    9393int find(const string &s, const char* search, size_t searchsize);
    9494
     95int findFrom(const string &s, size_t fromPos, char search);
     96int findFrom(const string &s, size_t fromPos, const string &search);
     97int findFrom(const string &s, size_t fromPos, const char* search);
     98int findFrom(const string &s, size_t fromPos, const char* search, size_t searchsize);
     99
    95100bool includes(const string &s, const string &search);
    96101bool includes(const string &s, const char* search);
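The new findFrom overloads search from a caller-supplied position and, like find, report "not found" by returning the string's size. A hedged usage sketch at the string level, assuming the usual size accessor:

    string s = "mississippi";
    int i = findFrom( s, 3, "ss" );              // expected 5: the second "ss" starts at index 5
    if ( i == size( s ) ) { /* no match from position 3 onward */ }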
  • libcfa/src/containers/string_res.cfa

    ref3c383 rd672350  
    1515
    1616#include "string_res.hfa"
    17 #include <stdlib.hfa>  // e.g. malloc
    18 #include <string.h>    // e.g. strlen
     17#include "string_sharectx.hfa"
     18#include "stdlib.hfa"
     19
     20// Workaround for observed performance penalty from calling CFA's alloc.
     21// Workaround is:  EndVbyte = TEMP_ALLOC(char, CurrSize)
     22// Should be:      EndVbyte = alloc(CurrSize)
     23#define TEMP_ALLOC(T, n) (( T* ) malloc( n * sizeof( T ) ))
     24
     25#include <assert.h>
    1926
    2027//######################### VbyteHeap "header" #########################
    21 
    22 
    23 
    24 
    25 
    26 
    27 
    28 
    29 // DON'T COMMIT:
    30 // #define VbyteDebug
    31 
    32 
    33 
    34 
    3528
    3629#ifdef VbyteDebug
     
    5447
    5548   
    56 static inline void compaction( VbyteHeap & );                           // compaction of the byte area
    57 static inline void garbage( VbyteHeap & );                              // garbage collect the byte area
    58 static inline void extend( VbyteHeap &, int );                  // extend the size of the byte area
    59 static inline void reduce( VbyteHeap &, int );                  // reduce the size of the byte area
    60 
    61 static inline void ?{}( VbyteHeap &, int = 1000 );
    62 static inline void ^?{}( VbyteHeap & );
    63 static inline void ByteCopy( VbyteHeap &, char *, int, int, char *, int, int ); // copy a block of bytes from one location in the heap to another
    64 static inline int ByteCmp( VbyteHeap &, char *, int, int, char *, int, int );   // compare 2 blocks of bytes
    65 static inline char *VbyteAlloc( VbyteHeap &, int );                     // allocate a block bytes in the heap
    66 
    67 
    68 static inline void AddThisAfter( HandleNode &, HandleNode & );
    69 static inline void DeleteNode( HandleNode & );
    70 static inline void MoveThisAfter( HandleNode &, const HandleNode & );           // move current handle after parameter handle
     49static void compaction( VbyteHeap & );                          // compaction of the byte area
     50static void garbage( VbyteHeap &, int );                                // garbage collect the byte area
     51static void extend( VbyteHeap &, int );                 // extend the size of the byte area
     52static void reduce( VbyteHeap &, int );                 // reduce the size of the byte area
     53
     54static void ?{}( VbyteHeap &, size_t = 1000 );
     55static void ^?{}( VbyteHeap & );
     56
     57static int ByteCmp( char *, int, int, char *, int, int );       // compare 2 blocks of bytes
      58static char *VbyteAlloc( VbyteHeap &, int );                    // allocate a block of bytes in the heap
     59static char *VbyteTryAdjustLast( VbyteHeap &, int );
     60
     61static void AddThisAfter( HandleNode &, HandleNode & );
     62static void DeleteNode( HandleNode & );
     63static void MoveThisAfter( HandleNode &, const HandleNode & );          // move current handle after parameter handle
    7164
    7265
     7467// Allocate the storage for the variable-sized area and initialize the heap variables.
    7467
    75 static inline void ?{}( VbyteHeap & this, int Size ) with(this) {
     68static void ?{}( VbyteHeap & this, size_t Size ) with(this) {
    7669#ifdef VbyteDebug
    7770    serr | "enter:VbyteHeap::VbyteHeap, this:" | &this | " Size:" | Size;
     
    7972    NoOfCompactions = NoOfExtensions = NoOfReductions = 0;
    8073    InitSize = CurrSize = Size;
    81     StartVbyte = EndVbyte = alloc(CurrSize);
     74    StartVbyte = EndVbyte = TEMP_ALLOC(char, CurrSize);
    8275    ExtVbyte = (void *)( StartVbyte + CurrSize );
    8376    Header.flink = Header.blink = &Header;
     77    Header.ulink = & this;
    8478#ifdef VbyteDebug
    8579    HeaderPtr = &Header;
     
    9185// Release the dynamically allocated storage for the byte area.
    9286
    93 static inline void ^?{}( VbyteHeap & this ) with(this) {
     87static void ^?{}( VbyteHeap & this ) with(this) {
    9488    free( StartVbyte );
    9589} // ~VbyteHeap
     
    10296// creator.
    10397
    104 void ?{}( HandleNode & this ) with(this) {
     98static void ?{}( HandleNode & this ) with(this) {
    10599#ifdef VbyteDebug
    106100    serr | "enter:HandleNode::HandleNode, this:" | &this;
     
    117111// collection.
    118112
    119 void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) {
     113static void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) {
    120114#ifdef VbyteDebug
    121115    serr | "enter:HandleNode::HandleNode, this:" | &this;
     
    123117    s = 0;
    124118    lnth = 0;
     119    ulink = &vh;
    125120    AddThisAfter( this, *vh.Header.blink );
    126121#ifdef VbyteDebug
     
    133128// is the responsibility of the creator to destroy it.
    134129
    135 void ^?{}( HandleNode & this ) with(this) {
     130static void ^?{}( HandleNode & this ) with(this) {
    136131#ifdef VbyteDebug
    137132    serr | "enter:HandleNode::~HandleNode, this:" | & this;
     
    149144} // ~HandleNode
    150145
     146
     147//######################### String Sharing Context #########################
     148
     149static string_sharectx * ambient_string_sharectx;               // fickle top of stack
     150static string_sharectx default_string_sharectx = {NEW_SHARING}; // stable bottom of stack
     151
     152void ?{}( string_sharectx & this, StringSharectx_Mode mode ) with( this ) {
     153    (older){ ambient_string_sharectx };
     154    if ( mode == NEW_SHARING ) {
     155        (activeHeap){ new( (size_t) 1000 ) };
     156    } else {
     157        verify( mode == NO_SHARING );
     158        (activeHeap){ 0p };
     159    }
     160    ambient_string_sharectx = & this;
     161}
     162
     163void ^?{}( string_sharectx & this ) with( this ) {
     164    if ( activeHeap ) delete( activeHeap );
     165
     166    // unlink this from older-list starting from ambient_string_sharectx
     167    // usually, this==ambient_string_sharectx and the loop runs zero times
     168    string_sharectx *& c = ambient_string_sharectx;
     169    while ( c != &this ) &c = &c->older;              // find this
     170    c = this.older;                                   // unlink
     171}
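These two routines maintain a stack of sharing contexts threaded through older, with ambient_string_sharectx as the top of the stack. A hedged usage sketch of pushing and popping a context by scope:

    {
        string_sharectx ctx = { NO_SHARING };    // push: strings created below get private heaps
        string_res scratch = { "temporary", 9 };
    }                                            // ^?{} pops ctx, restoring the previous ambient context

The destructor walks the older chain, so a context destroyed out of stack order is still unlinked correctly; in the common case the loop runs zero times.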
     172
    151173//######################### String Resource #########################
    152174
    153175
    154 VbyteHeap HeapArea;
    155 
    156 VbyteHeap * DEBUG_string_heap = & HeapArea;
     176VbyteHeap * DEBUG_string_heap() {
     177    assert( ambient_string_sharectx->activeHeap && "No sharing context is active" );
     178    return ambient_string_sharectx->activeHeap;
     179}
    157180
    158181size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ) {
     
    160183}
    161184
     185size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ) {
     186    return heap->CurrSize;
     187}
     188
    162189const char * DEBUG_string_heap_start( VbyteHeap * heap ) {
    163190    return heap->StartVbyte;
    164191}
    165 
    166192
    167193// Returns the size of the string in bytes
     
    187213    // Store auto-newline state so it can be restored
    188214    bool anl = getANL$(out);
    189     nlOff(out);
    190     for (size_t i = 0; i < s.Handle.lnth; i++) {
    191         // Need to re-apply on the last output operator, for whole-statement version
    192         if (anl && i == s.Handle.lnth-1) nlOn(out);
    193         out | s[i];
    194     }
    195     return out;
     215    if( s.Handle.lnth == 0 ) {
     216        sout | "";
     217    } else {
     218        nlOff(out);
     219        for (size_t i = 0; i < s.Handle.lnth; i++) {
     220            // Need to re-apply on the last output operator, for whole-statement version
     221            if (anl && i == s.Handle.lnth-1) nlOn(out);
     222            out | s[i];
     223        }
     224    }
    196225}
    197226
    198227// Empty constructor
    199228void ?{}(string_res &s) with(s) {
    200     (Handle){ HeapArea };
     229    if( ambient_string_sharectx->activeHeap ) {
     230        (Handle){ * ambient_string_sharectx->activeHeap };
     231        (shareEditSet_owns_ulink){ false };
     232        verify( Handle.s == 0p && Handle.lnth == 0 );
     233    } else {
     234        (Handle){ * new( (size_t) 10 ) };  // TODO: can I lazily avoid allocating for empty string
     235        (shareEditSet_owns_ulink){ true };
     236        Handle.s = Handle.ulink->StartVbyte;
     237        verify( Handle.lnth == 0 );
     238    }
    201239    s.shareEditSet_prev = &s;
    202240    s.shareEditSet_next = &s;
    203241}
    204242
     243static void eagerCopyCtorHelper(string_res &s, const char* rhs, size_t rhslnth) with(s) {
     244    if( ambient_string_sharectx->activeHeap ) {
     245        (Handle){ * ambient_string_sharectx->activeHeap };
     246        (shareEditSet_owns_ulink){ false };
     247    } else {
     248        (Handle){ * new( rhslnth ) };
     249        (shareEditSet_owns_ulink){ true };
     250    }
     251    Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
     252    Handle.lnth = rhslnth;
     253    memmove( Handle.s, rhs, rhslnth );
     254    s.shareEditSet_prev = &s;
     255    s.shareEditSet_next = &s;
     256}
     257
    205258// Constructor from a raw buffer and size
    206259void ?{}(string_res &s, const char* rhs, size_t rhslnth) with(s) {
    207     (Handle){ HeapArea };
    208     Handle.s = VbyteAlloc(HeapArea, rhslnth);
     260    eagerCopyCtorHelper(s, rhs, rhslnth);
     261}
     262
     263// private ctor (not in header): use specified heap (ignore ambient) and copy chars in
     264void ?{}( string_res &s, VbyteHeap & heap, const char* rhs, size_t rhslnth ) with(s) {
     265    (Handle){ heap };
     266    Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
    209267    Handle.lnth = rhslnth;
    210     for ( int i = 0; i < rhslnth; i += 1 ) {            // copy characters
    211         Handle.s[i] = rhs[i];
    212     } // for
     268    (s.shareEditSet_owns_ulink){ false };
     269    memmove( Handle.s, rhs, rhslnth );
    213270    s.shareEditSet_prev = &s;
    214271    s.shareEditSet_next = &s;
    215272}
    216273
    217 // String literal constructor
    218 void ?{}(string_res &s, const char* rhs) {
    219     (s){ rhs, strlen(rhs) };
    220 }
    221 
    222274// General copy constructor
    223275void ?{}(string_res &s, const string_res & s2, StrResInitMode mode, size_t start, size_t end ) {
    224276
    225     (s.Handle){ HeapArea };
    226     s.Handle.s = s2.Handle.s + start;
    227     s.Handle.lnth = end - start;
    228     MoveThisAfter(s.Handle, s2.Handle );                        // insert this handle after rhs handle
    229     // ^ bug?  skip others at early point in string
    230    
    231     if (mode == COPY_VALUE) {
    232         // make s alone in its shareEditSet
    233         s.shareEditSet_prev = &s;
    234         s.shareEditSet_next = &s;
     277    verify( start <= end && end <= s2.Handle.lnth );
     278
     279    if (s2.Handle.ulink != ambient_string_sharectx->activeHeap && mode == COPY_VALUE) {
     280        // crossing heaps (including private): copy eagerly
     281        eagerCopyCtorHelper(s, s2.Handle.s + start, end - start);
     282        verify(s.shareEditSet_prev == &s);
     283        verify(s.shareEditSet_next == &s);
    235284    } else {
    236         assert( mode == SHARE_EDITS );
    237 
    238         // s2 is logically const but not implementation const
    239         string_res & s2mod = (string_res &) s2;
    240 
    241         // insert s after s2 on shareEditSet
    242         s.shareEditSet_next = s2mod.shareEditSet_next;
    243         s.shareEditSet_prev = &s2mod;
    244         s.shareEditSet_next->shareEditSet_prev = &s;
    245         s.shareEditSet_prev->shareEditSet_next = &s;
    246     }
    247 }
    248 
    249 void assign(string_res &this, const char* buffer, size_t bsize) {
    250 
    251     // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs
    252     string_res * shareEditSetStartPeer = & this;
    253     string_res * shareEditSetEndPeer = & this;
    254     for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) {
    255         if ( editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) {
    256             shareEditSetStartPeer = editPeer;
     285        (s.Handle){};
     286        s.Handle.s = s2.Handle.s + start;
     287        s.Handle.lnth = end - start;
     288        s.Handle.ulink = s2.Handle.ulink;
     289
     290        AddThisAfter(s.Handle, s2.Handle );                     // insert this handle after rhs handle
     291        // ^ bug?  skip others at early point in string
     292
     293        if (mode == COPY_VALUE) {
     294            verify(s2.Handle.ulink == ambient_string_sharectx->activeHeap);
     295            // requested logical copy in same heap: defer copy until write
     296
     297            (s.shareEditSet_owns_ulink){ false };
     298
     299            // make s alone in its shareEditSet
     300            s.shareEditSet_prev = &s;
     301            s.shareEditSet_next = &s;
     302        } else {
     303            verify( mode == SHARE_EDITS );
     304            // sharing edits with source forces same heap as source (ignore context)
     305
     306            (s.shareEditSet_owns_ulink){ s2.shareEditSet_owns_ulink };
     307
     308            // s2 is logically const but not implementation const
     309            string_res & s2mod = (string_res &) s2;
     310
     311            // insert s after s2 on shareEditSet
     312            s.shareEditSet_next = s2mod.shareEditSet_next;
     313            s.shareEditSet_prev = &s2mod;
     314            s.shareEditSet_next->shareEditSet_prev = &s;
     315            s.shareEditSet_prev->shareEditSet_next = &s;
    257316        }
    258         if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) {
    259             shareEditSetEndPeer = editPeer;
    260         }
    261     }
    262 
    263     // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer
    264     // `this` occurs in the middle of it, to be replaced
    265     // build up the new text in `pasting`
    266 
    267     string_res pasting = {
    268         shareEditSetStartPeer->Handle.s,                   // start of SES
    269         this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this
    270     append( pasting,
    271         buffer,                                            // start of replacement for this
    272         bsize );                                           // length of replacement for this
    273     append( pasting,
    274         this.Handle.s + this.Handle.lnth,                  // start of SES after this
    275         shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth -
    276         (this.Handle.s + this.Handle.lnth) );              // length of SES, after this
    277 
    278     // The above string building can trigger compaction.
    279     // The reference points (that are arguments of the string building) may move during that building.
    280     // From this point on, they are stable.
    281     // So now, capture their values for use in the overlap cases, below.
    282     // Do not factor these definitions with the arguments used above.
     317    }
     318}
     319
     320static void assignEditSet(string_res & this, string_res * shareEditSetStartPeer, string_res * shareEditSetEndPeer,
     321    char * resultSesStart,
     322    size_t resultSesLnth,
     323    HandleNode * resultPadPosition, size_t bsize ) {
    283324
    284325    char * beforeBegin = shareEditSetStartPeer->Handle.s;
     
    290331    size_t oldLnth = this.Handle.lnth;
    291332
    292     this.Handle.s = pasting.Handle.s + beforeLen;
     333    this.Handle.s = resultSesStart + beforeLen;
    293334    this.Handle.lnth = bsize;
    294     MoveThisAfter( this.Handle, pasting.Handle );
     335    if (resultPadPosition)
     336        MoveThisAfter( this.Handle, *resultPadPosition );
    295337
    296338    // adjust all substring string and handle locations, and check if any substring strings are outside the new base string
    297     char *limit = pasting.Handle.s + pasting.Handle.lnth;
     339    char *limit = resultSesStart + resultSesLnth;
    298340    for (string_res * p = this.shareEditSet_next; p != &this; p = p->shareEditSet_next) {
    299         assert (p->Handle.s >= beforeBegin);
     341        verify (p->Handle.s >= beforeBegin);
    300342        if ( p->Handle.s >= afterBegin ) {
    301             assert ( p->Handle.s <= afterBegin + afterLen );
    302             assert ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
     343            verify ( p->Handle.s <= afterBegin + afterLen );
     344            verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
    303345            // p starts after the edit
    304346            // take start and end as end-anchored
     
    318360            } else {
    319361                // p ends after the edit
    320                 assert ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
     362                verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
    321363                // take end as end-anchored
    322364                // stretch-shrink p according to the edit
     
    326368            // take start as start-anchored
    327369            size_t startOffsetFromStart = p->Handle.s - beforeBegin;
    328             p->Handle.s = pasting.Handle.s + startOffsetFromStart;
     370            p->Handle.s = resultSesStart + startOffsetFromStart;
    329371        } else {
    330             assert ( p->Handle.s < afterBegin );
     372            verify ( p->Handle.s < afterBegin );
    331373            // p starts during the edit
    332             assert( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );
     374            verify( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );
    333375            if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
    334376                // p ends during the edit; p does not include the last character replaced
     
    344386            }
    345387        }
    346         MoveThisAfter( p->Handle, pasting.Handle );     // move substring handle to maintain sorted order by string position
    347     }
    348 }
    349 
    350 void ?=?(string_res &s, const char* other) {
    351     assign(s, other, strlen(other));
    352 }
    353 
    354 void ?=?(string_res &s, char other) {
    355     assign(s, &other, 1);
     388        if (resultPadPosition)
     389            MoveThisAfter( p->Handle, *resultPadPosition );     // move substring handle to maintain sorted order by string position
     390    }
     391}
     392
     393static string_res & assign_(string_res &this, const char* buffer, size_t bsize, const string_res & valSrc) {
     394
     395    // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs
     396    string_res * shareEditSetStartPeer = & this;
     397    string_res * shareEditSetEndPeer = & this;
     398    for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) {
     399        if ( editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) {
     400            shareEditSetStartPeer = editPeer;
     401        }
     402        if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) {
     403            shareEditSetEndPeer = editPeer;
     404        }
     405    }
     406
     407    verify( shareEditSetEndPeer->Handle.s >= shareEditSetStartPeer->Handle.s );
     408    size_t origEditSetLength = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - shareEditSetStartPeer->Handle.s;
     409    verify( origEditSetLength >= this.Handle.lnth );
     410
     411    if ( this.shareEditSet_owns_ulink ) {                 // assigning to private context
     412        // ok to overwrite old value within LHS
     413        char * prefixStartOrig = shareEditSetStartPeer->Handle.s;
     414        int prefixLen = this.Handle.s - prefixStartOrig;
     415        char * suffixStartOrig = this.Handle.s + this.Handle.lnth;
     416        int suffixLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - suffixStartOrig;
     417
     418        int delta = bsize - this.Handle.lnth;
     419        if ( char * oldBytes = VbyteTryAdjustLast( *this.Handle.ulink, delta ) ) {
     420            // growing: copy from old to new
     421            char * dest = VbyteAlloc( *this.Handle.ulink, origEditSetLength + delta );
     422            char *destCursor = dest;  memcpy(destCursor, prefixStartOrig, prefixLen);
     423            destCursor += prefixLen;  memcpy(destCursor, buffer         , bsize    );
     424            destCursor += bsize;      memcpy(destCursor, suffixStartOrig, suffixLen);
     425            assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
     426                dest,
     427                origEditSetLength + delta,
     428                0p, bsize);
     429            free( oldBytes );
     430        } else {
      431            // room is already allocated in-place: bubble suffix and overwrite middle
     432            memmove( suffixStartOrig + delta, suffixStartOrig, suffixLen );
     433            memcpy( this.Handle.s, buffer, bsize );
     434
     435            assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
     436                shareEditSetStartPeer->Handle.s,
     437                origEditSetLength + delta,
     438                0p, bsize);
     439        }
     440
     441    } else if (                                           // assigning to shared context
     442        this.Handle.lnth == origEditSetLength &&          // overwriting entire run of SES
     443        & valSrc &&                                       // sourcing from a managed string
     444        valSrc.Handle.ulink == this.Handle.ulink  ) {     // sourcing from same heap
     445
     446        // SES's result will only use characters from the source string => reuse source
     447        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
     448            valSrc.Handle.s,
     449            valSrc.Handle.lnth,
     450            &((string_res&)valSrc).Handle, bsize);
     451       
     452    } else {
     453        // overwriting a proper substring of some string: mash characters from old and new together (copy on write)
     454        // OR we are importing characters: need to copy eagerly (can't refer to source)
     455
     456        // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer
     457        // `this` occurs in the middle of it, to be replaced
     458        // build up the new text in `pasting`
     459
     460        string_res pasting = {
     461            * this.Handle.ulink,                               // maintain same heap, regardless of context
     462            shareEditSetStartPeer->Handle.s,                   // start of SES
     463            this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this
     464        append( pasting,
     465            buffer,                                            // start of replacement for this
     466            bsize );                                           // length of replacement for this
     467        append( pasting,
     468            this.Handle.s + this.Handle.lnth,                  // start of SES after this
     469            shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth -
     470            (this.Handle.s + this.Handle.lnth) );              // length of SES, after this
     471
     472        // The above string building can trigger compaction.
     473        // The reference points (that are arguments of the string building) may move during that building.
     474        // From this point on, they are stable.
     475
     476        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
     477            pasting.Handle.s,
     478            pasting.Handle.lnth,
     479            &pasting.Handle, bsize);
     480    }
     481
     482    return this;
     483}
     484
     485string_res & assign(string_res &this, const char* buffer, size_t bsize) {
     486    return assign_(this, buffer, bsize, *0p);
     487}
     488
     489string_res & ?=?(string_res &s, char other) {
     490    return assign(s, &other, 1);
    356491}
    357492
    358493// Copy assignment operator
    359 void ?=?(string_res & this, const string_res & rhs) with( this ) {
    360     assign(this, rhs.Handle.s, rhs.Handle.lnth);
    361 }
    362 
    363 void ?=?(string_res & this, string_res & rhs) with( this ) {
     494string_res & ?=?(string_res & this, const string_res & rhs) with( this ) {
     495    return assign_(this, rhs.Handle.s, rhs.Handle.lnth, rhs);
     496}
     497
     498string_res & ?=?(string_res & this, string_res & rhs) with( this ) {
    364499    const string_res & rhs2 = rhs;
    365     this = rhs2;
     500    return this = rhs2;
    366501}
    367502
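The assignment path above preserves share-edit-set (SES) semantics: writing through one member of the set is visible to every peer that overlaps the edited range. A hedged behaviour sketch:

    string_res base = { "hello world", 11 };
    string_res word = { base, SHARE_EDITS, 6, 11 };   // substring "world", sharing edits with base
    word = "there";                                   // routed through assign_/assignEditSet
    // base is expected to now read "hello there"

The exact branch taken in assign_ depends on whether the string owns its heap and whether the whole SES run is being replaced, but the visible result is the same.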
     
    374509    s.shareEditSet_prev->shareEditSet_next = s.shareEditSet_next;
    375510    s.shareEditSet_next->shareEditSet_prev = s.shareEditSet_prev;
    376     s.shareEditSet_next = &s;
    377     s.shareEditSet_prev = &s;
     511    // s.shareEditSet_next = &s;
     512    // s.shareEditSet_prev = &s;
     513
     514    if (shareEditSet_owns_ulink && s.shareEditSet_next == &s) { // last one out
     515        delete( s.Handle.ulink );
     516    }
    378517}
    379518
     
    387526}
    388527
     528void assignAt(const string_res &s, size_t index, char val) {
     529    string_res editZone = { s, SHARE_EDITS, index, index+1 };
     530    assign(editZone, &val, 1);
     531}
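assignAt routes a single-character write through the share-edit machinery by building a one-character SHARE_EDITS view and assigning into it. A hedged usage sketch:

    string_res s = { "hello", 5 };
    assignAt( s, 0, 'J' );       // s is expected to now read "Jello"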
     532
    389533
    390534///////////////////////////////////////////////////////////////////
     
    392536
    393537void append(string_res &str1, const char * buffer, size_t bsize) {
    394     size_t clnth = size(str1) + bsize;
    395     if ( str1.Handle.s + size(str1) == buffer ) { // already juxtapose ?
     538    size_t clnth = str1.Handle.lnth + bsize;
     539    if ( str1.Handle.s + str1.Handle.lnth == buffer ) { // already juxtapose ?
    396540        // no-op
    397541    } else {                                            // must copy some text
    398         if ( str1.Handle.s + size(str1) == VbyteAlloc(HeapArea, 0) ) { // str1 at end of string area ?
    399             VbyteAlloc(HeapArea, bsize); // create room for 2nd part at the end of string area
     542        if ( str1.Handle.s + str1.Handle.lnth == VbyteAlloc(*str1.Handle.ulink, 0) ) { // str1 at end of string area ?
     543            VbyteAlloc( *str1.Handle.ulink, bsize ); // create room for 2nd part at the end of string area
    400544        } else {                                        // copy the two parts
    401             char * str1oldBuf = str1.Handle.s;
    402             str1.Handle.s = VbyteAlloc( HeapArea, clnth );
    403             ByteCopy( HeapArea, str1.Handle.s, 0, str1.Handle.lnth, str1oldBuf, 0, str1.Handle.lnth);
     545            char * str1newBuf = VbyteAlloc( *str1.Handle.ulink, clnth );
      546            char * str1oldBuf = str1.Handle.s;  // must read after the VbyteAlloc call, in case it garbage collects
     547            str1.Handle.s = str1newBuf;
     548            memcpy( str1.Handle.s, str1oldBuf,  str1.Handle.lnth );
    404549        } // if
    405         ByteCopy( HeapArea, str1.Handle.s, str1.Handle.lnth, bsize, (char*)buffer, 0, (int)bsize);
    406         //       VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth
     550        memcpy( str1.Handle.s + str1.Handle.lnth, buffer, bsize );
    407551    } // if
    408552    str1.Handle.lnth = clnth;
     
    417561}
    418562
    419 void ?+=?(string_res &s, const char* other) {
    420     append( s, other, strlen(other) );
    421 }
    422563
    423564
     
    429570
    430571bool ?==?(const string_res &s1, const string_res &s2) {
    431     return ByteCmp( HeapArea, s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0;
     572    return ByteCmp( s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0;
    432573}
    433574
     
    455596
    456597int find(const string_res &s, char search) {
    457     for (i; size(s)) {
    458         if (s[i] == search) return i;
    459     }
    460     return size(s);
    461 }
     598    return findFrom(s, 0, search);
     599}
     600
     601int findFrom(const string_res &s, size_t fromPos, char search) {
      602    // FIXME: This particular overload (find of single char) is optimized to use memchr.
     603    // The general overload (find of string, memchr applying to its first character) and `contains` should be adjusted to match.
     604    char * searchFrom = s.Handle.s + fromPos;
     605    size_t searchLnth = s.Handle.lnth - fromPos;
     606    int searchVal = search;
     607    char * foundAt = (char *) memchr(searchFrom, searchVal, searchLnth);
     608    if (foundAt == 0p) return s.Handle.lnth;
     609    else return foundAt - s.Handle.s;
     610}
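Both find and findFrom use the string's length as the "not found" result, so callers compare against the size rather than testing for -1. A hedged usage sketch:

    string_res s = { "abcabc", 6 };
    int i = findFrom( s, 1, 'a' );   // expected 3: the scan starts after index 0
    if ( i == size( s ) ) { /* not found from that position */ }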
     611
     612int find(const string_res &s, const string_res &search) {
     613    return findFrom(s, 0, search);
     614}
     615
     616int findFrom(const string_res &s, size_t fromPos, const string_res &search) {
     617    return findFrom(s, fromPos, search.Handle.s, search.Handle.lnth);
     618}
     619
     620int find(const string_res &s, const char* search) {
     621    return findFrom(s, 0, search);
     622}
     623int findFrom(const string_res &s, size_t fromPos, const char* search) {
     624    return findFrom(s, fromPos, search, strlen(search));
     625}
     626
     627int find(const string_res &s, const char* search, size_t searchsize) {
     628    return findFrom(s, 0, search, searchsize);
     629}
     630
     631int findFrom(const string_res &s, size_t fromPos, const char* search, size_t searchsize) {
    462632
    463633    /* Remaining implementations essentially ported from Sunjay's work */
    464634
    465 int find(const string_res &s, const string_res &search) {
    466     return find(s, search.Handle.s, search.Handle.lnth);
    467 }
    468 
    469 int find(const string_res &s, const char* search) {
    470     return find(s, search, strlen(search));
    471 }
    472 
    473 int find(const string_res &s, const char* search, size_t searchsize) {
     635
     474636    // FIXME: This is a naive algorithm. We probably want to switch to something
    475637    // like Boyer-Moore in the future.
     
    481643    }
    482644
    483     for (size_t i = 0; i < s.Handle.lnth; i++) {
     645    for (size_t i = fromPos; i < s.Handle.lnth; i++) {
    484646        size_t remaining = s.Handle.lnth - i;
    485647        // Never going to find the search string if the remaining string is
     
    596758// Add a new HandleNode node n after the current HandleNode node.
    597759
    598 static inline void AddThisAfter( HandleNode & this, HandleNode & n ) with(this) {
     760static void AddThisAfter( HandleNode & this, HandleNode & n ) with(this) {
    599761#ifdef VbyteDebug
    600762    serr | "enter:AddThisAfter, this:" | &this | " n:" | &n;
    601763#endif // VbyteDebug
     764    // Performance note: we are on the critical path here. MB has ensured that the verifies don't contribute to runtime (are compiled away, like they're supposed to be).
     765    verify( n.ulink != 0p );
     766    verify( this.ulink == n.ulink );
    602767    flink = n.flink;
    603768    blink = &n;
     
    624789// Delete the current HandleNode node.
    625790
    626 static inline void DeleteNode( HandleNode & this ) with(this) {
     791static void DeleteNode( HandleNode & this ) with(this) {
    627792#ifdef VbyteDebug
    628793    serr | "enter:DeleteNode, this:" | &this;
     
    638803
    639804// Allocates specified storage for a string from byte-string area. If not enough space remains to perform the
    640 // allocation, the garbage collection routine is called and a second attempt is made to allocate the space. If the
    641 // second attempt fails, a further attempt is made to create a new, larger byte-string area.
    642 
    643 static inline char * VbyteAlloc( VbyteHeap & this, int size ) with(this) {
     805// allocation, the garbage collection routine is called.
     806
     807static char * VbyteAlloc( VbyteHeap & this, int size ) with(this) {
    644808#ifdef VbyteDebug
    645809    serr | "enter:VbyteAlloc, size:" | size;
     
    650814    NoBytes = ( uintptr_t )EndVbyte + size;
    651815    if ( NoBytes > ( uintptr_t )ExtVbyte ) {            // enough room for new byte-string ?
    652                 garbage( this );                                        // firer up the garbage collector
    653                 NoBytes = ( uintptr_t )EndVbyte + size;         // try again
    654                 if ( NoBytes > ( uintptr_t )ExtVbyte ) {        // enough room for new byte-string ?
    655 assert( 0 && "need to implement actual growth" );
    656                         // extend( size );                              // extend the byte-string area
    657                 } // if
      816                garbage( this, size );                                  // fire up the garbage collector
     817                verify( (( uintptr_t )EndVbyte + size) <= ( uintptr_t )ExtVbyte  && "garbage run did not free up required space" );
    658818    } // if
    659819    r = EndVbyte;
     
    666826
    667827
     828// Adjusts the last allocation in this heap by delta bytes, or resets this heap to be able to offer
     829// new allocations of its original size + delta bytes. Positive delta means bigger;
     830// negative means smaller.  A null return indicates that the original heap location has room for
     831// the requested growth.  A non-null return indicates that copying to a new location is required
     832// but has not been done; the returned value is the old heap storage location; `this` heap is
      833// modified to reference the new location.  In the copy-required case, the caller should use
     834// VbyteAlloc to claim the new space, while doing optimal copying from old to new, then free old.
     835
     836static char * VbyteTryAdjustLast( VbyteHeap & this, int delta ) with(this) {
     837
     838    if ( ( uintptr_t )EndVbyte + delta <= ( uintptr_t )ExtVbyte ) {
     839        // room available
     840        EndVbyte += delta;
     841        return 0p;
     842    }
     843
     844    char *oldBytes = StartVbyte;
     845
     846    NoOfExtensions += 1;
     847    CurrSize *= 2;
     848    StartVbyte = EndVbyte = TEMP_ALLOC(char, CurrSize);
     849    ExtVbyte = StartVbyte + CurrSize;
     850
     851    return oldBytes;
     852}
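A hedged sketch of the calling convention described in the comment above (heap, oldLnth and delta are illustrative names):

    char * oldBytes = VbyteTryAdjustLast( heap, delta );
    if ( oldBytes ) {                                       // heap was re-seated onto fresh storage
        char * dest = VbyteAlloc( heap, oldLnth + delta );  // claim space in the new area
        memcpy( dest, oldBytes, oldLnth );                  // caller chooses what to carry over
        free( oldBytes );                                   // release the old storage
    }                                                       // else: the last allocation grew in place by delta

This is the same pattern used by the owns-ulink branch of assign_ above, which interleaves the copy with splicing in the new middle section.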
     853
     854
    668855// Move an existing HandleNode node h somewhere after the current HandleNode node so that it is in ascending order by
    669856// the address in the byte string area.
    670857
    671 static inline void MoveThisAfter( HandleNode & this, const HandleNode  & h ) with(this) {
     858static void MoveThisAfter( HandleNode & this, const HandleNode  & h ) with(this) {
    672859#ifdef VbyteDebug
    673860    serr | "enter:MoveThisAfter, this:" | & this | " h:" | & h;
    674861#endif // VbyteDebug
     862    verify( h.ulink != 0p );
     863    verify( this.ulink == h.ulink );
    675864    if ( s < h.s ) {                                    // check argument values
    676865                // serr | "VbyteSM: Error - Cannot move byte string starting at:" | s | " after byte string starting at:"
    677866                //      | ( h->s ) | " and keep handles in ascending order";
    678867                // exit(-1 );
    679                 assert( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");
     868                verify( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");
    680869    } // if
    681870
     
    709898//######################### VbyteHeap #########################
    710899
    711 // Move characters from one location in the byte-string area to another. The routine handles the following situations:
    712 //
    713 // if the |Src| > |Dst| => truncate
    714 // if the |Dst| > |Src| => pad Dst with blanks
    715 
    716 void ByteCopy( VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth ) {
    717     for ( int i = 0; i < DstLnth; i += 1 ) {
    718       if ( i == SrcLnth ) {                             // |Dst| > |Src|
    719             for ( ; i < DstLnth; i += 1 ) {             // pad Dst with blanks
    720                 Dst[DstStart + i] = ' ';
    721             } // for
    722             break;
    723         } // exit
    724         Dst[DstStart + i] = Src[SrcStart + i];
    725     } // for
    726 } // ByteCopy
    727 
    728900// Compare two byte strings in the byte-string area. The routine returns the following values:
    729901//
     
    732904// -1 => Src1-byte-string < Src2-byte-string
    733905
    734 int ByteCmp( VbyteHeap & this, char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth )  with(this) {
     906int ByteCmp( char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth ) {
    735907#ifdef VbyteDebug
    736908    serr | "enter:ByteCmp, Src1Start:" | Src1Start | " Src1Lnth:" | Src1Lnth | " Src2Start:" | Src2Start | " Src2Lnth:" | Src2Lnth;
     
    789961    h = Header.flink;                                   // ignore header node
    790962    for (;;) {
    791                 ByteCopy( this, EndVbyte, 0, h->lnth, h->s, 0, h->lnth );
     963                memmove( EndVbyte, h->s, h->lnth );
    792964                obase = h->s;
    793965                h->s = EndVbyte;
     
    810982
    811983
     984static double heap_expansion_freespace_threshold = 0.1;  // default inherited from prior work: expand heap when less than 10% "free" (i.e. garbage)
     985                                                         // probably an unreasonable default, but need to assess early-round tests on changing it
     986
     987void TUNING_set_string_heap_liveness_threshold( double val ) {
     988    heap_expansion_freespace_threshold = 1.0 - val;
     989}
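The tuning hook stores the complement of the requested liveness, since the collector's test below is phrased in terms of free space. A hedged usage sketch:

    // ask the string heap to extend roughly once live data exceeds 70% of the area
    TUNING_set_string_heap_liveness_threshold( 0.7 );   // stored internally as a 0.3 free-space threshold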
     990
     991
     812992// Garbage determines the amount of free space left in the heap and then reduces, leaves the same, or extends the size of
    813993// the heap.  The heap is then compacted in the existing heap or into the newly allocated heap.
    814994
    815 void garbage(VbyteHeap & this ) with(this) {
     995void garbage(VbyteHeap & this, int minreq ) with(this) {
    816996#ifdef VbyteDebug
    817997    serr | "enter:garbage";
     
    8371017    AmountFree = ( uintptr_t )ExtVbyte - ( uintptr_t )StartVbyte - AmountUsed;
    8381018   
    839     if ( AmountFree < ( int )( CurrSize * 0.1 )) {      // free space less than 10% ?
    840 
    841 assert( 0 && "need to implement actual growth" );
    842 //              extend( CurrSize );                             // extend the heap
     1019    if ( ( double ) AmountFree < ( CurrSize * heap_expansion_freespace_threshold ) || AmountFree < minreq ) {   // free space less than threshold or not enough to serve cur request
     1020
     1021                extend( this, max( CurrSize, minreq ) );                                // extend the heap
    8431022
    8441023                        //  Peter says, "This needs work before it should be used."
     
    8461025                        //              reduce(( AmountFree / CurrSize - 3 ) * CurrSize ); // reduce the memory
    8471026
    848     } // if
    849     compaction(this);                                   // compact the byte area, in the same or new heap area
     1027        // `extend` implies a `compaction` during the copy
     1028
     1029    } else {
     1030        compaction(this);                                       // in-place
     8491031    } // if
    8501032#ifdef VbyteDebug
    8511033    {
     
    8671049#undef VbyteDebug
    8681050
    869 //WIP
    870 #if 0
    8711051
    8721052
     
    8741054// area is deleted.
    8751055
    876 void VbyteHeap::extend( int size ) {
     1056void extend( VbyteHeap & this, int size ) with (this) {
    8771057#ifdef VbyteDebug
    8781058    serr | "enter:extend, size:" | size;
     
    8841064   
    8851065    CurrSize += size > InitSize ? size : InitSize;      // minimum extension, initial size
    886     StartVbyte = EndVbyte = new char[CurrSize];
     1066    StartVbyte = EndVbyte = TEMP_ALLOC(char, CurrSize);
    8871067    ExtVbyte = (void *)( StartVbyte + CurrSize );
    888     compaction();                                       // copy from old heap to new & adjust pointers to new heap
    889     delete OldStartVbyte;                               // release old heap
     1068    compaction(this);                                   // copy from old heap to new & adjust pointers to new heap
     1069    free( OldStartVbyte );                              // release old heap
    8901070#ifdef VbyteDebug
    8911071    serr | "exit:extend, CurrSize:" | CurrSize;
     
    8931073} // extend
    8941074
     1075//WIP
     1076#if 0
    8951077
    8961078// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
  • libcfa/src/containers/string_res.hfa

    ref3c383 rd672350  
    1717
    1818#include <fstream.hfa>
     19#include <string.h>    // e.g. strlen
    1920
    2021   
     
    2728    HandleNode *flink;                                  // forward link
    2829    HandleNode *blink;                                  // backward link
     30    VbyteHeap *ulink;                   // upward link
    2931
    3032    char *s;                                            // pointer to byte string
     
    3234}; // HandleNode
    3335
    34 void ?{}( HandleNode & );                       // constructor for header node
    35 
    36 void ?{}( HandleNode &, VbyteHeap & );          // constructor for nodes in the handle list
    37 void ^?{}( HandleNode & );                      // destructor for handle nodes
    38 
    39 extern VbyteHeap * DEBUG_string_heap;
     36VbyteHeap * DEBUG_string_heap();
     37size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap );
    4038size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap );
    4139const char * DEBUG_string_heap_start( VbyteHeap * heap );
    4240
     41void TUNING_set_string_heap_liveness_threshold( double val );
    4342
    4443//######################### String #########################
     
    4746struct string_res {
    4847    HandleNode Handle; // chars, start, end, global neighbours
     48    bool shareEditSet_owns_ulink;
    4949    string_res * shareEditSet_prev;
    5050    string_res * shareEditSet_next;
     
    7474// Constructors, Assignment Operators, Destructor
    7575void ?{}(string_res &s); // empty string
    76 void ?{}(string_res &s, const char* initial); // copy from string literal (NULL-terminated)
    7776void ?{}(string_res &s, const char* buffer, size_t bsize); // copy specific length from buffer
     77static inline void ?{}(string_res &s, const char* rhs) { // copy from string literal (NULL-terminated)
     78    (s){ rhs, strlen(rhs) };
     79}
    7880
    7981void ?{}(string_res &s, const string_res & s2) = void;
     
    8688}
    8789
    88 void assign(string_res &s, const char* buffer, size_t bsize); // copy specific length from buffer
    89 void ?=?(string_res &s, const char* other); // copy from string literal (NULL-terminated)
    90 void ?=?(string_res &s, const string_res &other);
    91 void ?=?(string_res &s, string_res &other);
    92 void ?=?(string_res &s, char other);
     90string_res & assign(string_res &s, const char* buffer, size_t bsize); // copy specific length from buffer
     91static inline string_res & ?=?(string_res &s, const char* other) {  // copy from string literal (NULL-terminated)
     92    return assign(s, other, strlen(other));
     93}
     94string_res & ?=?(string_res &s, const string_res &other);
     95string_res & ?=?(string_res &s, string_res &other);
     96string_res & ?=?(string_res &s, char other);
    9397
    9498void ^?{}(string_res &s);
     
    99103
    100104// Concatenation
     105void append(string_res &s, const char* buffer, size_t bsize);
    101106void ?+=?(string_res &s, char other); // append a character
    102107void ?+=?(string_res &s, const string_res &s2); // append-concatenate to first string
    103 void ?+=?(string_res &s, const char* other);
    104 void append(string_res &s, const char* buffer, size_t bsize);
     108static inline void ?+=?(string_res &s, const char* other) {
     109    append( s, other, strlen(other) );
     110}
    105111
    106112// Character access
     113void assignAt(const string_res &s, size_t index, char val);
    107114char ?[?](const string_res &s, size_t index); // Mike changed to ret by val from Sunjay's ref, to match Peter's
    108115//char codePointAt(const string_res &s, size_t index); // revisit under Unicode
     
    121128int find(const string_res &s, const char* search);
    122129int find(const string_res &s, const char* search, size_t searchsize);
     130
     131int findFrom(const string_res &s, size_t fromPos, char search);
     132int findFrom(const string_res &s, size_t fromPos, const string_res &search);
     133int findFrom(const string_res &s, size_t fromPos, const char* search);
     134int findFrom(const string_res &s, size_t fromPos, const char* search, size_t searchsize);
    123135
    124136bool includes(const string_res &s, const string_res &search);
  • libcfa/src/math.trait.hfa

    ref3c383 rd672350  
    1616#pragma once
    1717
    18 trait Not( T ) {
    19         void ?{}( T &, zero_t );
    20         int !?( T );
     18trait Not( U ) {
     19        void ?{}( U &, zero_t );
     20        int !?( U );
    2121}; // Not
    2222
     
    2626}; // Equality
    2727
    28 trait Relational( T | Equality( T ) ) {
    29         int ?<?( T, T );
    30         int ?<=?( T, T );
    31         int ?>?( T, T );
    32         int ?>=?( T, T );
     28trait Relational( U | Equality( U ) ) {
     29        int ?<?( U, U );
     30        int ?<=?( U, U );
     31        int ?>?( U, U );
     32        int ?>=?( U, U );
    3333}; // Relational
    3434
     
    3939}; // Signed
    4040
    41 trait Additive( T | Signed( T ) ) {
    42         T ?+?( T, T );
    43         T ?-?( T, T );
    44         T ?+=?( T &, T );
    45         T ?-=?( T &, T );
     41trait Additive( U | Signed( U ) ) {
     42        U ?+?( U, U );
     43        U ?-?( U, U );
     44        U ?+=?( U &, U );
     45        U ?-=?( U &, U );
    4646}; // Additive
    4747
     
    4949        void ?{}( T &, one_t );
    5050        // T ?++( T & );
    51         // T ++?( T &);
     51        // T ++?( T & );
    5252        // T ?--( T & );
    5353        // T --?( T & );
    5454}; // Incdec
    5555
    56 trait Multiplicative( T | Incdec( T ) ) {
    57         T ?*?( T, T );
    58         T ?/?( T, T );
    59         T ?%?( T, T );
    60         T ?/=?( T &, T );
     56trait Multiplicative( U | Incdec( U ) ) {
     57        U ?*?( U, U );
     58        U ?/?( U, U );
     59        U ?%?( U, U );
     60        U ?/=?( U &, U );
    6161}; // Multiplicative
    6262
Note: See TracChangeset for help on using the changeset viewer.