File (1 edited):

  • libcfa/src/concurrency/ready_queue.cfa

Changeset r1f45c7d → re84ab3d

Legend: lines prefixed with '-' were removed, lines prefixed with '+' were added, unprefixed lines are unmodified context.
@@ -67,7 +67,7 @@
 #endif

-static inline struct $thread * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats));
-static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats));
-static inline struct $thread * search(struct cluster * cltr);
+static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats));
+static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats));
+static inline struct thread$ * search(struct cluster * cltr);
 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred);

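Note: throughout these prototypes the optional statistics argument is injected through the __STATS(...) macro, whose definition is not part of this changeset. A plain-C sketch of the usual shape of such a macro (the guard name below is a guess, not necessarily libcfa's actual build flag):

    /* Sketch only: in statistics builds the macro forwards its arguments
     * verbatim, otherwise it expands to nothing, so the extra
     * `__stats_readyQ_pop_t &` parameter exists only when stats are on. */
    #if defined(__CFA_WITH_STATISTICS__)   /* hypothetical guard name */
        #define __STATS(...) __VA_ARGS__   /* keep the extra argument */
    #else
        #define __STATS(...)               /* compile it away         */
    #endif
    /* Hence `try_pop(cltr, w __STATS(, stats))` passes `stats` only in
     * stats builds; the leading comma travels inside the macro arguments
     * so nothing is left behind when the macro expands to nothing.      */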
     
@@ -274,5 +274,5 @@
 //-----------------------------------------------------------------------
 #if defined(USE_CPU_WORK_STEALING)
-	__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
+	__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, bool push_local) with (cltr->ready_queue) {
 		__cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);

     
@@ -316,5 +316,5 @@

 	// Pop from the ready queue from a given cluster
-	__attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+	__attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
 		/* paranoid */ verify( lanes.count > 0 );
 		/* paranoid */ verify( kernelTLS().this_processor );
     
@@ -345,10 +345,17 @@
 			/* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores.
 			/* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores.
-
-			if(0 == (__tls_rand() % 10_000)) {
-				proc->rdq.target = __tls_rand() % lanes.count;
+			uint64_t chaos = __tls_rand();
+			uint64_t high_chaos = (chaos >> 32);
+			uint64_t  mid_chaos = (chaos >> 16) & 0xffff;
+			uint64_t  low_chaos = chaos & 0xffff;
+
+			unsigned me = map.self;
+			unsigned cpu_chaos = map.start + (mid_chaos % map.count);
+			bool global = cpu_chaos == me;
+
+			if(global) {
+				proc->rdq.target = high_chaos % lanes.count;
 			} else {
-				unsigned cpu_chaos = map.start + (__tls_rand() % map.count);
-				proc->rdq.target = (cpu_chaos * READYQ_SHARD_FACTOR) + (__tls_rand() % READYQ_SHARD_FACTOR);
+				proc->rdq.target = (cpu_chaos * READYQ_SHARD_FACTOR) + (low_chaos % READYQ_SHARD_FACTOR);
 				/* paranoid */ verify(proc->rdq.target >= (map.start * READYQ_SHARD_FACTOR));
 				/* paranoid */ verify(proc->rdq.target <  ((map.start + map.count) * READYQ_SHARD_FACTOR));
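Note: this is the substantive change of the hunk. The removed code drew up to three separate random numbers per call and picked a "global" (any-lane) target roughly once every 10,000 pops; the added code draws a single 64-bit value, slices it into high/mid/low fields, and goes global whenever the randomly chosen CPU happens to be the current one. A plain-C sketch of the new selection, with illustrative parameter names standing in for the proc/map state used above:

    #include <stdint.h>

    /* Sketch of the added lane-selection logic: one 64-bit draw replaces
     * the previous separate __tls_rand() calls, and its bit fields are
     * reused for the different decisions.  Parameter names are stand-ins,
     * not libcfa identifiers. */
    static unsigned pick_target_example(uint64_t chaos,
                                        unsigned map_start, unsigned map_count,
                                        unsigned map_self,
                                        unsigned lane_count, unsigned shard_factor) {
        uint64_t high_chaos = chaos >> 32;            /* bits 63..32: lane pick for the global case */
        uint64_t mid_chaos  = (chaos >> 16) & 0xffff; /* bits 31..16: which cpu to help             */
        uint64_t low_chaos  = chaos & 0xffff;         /* bits 15..0 : which shard of that cpu       */

        unsigned cpu_chaos = map_start + (unsigned)(mid_chaos % map_count);
        if(cpu_chaos == map_self) {
            /* the draw landed on our own cpu: help "globally", i.e. any lane */
            return (unsigned)(high_chaos % lane_count);
        }
        /* otherwise target one of the chosen cpu's READYQ_SHARD_FACTOR shards */
        return (cpu_chaos * shard_factor) + (unsigned)(low_chaos % shard_factor);
    }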
     
@@ -364,5 +371,5 @@
 				proc->rdq.target = -1u;
 				if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) {
-					$thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
+					thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
 					proc->rdq.last = target;
 					if(t) return t;
     
@@ -372,5 +379,5 @@
 			unsigned last = proc->rdq.last;
 			if(last != -1u && lanes.tscs[last].tv < cutoff && ts(lanes.data[last]) < cutoff) {
-				$thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
+				thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
 				if(t) return t;
 			}
     
@@ -382,5 +389,5 @@
 		for(READYQ_SHARD_FACTOR) {
 			unsigned i = start + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
-			if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
+			if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
 		}

     
@@ -389,9 +396,9 @@
 	}

-	__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
+	__attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
 		processor * const proc = kernelTLS().this_processor;
 		unsigned last = proc->rdq.last;
 		if(last != -1u) {
-			struct $thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal));
+			struct thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal));
 			if(t) return t;
 			proc->rdq.last = -1u;
     
@@ -401,5 +408,5 @@
 		return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
 	}
-	__attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) {
+	__attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {
 		return search(cltr);
 	}
     
@@ -428,5 +435,5 @@
 	}

-	__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
+	__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, bool push_local) with (cltr->ready_queue) {
 		__cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);

     
@@ -475,5 +482,5 @@

 	// Pop from the ready queue from a given cluster
-	__attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+	__attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
 		/* paranoid */ verify( lanes.count > 0 );
 		/* paranoid */ verify( kernelTLS().this_processor );
     
@@ -499,5 +506,5 @@

 			// try popping from the 2 picked lists
-			struct $thread * thrd = try_pop(cltr, i, j __STATS(, *(locali || localj ? &__tls_stats()->ready.pop.local : &__tls_stats()->ready.pop.help)));
+			struct thread$ * thrd = try_pop(cltr, i, j __STATS(, *(locali || localj ? &__tls_stats()->ready.pop.local : &__tls_stats()->ready.pop.help)));
 			if(thrd) {
 				return thrd;
     
@@ -509,11 +516,11 @@
 	}

-	__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) { return pop_fast(cltr); }
-	__attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) {
+	__attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) { return pop_fast(cltr); }
+	__attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {
 		return search(cltr);
 	}
 #endif
 #if defined(USE_WORK_STEALING)
-	__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
+	__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, bool push_local) with (cltr->ready_queue) {
 		__cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);

     
@@ -569,5 +576,5 @@

 	// Pop from the ready queue from a given cluster
-	__attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+	__attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
 		/* paranoid */ verify( lanes.count > 0 );
 		/* paranoid */ verify( kernelTLS().this_processor );
     
@@ -591,5 +598,5 @@
 			const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff;
 			if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) {
-				$thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
+				thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
 				if(t) return t;
 			}
     
@@ -598,15 +605,15 @@
 		for(READYQ_SHARD_FACTOR) {
 			unsigned i = proc->rdq.id + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
-			if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
+			if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
 		}
 		return 0p;
 	}

-	__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
+	__attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
 		unsigned i = __tls_rand() % lanes.count;
 		return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
 	}

-	__attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) with (cltr->ready_queue) {
+	__attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) with (cltr->ready_queue) {
 		return search(cltr);
 	}
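Note: both work-stealing backends export the same three entry points touched above: pop_fast, pop_slow and pop_search. Their caller is not shown in this changeset; purely as an illustration (an assumption, not taken from this diff), a scheduler might try them from cheapest to most thorough, e.g.:

    struct cluster;          /* opaque here; defined elsewhere in libcfa           */
    struct thread_example;   /* illustrative stand-in for the thread$ descriptor   */

    /* Stub bodies so the sketch stands alone; the real entry points are the
     * pop_fast / pop_slow / pop_search functions edited above. */
    static struct thread_example * pop_fast_example  (struct cluster * cltr) { (void)cltr; return 0; }
    static struct thread_example * pop_slow_example  (struct cluster * cltr) { (void)cltr; return 0; }
    static struct thread_example * pop_search_example(struct cluster * cltr) { (void)cltr; return 0; }

    /* Illustrative calling order only; the actual libcfa scheduler may
     * interleave these differently (e.g. with idle sleep between attempts). */
    static struct thread_example * next_ready_example(struct cluster * cltr) {
        struct thread_example * t;
        if((t = pop_fast_example  (cltr))) return t;   /* local shards            */
        if((t = pop_slow_example  (cltr))) return t;   /* steal from another lane */
        if((t = pop_search_example(cltr))) return t;   /* scan every lane         */
        return 0;                                      /* nothing ready           */
    }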
     
@@ -621,5 +628,5 @@
 //-----------------------------------------------------------------------
 // try to pop from a lane given by index w
-static inline struct $thread * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) {
+static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) {
 	__STATS( stats.attempt++; )

     
@@ -644,5 +651,5 @@

 	// Actually pop the list
-	struct $thread * thrd;
+	struct thread$ * thrd;
 	unsigned long long tsv;
 	[thrd, tsv] = pop(lane);
     
@@ -671,5 +678,5 @@
 // try to pop from any lanes making sure you don't miss any threads push
 // before the start of the function
-static inline struct $thread * search(struct cluster * cltr) with (cltr->ready_queue) {
+static inline struct thread$ * search(struct cluster * cltr) with (cltr->ready_queue) {
 	/* paranoid */ verify( lanes.count > 0 );
 	unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
     
@@ -677,5 +684,5 @@
 	for(i; count) {
 		unsigned idx = (offset + i) % count;
-		struct $thread * thrd = try_pop(cltr, idx __STATS(, __tls_stats()->ready.pop.search));
+		struct thread$ * thrd = try_pop(cltr, idx __STATS(, __tls_stats()->ready.pop.search));
 		if(thrd) {
 			return thrd;
     
@@ -712,5 +719,5 @@
 //-----------------------------------------------------------------------
 // Given 2 indexes, pick the list with the oldest push an try to pop from it
-static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) {
+static inline struct thread$ * try_pop(struct cluster * cltr, unsigned i, unsigned j __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) {
 	// Pick the bet list
 	int w = i;
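Note: the body of this two-index helper is elided by the changeset; only its signature changes. As a reminder of the technique its comment names (pick the list with the oldest push), a minimal plain-C sketch of a two-choice pick by push timestamp, assuming each lane records the timestamp of its oldest element (field and type names are illustrative, not libcfa's):

    #include <stdint.h>

    /* Illustrative lane carrying the push timestamp of its front element. */
    struct lane_example {
        uint64_t oldest_ts;   /* push timestamp of the element at the front */
        /* ... queue storage elided ... */
    };

    /* Two-random-choices: given two candidate lanes i and j, prefer the one
     * whose front element has waited longest, i.e. the smaller timestamp.  */
    static unsigned pick_oldest_example(const struct lane_example * lanes,
                                        unsigned i, unsigned j) {
        return (lanes[j].oldest_ts < lanes[i].oldest_ts) ? j : i;
    }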
     
@@ -847,5 +854,5 @@
 				// As long as we can pop from this lane to push the threads somewhere else in the queue
 				while(!is_empty(lanes.data[idx])) {
-					struct $thread * thrd;
+					struct thread$ * thrd;
 					unsigned long long _;
 					[thrd, _] = pop(lanes.data[idx]);
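Note for readers unfamiliar with Cforall: `[thrd, tsv] = pop(lane)` above is a multiple-return assignment, i.e. pop returns both the thread and its push timestamp. A rough plain-C analog using an aggregate return (illustrative only, not the libcfa API):

    /* Illustrative stand-ins for the thread descriptor and a ready-queue lane. */
    struct thread_example;
    struct lane_example;

    /* In C, returning two values needs an aggregate (or out-parameters). */
    struct pop_result_example {
        struct thread_example * thrd;   /* the popped thread, or NULL */
        unsigned long long      tsv;    /* its push timestamp         */
    };

    /* Stub so the sketch stands alone; the real pop lives in ready_queue.cfa. */
    static struct pop_result_example pop_example(struct lane_example * lane) {
        (void)lane;
        struct pop_result_example r = { 0, 0 };
        return r;
    }

    static void drain_one_example(struct lane_example * lane) {
        /* corresponds to `[thrd, _] = pop(lanes.data[idx]);` in the hunk above */
        struct pop_result_example r = pop_example(lane);
        struct thread_example * thrd = r.thrd;
        (void)thrd;
    }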