Changes in / [a77f25b:f55f110]
- Location:
- libcfa/src/concurrency
- Files:
-
- 2 edited
-
kernel.hfa (modified) (2 diffs)
-
ready_queue.cfa (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/kernel.hfa
ra77f25b rf55f110 68 68 unsigned last; 69 69 signed cpu; 70 // unsigned long long int cutoff;71 70 } rdq; 72 71 … … 154 153 }; 155 154 156 struct __attribute__((aligned(128))) __cache_id_t {155 struct __attribute__((aligned(16))) __cache_id_t { 157 156 volatile unsigned id; 158 157 };
libcfa/src/concurrency/ready_queue.cfa
ra77f25b rf55f110 303 303 lanes.help[idx].dst = 0; 304 304 lanes.help[idx].tri = 0; 305 }306 307 caches = alloc( cpu_info.llc_count );308 for( idx; (size_t)cpu_info.llc_count ) {309 (caches[idx]){};310 305 } 311 306 #else … … 404 399 /* paranoid */ verify(cpu < cpu_info.hthrd_count); 405 400 unsigned this_cache = cpu_info.llc_map[cpu].cache; 406 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED); 401 402 // Super important: don't write the same value over and over again 403 // We want to maximise our chances that his particular values stays in cache 404 if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache) 405 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED); 407 406 408 407 const unsigned long long ctsc = rdtscl(); … … 506 505 } 507 506 508 static inline int pop_getcpu(processor * proc, __ready_queue_caches_t * caches) {509 const int prv = proc->rdq.cpu;510 const int cpu = __kernel_getcpu();511 if( prv != proc->rdq.cpu ) {512 unsigned pidx = cpu_info.llc_map[prv].cache;513 /* paranoid */ verify(pidx < cpu_info.llc_count);514 515 unsigned nidx = cpu_info.llc_map[cpu].cache;516 /* paranoid */ verify(pidx < cpu_info.llc_count);517 518 depart(caches[pidx]);519 arrive(caches[nidx]);520 521 __STATS( /* cpu migs++ */ )522 }523 return proc->rdq.cpu = cpu;524 }525 526 507 // Pop from the ready queue from a given cluster 527 508 __attribute__((hot)) thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { … … 530 511 531 512 processor * const proc = kernelTLS().this_processor; 532 const int cpu = pop_getcpu( proc, caches ); 533 // const int cpu = __kernel_getcpu(); 513 const int cpu = __kernel_getcpu(); 534 514 /* paranoid */ verify(cpu >= 0); 535 515 /* paranoid */ verify(cpu < cpu_info.hthrd_count); … … 548 528 unsigned long long max = 0; 549 529 for(i; READYQ_SHARD_FACTOR) { 550 unsigned long long tsc = moving_average(ctsc -ts(lanes.data[start + i]), lanes.tscs[start 
+ i].ma);530 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 551 531 if(tsc > max) max = tsc; 552 532 } … … 569 549 unsigned long long max = 0; 570 550 for(i; READYQ_SHARD_FACTOR) { 571 unsigned long long tsc = moving_average(ctsc -ts(lanes.data[start + i]), lanes.tscs[start + i].ma);551 unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 572 552 if(tsc > max) max = tsc; 573 553 } … … 577 557 proc->rdq.target = MAX; 578 558 lanes.help[target / READYQ_SHARD_FACTOR].tri++; 579 if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 580 __STATS( __tls_stats()->ready.pop.helped[target]++; ) 559 if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 581 560 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 582 561 proc->rdq.last = target; … … 587 566 588 567 unsigned last = proc->rdq.last; 589 if(last != MAX && moving_average(ctsc - lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) { 590 __STATS( __tls_stats()->ready.pop.helped[last]++; ) 568 if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) { 591 569 thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help)); 592 570 if(t) return t;
Note:
See TracChangeset
for help on using the changeset viewer.