Context Navigation

Reverse Diff

Changes in / [572a02f:dcbfcbc]

Files:

: 7 edited

libcfa/src/concurrency/kernel.cfa (modified) (2 diffs)
libcfa/src/concurrency/kernel.hfa (modified) (1 diff)
libcfa/src/concurrency/kernel/startup.cfa (modified) (1 diff)
libcfa/src/concurrency/ready_queue.cfa (modified) (5 diffs)
libcfa/src/device/cpu.cfa (modified) (6 diffs)
libcfa/src/device/cpu.hfa (modified) (1 diff)
tests/device/cpu.cfa (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

libcfa/src/concurrency/kernel.cfa

-                      r572a02f
+                      rdcbfcbc
                                 // Spin a little on I/O, just in case
                                         for(5) {
+                                for(5) {
                                         __maybe_io_drain( this );
                                         readyThread = pop_fast( this->cltr );
 …
                                 // no luck, try stealing a few times
                                         for(5) {
+                                for(5) {
                                         if( __maybe_io_drain( this ) ) {
                                                 readyThread = pop_fast( this->cltr );

libcfa/src/concurrency/kernel.hfa

r572a02f	rdcbfcbc
66	66	unsigned id;
67	67	unsigned target;
	68	unsigned last;
68	69	unsigned long long int cutoff;
69	70	} rdq;

libcfa/src/concurrency/kernel/startup.cfa

r572a02f	rdcbfcbc
541	541	this.rdq.id = -1u;
542	542	this.rdq.target = -1u;
	543	this.rdq.last = -1u;
543	544	this.rdq.cutoff = 0ull;
544	545	do_terminate = false;

libcfa/src/concurrency/ready_queue.cfa

-                      r572a02f
+                      rdcbfcbc
 #include "bits/defs.hfa"
+#include "device/cpu.hfa"
 #include "kernel_private.hfa"
 …
 #endif
+#if   defined(USE_RELAXED_FIFO)
+#if   defined(USE_CPU_WORK_STEALING)
+        #define READYQ_SHARD_FACTOR 2
+#elif defined(USE_RELAXED_FIFO)
         #define BIAS 4
         #define READYQ_SHARD_FACTOR 4
 …
 //=======================================================================
 void ?{}(__ready_queue_t & this) with (this) {
+        lanes.data  = 0p;
+        lanes.tscs  = 0p;
+        lanes.count = 0;
+        #if defined(USE_CPU_WORK_STEALING)
+                lanes.count = cpu_info.hthrd_count * READYQ_SHARD_FACTOR;
+                lanes.data = alloc( lanes.count );
+                lanes.tscs = alloc( lanes.count );
+                for( idx; (size_t)lanes.count ) {
+                        (lanes.data[idx]){};
+                        lanes.tscs[idx].tv = rdtscl();
+                }
+        #else
+                lanes.data  = 0p;
+                lanes.tscs  = 0p;
+                lanes.count = 0;
+        #endif
+}
 void ^?{}(__ready_queue_t & this) with (this) {
+        verify( SEQUENTIAL_SHARD == lanes.count );
+        #if !defined(USE_CPU_WORK_STEALING)
+                verify( SEQUENTIAL_SHARD == lanes.count );
+        #endif
         free(lanes.data);
         free(lanes.tscs);
 …
 //-----------------------------------------------------------------------
+#if defined(USE_CPU_WORK_STEALING)
+        __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
                 // Push thrd onto one of the READYQ_SHARD_FACTOR lanes reserved for the cpu this code is running on.
                 // The lanes of a cpu start at index map.self * READYQ_SHARD_FACTOR, where map is the llc_map entry of that cpu.
+                __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
+                processor * const proc = kernelTLS().this_processor;
                 // A push is external when the caller asked for it (!push_local), when there is no current processor,
                 // or when the current processor belongs to a different cluster.
+                const bool external = !push_local || (!proc) || (cltr != proc->cltr);
+                const int cpu = __kernel_getcpu();
+                /* paranoid */ verify(cpu >= 0);
+                /* paranoid */ verify(cpu < cpu_info.hthrd_count);
+                /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
+                const cpu_map_entry_t & map = cpu_info.llc_map[cpu];
+                /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
+                /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
+                /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %u lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
+                const int start = map.self * READYQ_SHARD_FACTOR;
+                unsigned i;
+                do {
                         // External pushes pick the lane with __tls_rand(), the others walk the lanes with the processor's rdq.its counter
+                        unsigned r;
+                        if(unlikely(external)) { r = __tls_rand(); }
+                        else { r = proc->rdq.its++; }
+                        i = start + (r % READYQ_SHARD_FACTOR);
+                        // If the lane lock cannot be acquired, pick a lane again and retry
+                } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
+                // The lane is locked: actually push the thread
+                push(lanes.data[i], thrd);
+                // Unlock the lane; only bookkeeping and debug output remain
+                __atomic_unlock( &lanes.data[i].lock );
+                #if !defined(__CFA_NO_STATISTICS__)
                         // External pushes are counted atomically on the cluster, local ones on the thread-local statistics
+                        if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
+                        else __tls_stats()->ready.push.local.success++;
+                #endif
                 // NOTE(review): used.mask[0] and lane_first are not referenced anywhere else in this function -- confirm they still exist when debug printing is enabled
+                __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
+        }
+        // Pop from the ready queue of a given cluster.
+        // Calls alternate between picking a "help" target (a random lane among those covered by this cpu's llc_map entry)
+        // and trying that target, before falling back to the lanes of the current cpu. Returns 0p when nothing was popped.
+        __attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+                /* paranoid */ verify( lanes.count > 0 );
+                /* paranoid */ verify( kernelTLS().this_processor );
+                const int cpu = __kernel_getcpu();
+                /* paranoid */ verify(cpu >= 0);
+                /* paranoid */ verify(cpu < cpu_info.hthrd_count);
+                /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
+                const cpu_map_entry_t & map = cpu_info.llc_map[cpu];
+                /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
+                /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
+                /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %u lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
+                processor * const proc = kernelTLS().this_processor;
+                const int start = map.self * READYQ_SHARD_FACTOR;
+                // Did we already have a help target
+                if(proc->rdq.target == -1u) {
+                        // We don't have a target: record the smallest ts() of our own lanes as the cutoff and pick a target for the next call
                         // (the loop starts at i == 0, so lane start is read a second time; this does not change the minimum)
+                        unsigned long long min = ts(lanes.data[start]);
+                        for(i; READYQ_SHARD_FACTOR) {
+                                unsigned long long tsc = ts(lanes.data[start + i]);
+                                if(tsc < min) min = tsc;
+                        }
+                        proc->rdq.cutoff = min;
                         // The target is a random lane in [map.start * READYQ_SHARD_FACTOR, (map.start + map.count) * READYQ_SHARD_FACTOR)
+                        proc->rdq.target = (map.start * READYQ_SHARD_FACTOR) + (__tls_rand() % (map.count* READYQ_SHARD_FACTOR));
+                }
+                else {
                         // bias is currently 0, so cutoff is exactly proc->rdq.cutoff
+                        const unsigned long long bias = 0; //2_500_000_000;
+                        const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff;
+                        {
                                 // The target is consumed: it is reset whether or not the pop below is attempted
+                                unsigned target = proc->rdq.target;
+                                proc->rdq.target = -1u;
                                 // Only help when both the recorded lane timestamp and ts() of the lane are below the cutoff
+                                if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) {
+                                        $thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
                                         // Remember the lane as 'last', even when the pop itself returned nothing
+                                        proc->rdq.last = target;
+                                        if(t) return t;
+                                }
+                        }
                         // Retry the last lane we helped while it still passes the same cutoff test, otherwise forget it
+                        unsigned last = proc->rdq.last;
+                        if(last != -1u && lanes.tscs[last].tv < cutoff && ts(lanes.data[last]) < cutoff) {
+                                $thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
+                                if(t) return t;
+                        }
+                        else {
+                                proc->rdq.last = -1u;
+                        }
+                }
                 // Fall back to the lanes of the current cpu, walking them with the processor's rdq.itr counter
+                for(READYQ_SHARD_FACTOR) {
+                        unsigned i = start + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
+                        if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
+                }
+                // Nothing could be popped from any attempted lane, return 0p
+                return 0p;
+        }
+        // Steal attempt: try to pop from a single lane picked at random among all the lanes of the cluster.
+        // Returns whatever try_pop returns for that lane.
+        __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
+                // The current processor and its 'last' lane were read here but never used, so they are no longer fetched
+                unsigned i = __tls_rand() % lanes.count;
+                return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
+        }
+        __attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) {
                 // Forwards to search(cltr), which is defined outside this view, and returns its result unchanged
+                return search(cltr);
+        }
+#endif
 #if defined(USE_RELAXED_FIFO)
         //-----------------------------------------------------------------------
 …
+}
+// Grow the ready queue
+void ready_queue_grow(struct cluster * cltr) {
+        size_t ncount;
+        int target = cltr->procs.total;
+        /* paranoid */ verify( ready_mutate_islocked() );
+        __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
+        // Make sure that everything is consistent
+        /* paranoid */ check( cltr->ready_queue );
+        // grow the ready queue
+        with( cltr->ready_queue ) {
+                // Find new count
+                // Make sure we always have atleast 1 list
+                if(target >= 2) {
+                        ncount = target * READYQ_SHARD_FACTOR;
+                } else {
+                        ncount = SEQUENTIAL_SHARD;
+                }
+                // Allocate new array (uses realloc and memcpies the data)
+                lanes.data = alloc( ncount, lanes.data`realloc );
+                // Fix the moved data
+                for( idx; (size_t)lanes.count ) {
+                        fix(lanes.data[idx]);
+                }
+                // Construct new data
+                for( idx; (size_t)lanes.count ~ ncount) {
+                        (lanes.data[idx]){};
+                }
+                // Update original
+                lanes.count = ncount;
+        }
+        fix_times(cltr);
+        reassign_cltr_id(cltr);
+        // Make sure that everything is consistent
+        /* paranoid */ check( cltr->ready_queue );
+        __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
+        /* paranoid */ verify( ready_mutate_islocked() );
+}
+// Shrink the ready queue
+void ready_queue_shrink(struct cluster * cltr) {
+        /* paranoid */ verify( ready_mutate_islocked() );
+        __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
+        // Make sure that everything is consistent
+        /* paranoid */ check( cltr->ready_queue );
+        int target = cltr->procs.total;
+        with( cltr->ready_queue ) {
+                // Remember old count
+                size_t ocount = lanes.count;
+                // Find new count
+                // Make sure we always have atleast 1 list
+                lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;
+                /* paranoid */ verify( ocount >= lanes.count );
+                /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
+                // for printing count the number of displaced threads
+                #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
+                        __attribute__((unused)) size_t displaced = 0;
+                #endif
+                // redistribute old data
+                for( idx; (size_t)lanes.count ~ ocount) {
+                        // Lock is not strictly needed but makes checking invariants much easier
+                        __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);
+                        verify(locked);
+                        // As long as we can pop from this lane to push the threads somewhere else in the queue
+                        while(!is_empty(lanes.data[idx])) {
+                                struct $thread * thrd;
+                                unsigned long long _;
+                                [thrd, _] = pop(lanes.data[idx]);
+                                push(cltr, thrd, true);
+                                // for printing count the number of displaced threads
+                                #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
+                                        displaced++;
+                                #endif
+                        }
+                        // Unlock the lane
+                        __atomic_unlock(&lanes.data[idx].lock);
+                        // TODO print the queue statistics here
+                        ^(lanes.data[idx]){};
+                }
+                __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
+                // Allocate new array (uses realloc and memcpies the data)
+                lanes.data = alloc( lanes.count, lanes.data`realloc );
+                // Fix the moved data
+                for( idx; (size_t)lanes.count ) {
+                        fix(lanes.data[idx]);
+                }
+        }
+        fix_times(cltr);
+        reassign_cltr_id(cltr);
+        // Make sure that everything is consistent
+        /* paranoid */ check( cltr->ready_queue );
+        __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
+        /* paranoid */ verify( ready_mutate_islocked() );
+}
+#if defined(USE_CPU_WORK_STEALING)
+        // The ready_queue size is fixed in this case: with USE_CPU_WORK_STEALING the constructor already allocates
+        // cpu_info.hthrd_count * READYQ_SHARD_FACTOR lanes, so growing and shrinking do nothing.
+        void ready_queue_grow(struct cluster * cltr) {}
+        void ready_queue_shrink(struct cluster * cltr) {}
+#else
+        // Grow the ready queue so that it holds READYQ_SHARD_FACTOR lanes per processor of the cluster
+        // (or SEQUENTIAL_SHARD lanes when the cluster has fewer than 2 processors).
+        // ready_mutate_islocked() is verified to hold on entry and on exit.
+        void ready_queue_grow(struct cluster * cltr) {
+                size_t ncount;
+                int target = cltr->procs.total;
+                /* paranoid */ verify( ready_mutate_islocked() );
+                __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
+                // Make sure that everything is consistent
+                /* paranoid */ check( cltr->ready_queue );
+                // grow the ready queue
+                with( cltr->ready_queue ) {
+                        // Find new count
+                        // Make sure we always have at least 1 list
+                        if(target >= 2) {
+                                ncount = target * READYQ_SHARD_FACTOR;
+                        } else {
+                                ncount = SEQUENTIAL_SHARD;
+                        }
+                        // Allocate new array (uses realloc and memcpies the data)
+                        lanes.data = alloc( ncount, lanes.data`realloc );
+                        // Fix the moved data
                         // NOTE(review): fix() presumably repairs state that the realloc copy invalidated -- confirm against its definition
+                        for( idx; (size_t)lanes.count ) {
+                                fix(lanes.data[idx]);
+                        }
+                        // Construct the lanes that did not exist before, i.e. indexes [old count, ncount)
+                        for( idx; (size_t)lanes.count ~ ncount) {
+                                (lanes.data[idx]){};
+                        }
+                        // Update original
+                        lanes.count = ncount;
+                }
                 // lanes.tscs is not touched above
                 // NOTE(review): presumably fix_times() resizes it -- confirm
+                fix_times(cltr);
+                reassign_cltr_id(cltr);
+                // Make sure that everything is consistent
+                /* paranoid */ check( cltr->ready_queue );
+                __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
+                /* paranoid */ verify( ready_mutate_islocked() );
+        }
+        // Shrink the ready queue to READYQ_SHARD_FACTOR lanes per processor of the cluster
+        // (or SEQUENTIAL_SHARD lanes when the cluster has fewer than 2 processors).
+        // Threads found in the lanes being removed are pushed back onto the cluster before the array is reallocated.
+        // ready_mutate_islocked() is verified to hold on entry and on exit.
+        void ready_queue_shrink(struct cluster * cltr) {
+                /* paranoid */ verify( ready_mutate_islocked() );
+                __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
+                // Make sure that everything is consistent
+                /* paranoid */ check( cltr->ready_queue );
+                int target = cltr->procs.total;
+                with( cltr->ready_queue ) {
+                        // Remember old count
+                        size_t ocount = lanes.count;
+                        // Find new count
+                        // Make sure we always have at least 1 list
                         // lanes.count is lowered before the redistribution below
                         // NOTE(review): presumably so that push() only selects surviving lanes -- confirm against the push used in this configuration
+                        lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;
+                        /* paranoid */ verify( ocount >= lanes.count );
+                        /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
+                        // for printing, count the number of displaced threads
+                        #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
+                                __attribute__((unused)) size_t displaced = 0;
+                        #endif
+                        // redistribute the threads of the lanes being removed, i.e. indexes [new count, ocount)
+                        for( idx; (size_t)lanes.count ~ ocount) {
+                                // Lock is not strictly needed but makes checking invariants much easier
+                                __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);
+                                verify(locked);
+                                // As long as we can pop from this lane to push the threads somewhere else in the queue
+                                while(!is_empty(lanes.data[idx])) {
+                                        struct $thread * thrd;
+                                        unsigned long long _;
+                                        [thrd, _] = pop(lanes.data[idx]);
+                                        push(cltr, thrd, true);
+                                        // for printing, count the number of displaced threads
+                                        #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
+                                                displaced++;
+                                        #endif
+                                }
+                                // Unlock the lane
+                                __atomic_unlock(&lanes.data[idx].lock);
+                                // TODO print the queue statistics here
                                 // Destroy the now empty lane
+                                ^(lanes.data[idx]){};
+                        }
                         // NOTE(review): 'displaced' is only declared under the debug-print macros above; this relies on the print macro discarding its arguments otherwise -- confirm
+                        __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
+                        // Allocate new array (uses realloc and memcpies the data)
+                        lanes.data = alloc( lanes.count, lanes.data`realloc );
+                        // Fix the moved data
+                        for( idx; (size_t)lanes.count ) {
+                                fix(lanes.data[idx]);
+                        }
+                }
+                fix_times(cltr);
+                reassign_cltr_id(cltr);
+                // Make sure that everything is consistent
+                /* paranoid */ check( cltr->ready_queue );
+                __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
+                /* paranoid */ verify( ready_mutate_islocked() );
+        }
+#endif
 #if !defined(__CFA_NO_STATISTICS__)

libcfa/src/device/cpu.cfa

-                      r572a02f
+                      rdcbfcbc
+}
+struct llc_map_t { // one entry per distinct last level cache found by distinct_llcs
+        raw_cache_instance * raw; // the raw cache instance this entry was created from
+        unsigned count;           // starts at 0 and is incremented each time a cpu of this cache is handed its 'self' id
+        unsigned start;           // -1u until assigned, then the first id of the range given to this cache
+};
 // returns an allocated list of all the distinct last level caches
 static [*idx_range_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
+static [*llc_map_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
         // Allocate at least one element
         idx_range_t * ranges = alloc();
+        llc_map_t* ranges = alloc();
         size_t range_cnt = 1;
         // Initialize with element 0
+        *ranges = raw[0][llc_idx].range;
+        ranges->raw = &raw[0][llc_idx];
+        ranges->count = 0;
+        ranges->start = -1u;
         // Go over all other cpus
         CPU_LOOP: for(i; 1~cpus) {
                 // Check if the range is already there
                 idx_range_t candidate = raw[i][llc_idx].range;
+                raw_cache_instance * candidate = &raw[i][llc_idx];
                 for(j; range_cnt) {
                         idx_range_t exist = ranges[j];
+                        llc_map_t & exist = ranges[j];
                         // If the range is already there just jump to the next cpu
                         if(0 == strcmp(candidate, exist)) continue CPU_LOOP;
+                        if(0 == strcmp(candidate->range, exist.raw->range)) continue CPU_LOOP;
+                }
                 // The range wasn't there, added to the list
                 ranges = alloc(range_cnt + 1, ranges`realloc);
+                ranges[range_cnt] = candidate;
+                ranges[range_cnt].raw = candidate;
+                ranges[range_cnt].count = 0;
+                ranges[range_cnt].start = -1u;
                 range_cnt++;
+        }
 …
 struct cpu_pairing_t {
         unsigned cpu;
         unsigned llc_id;
+        unsigned id;
 };
 int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
         return lhs.llc_id < rhs.llc_id;
+}
 static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, idx_range_t * maps, size_t map_cnt) {
+        return lhs.id < rhs.id;
+}
+static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
         cpu_pairing_t * pairings = alloc(cpus);
 …
                 idx_range_t want = raw[i][0].range;
                 MAP_LOOP: for(j; map_cnt) {
                         if(0 != strcmp(want, maps[j])) continue MAP_LOOP;
                         pairings[i].llc_id = j;
+                        if(0 != strcmp(want, maps[j].raw->range)) continue MAP_LOOP;
+                        pairings[i].id = j;
                         continue CPU_LOOP;
+                }
 …
         return pairings;
+}
+#include <fstream.hfa>
 extern "C" {
 …
                 // Find number of distinct cache instances
                 idx_range_t * maps;
+                llc_map_t * maps;
                 size_t map_cnt;
                 [maps, map_cnt] =  distinct_llcs(cpus, cache_levels - llc, raw);
                 #if defined(__CFA_WITH_VERIFY__)
+                // Verify that the caches cover all the cpus
+                {
+                        unsigned width = 0;
+                        unsigned width1 = 0;
+                        unsigned width2 = 0;
                         for(i; map_cnt) {
                                 const char * _;
+                                width += read_width(maps[i], strlen(maps[i]), &_);
+                                width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
+                                width2 += maps[i].raw->width;
+                        }
+                        verify(width == cpus);
+                        verify(width1 == cpus);
+                        verify(width2 == cpus);
+                }
                 #endif
 …
                 qsort(pairings, cpus);
+                unsigned llc_width = raw[0][cache_levels - llc].width;
+                // From the mappings, give each last level cache its starting id
+                {
+                        unsigned it = 0;
+                        for(i; cpus) {
+                                unsigned llc_id = pairings[i].id;
+                                if(maps[llc_id].start == -1u) {
+                                        maps[llc_id].start = it;
+                                        it += maps[llc_id].raw->width;
+                                        /* paranoid */ verify(maps[llc_id].start < it);
+                                        /* paranoid */ verify(it != -1u);
+                                }
+                        }
+                        /* paranoid */ verify(it == cpus);
+                }
+                // From the mappings build the actual cpu map we want
                 struct cpu_map_entry_t * entries = alloc(cpus);
                 for(i; cpus) { entries[i].count = 0; }
                 for(i; cpus) {
+                        /* paranoid */ verify(pairings[i].id < map_cnt);
                         unsigned c = pairings[i].cpu;
+                        entries[c].start = pairings[i].llc_id * llc_width;
+                        entries[c].count = llc_width;
+                        unsigned llc_id = pairings[i].id;
+                        unsigned width = maps[llc_id].raw->width;
+                        unsigned start = maps[llc_id].start;
+                        unsigned self  = start + (maps[llc_id].count++);
+                        entries[c].count = width;
+                        entries[c].start = start;
+                        entries[c].self  = self;
+                }

libcfa/src/device/cpu.hfa

r572a02f	rdcbfcbc
17	17
18	18	struct cpu_map_entry_t {
	19	unsigned self;
19	20	unsigned start;
20	21	unsigned count;

tests/device/cpu.cfa

-                      r572a02f
+                      rdcbfcbc
 #include <fstream.hfa>
 #include <device/cpu.hfa>
+#include <stdlib.hfa>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
 extern "C" {
+        #include <dirent.h>
+        #include <sys/types.h>
+        #include <sys/stat.h>
         #include <sys/sysinfo.h>
+        #include <fcntl.h>
+}
+// Invoke fn once for every entry of the directory found at path.
+// Returns 0 once the directory has been walked, ENOTDIR when it could not be opened.
+static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
+        DIR * handle = opendir(path);
+        if(0p == handle) { return ENOTDIR; }
+        // readdir hands back 0p once there is nothing left to visit
+        for(struct dirent * entry = readdir(handle); entry != 0p; entry = readdir(handle)) {
+                fn( entry );
+        }
+        // release the directory stream before reporting success
+        closedir(handle);
+        return 0;
+}
+// Count the directories in path whose name has the form '[prefix]N', where prefix is the
+// parameter and N is a non-negative base 10 integer made of digits only.
+// The matches are expected to be numbered contiguously from 0: this is checked by comparing
+// the number of matches against the largest N seen.
+static int count_prefix_dirs(const char * path, const char * prefix) {
+        // largest N seen so far and number of matching directories
+        int max = -1;
+        int count = 0;
+        const size_t prefix_len = strlen(prefix);
+        void lambda(struct dirent * ent) {
+                // we are looking for prefixN, where N is a number
+                // check that the name starts with the prefix
+                if(strncmp(ent->d_name, prefix, prefix_len) != 0) { return; }
+                // check that the next part is a number
+                // requiring a leading digit rejects a name that is only the prefix (strtol would
+                // report 0 without consuming anything) as well as the signs and blanks strtol skips
+                const char * s = ent->d_name + prefix_len;
+                if(*s < '0' || *s > '9') { return; }
+                char * end;
+                long int val = strtol(s, &end, 10);
+                if(*end != '\0') { return; }
+                // check that it's a directory
+                if(ent->d_type != DT_DIR) { return; }
+                // it's a match!
+                max = max(val, max);
+                count++;
+        }
+        // the result of iterate_dir is not checked: a directory that cannot be opened simply yields no match
+        iterate_dir(path, lambda);
+        /* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);
+        return count;
+}
+// Number of cache *indexes* listed for cpu0.
+// An index is not a level: the Data and Instruction caches of one level each get their own index.
+// PITFALL: every other cpu is assumed to have the same indexes as cpu0
+static int count_cache_indexes(void) {
+        const char * const cpu0_caches = "/sys/devices/system/cpu/cpu0/cache";
+        int indexes = count_prefix_dirs(cpu0_caches, "index");
+        return indexes;
+}
+// Read information about a specific cache index/cpu file into the output buffer.
+// The file is /sys/devices/system/cpu/cpu<cpu>/cache/index<idx>/<file>; at most out_len bytes are read.
+// The last byte read is replaced by the string terminator (NOTE(review): presumably the trailing newline -- confirm).
+// Returns the length of the resulting string, or 0 when nothing could be read.
+static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
+        // Pick the file we want and read it
+        char buf[128];
+        /* paranoid */ __attribute__((unused)) int len =
+        snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
+        // a result of sizeof(buf) or more means the name was truncated
+        /* paranoid */ verifyf(len > 0 && (size_t)len < sizeof(buf), "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);
+        // open takes (path, flags[, mode]): the access mode goes in flags and no mode is needed without O_CREAT
+        int fd = open(buf, O_RDONLY);
+        // 0 is a valid descriptor, only negative values report a failure
+        /* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);
+        ssize_t r = read(fd, out, out_len);
+        /* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);
+        /* paranoid */ __attribute__((unused)) int ret =
+        close(fd);
+        /* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
+        // when verification is compiled out, never index before the start of out
+        if(r <= 0) {
+                if(out_len > 0) { out[0] = '\0'; }
+                return 0;
+        }
+        out[r-1] = '\0';
+        return r-1;
+}
+// Pick the cache index of cpu0 that reports the highest cache level.
+// Indexes are visited from the last one down and only a strictly higher level replaces the
+// current pick, so among indexes of equal level the largest one is kept.
+unsigned find_idx() {
+        int index_cnt = count_cache_indexes();
+        unsigned best_level = 0;
+        unsigned best_idx = -1u;
+        for(step; index_cnt) {
+                unsigned idx = index_cnt - 1 - step;
+                // Level is the cache level: higher means bigger and slower
+                char text[32];
+                read_cpuidxinfo_into(0, idx, "level", text, 32);
+                char * stop;
+                unsigned long level = strtoul(text, &stop, 10);
+                /* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, that doesn't sound right", 0);
+                /* paranoid */ verify(*stop == '\0');
+                // not an improvement on what we already have
+                if(level <= best_level) continue;
+                best_level = level;
+                best_idx = idx;
+        }
+        /* paranoid */ verify(best_idx != -1u);
+        return best_idx;
+}
 int main() {
+        //-----------------------------------------------------------------------
         int ret1 = get_nprocs();
         int ret2 = cpu_info.hthrd_count;
 …
+        }
+        //-----------------------------------------------------------------------
+        // Make sure no one has the same self
+        for(ime; cpu_info.hthrd_count) {
+                unsigned me = cpu_info.llc_map[ime].self;
+                {
+                        unsigned s = cpu_info.llc_map[ime].start;
+                        unsigned e = s + cpu_info.llc_map[ime].count;
+                        if(me < s || me >= e) {
+                                sout | "CPU" | ime | "outside of it's own map: " | s | "<=" | me | "<" | e;
+                        }
+                }
+                for(ithem; cpu_info.hthrd_count) {
+                        if(ime == ithem) continue;
+                        unsigned them = cpu_info.llc_map[ithem].self;
+                        if(me == them) {
+                                sout | "CPU" | ime | "has conflicting self id with" | ithem | "(" | me | ")";
+                        }
+                }
+        }
+        //-----------------------------------------------------------------------
+        unsigned idx = find_idx();
+        // For all procs check mapping is consistent
+        for(cpu_me; cpu_info.hthrd_count) {
+                // shared_cpu_list of the selected cache index, as read from sysfs for this cpu
+                char buf_me[32];
+                size_t len_me = read_cpuidxinfo_into(cpu_me, idx, "shared_cpu_list", buf_me, 32);
+                for(cpu_them; cpu_info.hthrd_count) {
+                        if(cpu_me == cpu_them) continue;
+                        char buf_them[32];
+                        size_t len_them = read_cpuidxinfo_into(cpu_them, idx, "shared_cpu_list", buf_them, 32);
+                        // according to the files, two cpus share the cache when their lists are identical
+                        bool match_file = len_them == len_me && 0 == strncmp(buf_them, buf_me, len_me);
+                        // according to cpu_info, two cpus share the cache when their map entries have the same start and count
+                        bool match_info = cpu_info.llc_map[cpu_me].start == cpu_info.llc_map[cpu_them].start && cpu_info.llc_map[cpu_me].count == cpu_info.llc_map[cpu_them].count;
+                        if(match_file != match_info) {
+                                sout | "CPU" | cpu_me | "and" | cpu_them | "have inconsitent file and cpu_info";
+                                sout | cpu_me | ": <" | cpu_info.llc_map[cpu_me  ].start | "," | cpu_info.llc_map[cpu_me  ].count | "> '" | buf_me   | "'";
+                                // the second line describes cpu_them, so label it with cpu_them rather than cpu_me
+                                sout | cpu_them | ": <" | cpu_info.llc_map[cpu_them].start | "," | cpu_info.llc_map[cpu_them].count | "> '" | buf_them | "'";
+                        }
+                }
+        }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changes in / [572a02f:dcbfcbc]

Legend:

libcfa/src/concurrency/kernel.cfa

libcfa/src/concurrency/kernel.hfa

libcfa/src/concurrency/kernel/startup.cfa

libcfa/src/concurrency/ready_queue.cfa

libcfa/src/device/cpu.cfa

libcfa/src/device/cpu.hfa

tests/device/cpu.cfa

Download in other formats: