| [96f002c1] | 1 | //
 | 
|---|
 | 2 | // Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
 | 
|---|
 | 3 | //
 | 
|---|
 | 4 | // The contents of this file are covered under the licence agreement in the
 | 
|---|
 | 5 | // file "LICENCE" distributed with Cforall.
 | 
|---|
 | 6 | //
 | 
|---|
 | 7 | // topology.cfa -- read the data structure
 | 
|---|
 | 8 | //
 | 
|---|
 | 9 | // Author           : Thierry Delisle
 | 
|---|
 | 10 | // Created On       : Thu Jun 10 16:13:07 2021
 | 
|---|
 | 11 | // Last Modified By :
 | 
|---|
 | 12 | // Last Modified On :
 | 
|---|
 | 13 | // Update Count     :
 | 
|---|
 | 14 | //
 | 
|---|
 | 15 | 
 | 
|---|
 | 16 | #include "device/cpu.hfa"
 | 
|---|
 | 17 | 
 | 
|---|
 | 18 | #include <math.hfa>
 | 
|---|
 | 19 | #include <stdlib.hfa>
 | 
|---|
 | 20 | 
 | 
|---|
 | 21 | #include <errno.h>
 | 
|---|
 | 22 | #include <stdio.h>
 | 
|---|
 | 23 | #include <string.h>
 | 
|---|
 | 24 | #include <unistd.h>
 | 
|---|
 | 25 | 
 | 
|---|
 | 26 | extern "C" {
 | 
|---|
 | 27 |         #include <dirent.h>
 | 
|---|
 | 28 |         #include <sys/types.h>
 | 
|---|
 | 29 |         #include <sys/stat.h>
 | 
|---|
 | 30 |         #include <fcntl.h>
 | 
|---|
 | 31 | }
 | 
|---|
 | 32 | 
 | 
|---|
| [032234bd] | 33 | #include "bits/defs.hfa"
 | 
|---|
| [33608cb] | 34 | #include "algorithms/range_iterator.hfa"
 | 
|---|
 | 35 | 
 | 
|---|
// Bounded strchr: look for 'character' in 'str', examining at most 'len'
// chars and never reading past the terminating NUL of 'str'.
// Returns a pointer to the first occurrence, or null when there is none.
static const char * strnchr(const char * str, int character, size_t len) {
        // never scan further than the end of the string
        size_t span = strnlen(str, len);
        const void * hit = memchr(str, character, span);
        return (const char *)hit;
}
 | 
|---|
 | 41 | 
 | 
|---|
 // Prefix comparison: true when 'have' begins with the whole of 'want'.
 // Whatever follows in 'have' beyond the length of 'want' is ignored.
 static bool strmatch(const char * want, char * have) {
         return 0 == strncmp(want, have, strlen(want));
 }
 | 
|---|
 | 48 | 
 | 
|---|
 | 49 | typedef const char * idx_range_t;
 | 
|---|
 | 50 | 
 | 
|---|
 // Parse the text at 'map' as a base 10 unsigned number.
 // '*end' receives the address of the first char strtoul did not consume.
 // The paranoid checks require that exactly 'len' chars were consumed.
 static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
         unsigned long parsed = strtoul(map, (char**)end, 10);

         // how many chars the conversion actually used
         /* paranoid */ __attribute__((unused)) size_t read = (*end - map);
         /* paranoid */ verifyf(read <= len, "String '%s' passed with inconsistent length %zu", map, len);
         /* paranoid */ verifyf(read == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - read);

         return parsed;
 }
 | 
|---|
 | 60 | 
 | 
|---|
 // Evaluate the width (number of indexes covered) of a comma separated list
 // of indexes and inclusive ranges, looking at the first 'len' chars of 'map'.
 // For example 'A-B,C-D,E,F' has a width of '(B-A+1) + (D-C+1) + 1 + 1'.
 // Like strtoul and friends, it has a (non-optional) end pointer, which is
 // set past the last element that was read.
 //
 // NOTE(review): an earlier FIXME stated that only 1 comma is supported; the
 // code splits at the first comma and recurses on everything after it, so
 // that limitation looks stale -- confirm before relying on it.
 static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
         // A list? split at the first comma and sum both sides
         const char * sep = strnchr(map, ',', len);
         if(sep != 0p) {
                 const char * ignored;
                 size_t head = sep - map;
                 unsigned first = read_width(map, head, &ignored);
                 unsigned rest  = read_width(sep + 1, len - head - 1, end);
                 return first + rest;
         }

         // A range? 'lo-hi' is inclusive on both ends
         const char * hyphen = strnchr(map, '-', len);
         if(hyphen != 0p) {
                 const char * ignored;
                 size_t head = hyphen - map;
                 unsigned lo = read_value(map, head, &ignored);
                 unsigned hi = read_value(hyphen + 1, len - head - 1, end);
                 return hi - lo + 1;
         }

         // Neither: a single value, which covers exactly one index
         // and consumes the whole input
         /* paranoid */ verifyf( ({strtoul(map, (char**)end, 10); *end == (map + len); }), "Value in range '%.*s' not a number", (int)len, map);
         *end = map + len;
         return 1;
 }
 | 
|---|
 | 94 | 
 | 
|---|
 // Call 'fn' once for every entry readdir reports in the directory 'path'.
 // Returns 0 once all entries were visited, or ENOTDIR when the directory
 // could not be opened (whatever the reason opendir failed).
 static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
         DIR * handle = opendir(path);
         if(handle == 0p) { return ENOTDIR; }

         // visit every entry, in whatever order readdir produces them
         for(struct dirent * ent = readdir(handle); ent != 0p; ent = readdir(handle)) {
                 fn( ent );
         }

         // done with the directory stream
         closedir(handle);
         return 0;
 }
 | 
|---|
 | 111 | 
 | 
|---|
 // Count the number of directories in 'path' with the specified prefix.
 // The directories counted have the form '[prefix]N' where prefix is the
 // parameter and N is a non-negative base 10 integer; an entry named exactly
 // 'prefix' (no number at all) is not counted.
 // Returns 0 when 'path' cannot be opened.
 // The paranoid check expects the numbers found to be exactly 0..count-1.
 static int count_prefix_dirs(const char * path, const char * prefix) {
         // track both how many matches there are and the largest N seen,
         // to make sure everything is as expected
         int max = -1;
         int count = 0;
         size_t prefix_len = strlen(prefix);
         void lambda(struct dirent * ent) {
                 // we are looking for prefixX, where X is a number
                 // check that the name starts with the prefix
                 if(strncmp(ent->d_name, prefix, prefix_len) != 0) { return; }

                 // check that the next part is a number:
                 // 'end == s' means strtol found no digits at all,
                 // '*end' must be the terminator so nothing trails the number
                 const char * s = ent->d_name + prefix_len;
                 char * end;
                 long int val = strtol(s, &end, 10);
                 if(end == s || *end != '\0' || val < 0) { return; }

                 // check that it's a directory
                 if(ent->d_type != DT_DIR) { return; }

                 // it's a match!
                 max = max(val, max);
                 count++;
         }
         int ret = iterate_dir(path, lambda);
         if(ret == ENOTDIR) return 0;

         /* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);

         return count;
 }
 | 
|---|
 | 147 | 
 | 
|---|
 // Count number of cpus in the system, based on the list of online cpus.
 // Returns the pair [number of cpus, text of the online cpu list].
 // The text is a heap copy (strndup) without the trailing newline;
 // the caller is responsible for freeing it.
 static [int, const char *] count_cpus(void) {
         const char * fpath = "/sys/devices/system/cpu/online";
         // flags go in the second argument of open, no mode is needed for reading
         int fd = open(fpath, O_RDONLY);
         /* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);

         char buff[128];
         ssize_t r = read(fd, buff, sizeof(buff));
         /* paranoid */ verifyf(r > 0, "Could not read file %s", fpath);
         // the list is expected to end with a newline, turn it into the terminator
         /* paranoid */ verify( buff[r-1] == '\n' );
         buff[r-1] = '\0';

         /* paranoid */ __attribute__((unused)) int ret =
         close(fd);
         /* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);

         const char * _;
         return [read_width(buff, r - 1, &_), strndup(buff, r - 1)];
 }
 | 
|---|
 | 167 | 
 | 
|---|
 // Count number of cache *indexes* in the system.
 // Cache indexes are distinct from cache levels: Data and Instruction caches
 // can share a level but not an index.
 // PITFALL: only cpu0 is inspected, all cpus are assumed to have the same indexes
 static int count_cache_indexes(void) {
         const char * cache_dir = "/sys/devices/system/cpu/cpu0/cache";
         const char * entry_prefix = "index";
         return count_prefix_dirs(cache_dir, entry_prefix);
 }
 | 
|---|
 | 175 | 
 | 
|---|
 | 176 | 
 | 
|---|
 // Read information about a specific cache index/cpu file into the output buffer.
 // The file read is /sys/devices/system/cpu/cpu<cpu>/cache/index<idx>/<file>.
 // At most 'out_len' bytes are stored in 'out'; the bytes are stored exactly
 // as read, no terminator is appended.
 // Returns the result of the read call, i.e., the number of bytes stored.
 static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
         // Pick the file we want and read it
         char buf[128];
         /* paranoid */ __attribute__((unused)) int len =
         snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
         // a result >= the buffer size means the path was truncated
         /* paranoid */ verifyf(len > 0 && (size_t)len < sizeof(buf), "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);

         // flags go in the second argument of open; 0 is a valid descriptor
         int fd = open(buf, O_RDONLY);
         /* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);

         ssize_t r = read(fd, out, out_len);
         /* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);

         /* paranoid */ __attribute__((unused)) int ret =
         close(fd);
         /* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
         return r;
 }
 | 
|---|
 | 196 | 
 | 
|---|
 // Callback used when iterating over the cache indexes of a given cpu:
 // idx   : the cache index
 // level : the cache level read for that index
 // range : text list of the cpus sharing that cache, NUL terminated
 // len   : length of 'range', terminator excluded
 typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);

 // Call 'handle' for each Unified or Data cache among the 'idxs' cache
 // indexes of 'cpu', going from the highest index down to index 0.
 // The range text handed to 'handle' lives in a local buffer, it must be
 // copied if it is needed after the call returns.
 static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
         for(unsigned idx = idxs; idx-- > 0; ) {
                 char buf[32];

                 // Type says what kind of cache this is,
                 // Options are: Unified, Data, Instruction
                 // We don't care about instruction caches
                 read_cpuidxinfo_into(cpu, idx, "type", buf, 32);
                 bool wanted = strmatch("Unified", buf) || strmatch("Data", buf);
                 if(!wanted) continue;

                 // Level is the cache level: higher means bigger and slower
                 // the buffer is not terminated here, the conversion stops
                 // at the first char that is not a digit
                 read_cpuidxinfo_into(cpu, idx, "level", buf, 32);
                 char * end;
                 unsigned long level = strtoul(buf, &end, 10);
                 /* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);

                 // shared_cpu_list is a range of cpus that share this particular cache
                 // the trailing newline is replaced by the terminator
                 size_t n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, 32);
                 /* paranoid */ verify( buf[n-1] == '\n' );
                 buf[n-1] = '\0';

                 // Simply call the functor
                 handle(idx, level, buf, n - 1);
         }
 }
 | 
|---|
 | 227 | 
 | 
|---|
 | 228 | 
 | 
|---|
 // Description of one cache as seen from one cpu
 struct raw_cache_instance {
         // A text description of the cpus covered, released by the destructor
         idx_range_t range;
         // The number of cpus covered
         unsigned width;
         // the cache level
         unsigned char level;
         // FIXME add at least size and type
 };

 // Constructor: an instance starts without any range text
 static void  ?{}(raw_cache_instance & this) {
         this.range = 0p;
 }

 // Destructor: release the range text
 static void ^?{}(raw_cache_instance & this) {
         free(this.range);
 }
 | 
|---|
 | 238 | 
 | 
|---|
// Returns a 2D array of instances of size [cpu count][cache levels]
// where cache level doesn't include instruction caches.
// cpus_c       : number of rows to allocate
// cpus         : text list of the cpus to fill in, each listed cpu id is used as a row index
// idxs         : number of cache indexes to inspect for each cpu
// cache_levels : number of slots in each row
// Rows of cpus that are not listed stay null. In each row, the cache of
// level L is stored in slot 'cache_levels - L', slots that are never
// reported stay zeroed (null range).
// The table, its rows and the range texts are allocated here and must be
// released by the caller.
raw_cache_instance ** build_raw_cache_table(unsigned cpus_c, idx_range_t cpus, unsigned idxs, unsigned cache_levels)
{
        raw_cache_instance ** raw = alloc(cpus_c, '\0'`fill);

        RangeIter rc = { cpus };
        while(moveNext(rc)) {
                unsigned i = rc.com;
                // zero the row so that running the destructor on a slot that
                // was never filled frees a null pointer rather than garbage
                raw[i] = alloc(cache_levels, '\0'`fill);
                void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
                        // level 0 would land one past the end of the row
                        /* paranoid */ verifyf(0 < level && level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);

                        unsigned idx = cache_levels - level;
                        raw_cache_instance & r = raw[i][idx];
                        // 'range' points into a buffer owned by the iteration, keep a copy
                        r.range = strndup(range, len);
                        r.level = level;
                        const char * end;
                        r.width = read_width(range, len, &end);
                }
                foreach_cacheidx(i, idxs, addcache);
        }

        return raw;
}
 | 
|---|
 | 264 | 
 | 
|---|
// Book-keeping for one distinct last level cache
struct llc_map_t {
        // the cache instance, owned by the raw cache table
        raw_cache_instance * raw;
        // number of cpus handed a slot in this cache so far, starts at 0
        unsigned count;
        // first slot assigned to this cache, -1u until it is assigned
        unsigned start;
};
 | 
|---|
 | 270 | 
 | 
|---|
// Returns an allocated list of all the distinct last level caches,
// together with the number of elements in that list.
// cpus    : text list of the cpus to consider
// llc_idx : slot of the last level cache in each row of 'raw'
// raw     : table produced by build_raw_cache_table
// Two caches are considered the same when their range texts are equal.
// Each element points into 'raw' and starts with count 0 and start -1u.
// The list must be freed by the caller.
static [*llc_map_t, size_t cnt] distinct_llcs(idx_range_t cpus, unsigned llc_idx, raw_cache_instance ** raw) {
        // There is always at least one element
        llc_map_t* llcs = alloc();
        size_t llc_cnt = 1;

        RangeIter rc = { cpus };
        __attribute__((unused)) bool ret =
        moveNext(rc);
        /* paranoid */ verify( ret );
        /* paranoid */ verify( rc.com >= 0 );

        // The cache of the first cpu listed is the first element
        llcs->raw = &raw[rc.com][llc_idx];
        llcs->count = 0;
        llcs->start = -1u;

        // Go over all other cpus
        while(moveNext(rc)) {
                unsigned i = rc.com;
                raw_cache_instance * candidate = &raw[i][llc_idx];

                // Check if the range is already there
                bool known = false;
                for(j; llc_cnt) {
                        if(0 == strcmp(candidate->range, llcs[j].raw->range)) {
                                known = true;
                                break;
                        }
                }

                // If the range is already there just move on to the next cpu
                if(known) continue;

                // The range wasn't there, add it at the end of the list
                llcs = alloc(llc_cnt + 1, llcs`realloc);
                llcs[llc_cnt].raw = candidate;
                llcs[llc_cnt].count = 0;
                llcs[llc_cnt].start = -1u;
                llc_cnt++;
        }

        // return what we have
        return [llcs, llc_cnt];
}
 | 
|---|
 | 310 | 
 | 
|---|
 // Associates a cpu with the cache instance it uses
 struct cpu_pairing_t {
         // the cpu number
         unsigned cpu;
         // position of the cache in the list of distinct caches
         unsigned id;
 };

 // Ordering of pairings: by cache id only, the cpu number is not considered
 int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
         return lhs.id < rhs.id;
 }
 | 
|---|
 | 319 | 
 | 
|---|
// Build the array pairing each cpu with its entry in 'maps'.
// cpus_c  : number of elements allocated
// cpus    : text list of the cpus to fill in, each listed cpu id is used as an index
// raw     : table produced by build_raw_cache_table
// maps    : list of distinct caches
// map_cnt : number of elements in 'maps'
// A cpu is paired with the first map whose range text equals the range text
// in slot 0 of that cpu's row. Elements of cpus that are not listed are left
// uninitialized. The array must be freed by the caller.
// NOTE(review): slot 0 is hard-coded here while distinct_llcs receives its
// slot as a parameter -- confirm that callers always use slot 0 there.
static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus_c, idx_range_t cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
        cpu_pairing_t * pairings = alloc(cpus_c);

        RangeIter rc = { cpus };
        CPU_LOOP: while(moveNext(rc)) {
                unsigned i = rc.com;
                pairings[i].cpu = i;

                // look for the map describing the same range as this cpu
                idx_range_t want = raw[i][0].range;
                for(j; map_cnt) {
                        if(0 == strcmp(want, maps[j].raw->range)) {
                                pairings[i].id = j;
                                continue CPU_LOOP;
                        }
                }

                // only reached when none of the maps matched
                /* paranoid */ verifyf( false, "Cpu %u map doesn't match", i );
        }

        return pairings;
}
 | 
|---|
 | 340 | 
 | 
|---|
| [cf85f96] | 341 | #include <fstream.hfa>
 | 
|---|
 | 342 | 
 | 
|---|
extern "C" {
        // Fill in 'cpu_info': the number of hardware threads, the number of
        // distinct last level caches and, for each cpu, an entry describing
        // which last level cache it uses and where it sits in that cache.
        // The entry array is allocated here and released by
        // __cfaabi_device_shutdown, every other allocation is temporary and
        // released before returning.
        void __cfaabi_device_startup( void ) {
                // 'cpus' is a heap copy of the online cpu list, it must be freed
                int cpus_c;
                const char * cpus;
                [cpus_c, cpus] = count_cpus();
                #if defined(__CFA_WITH_VERIFY__)
                // Verify that the mapping is self consistent.
                {
                        RangeIter rc = { cpus };
                        while(moveNext(rc)) {
                                unsigned i = rc.com;
                                verify(cpus_c > i);
                        }
                }
                #endif

                int idxs = count_cache_indexes();

                // Do we actually have a cache?
                if(idxs == 0) {
                        // if not just fake the data structure, it makes things easier.
                        cpu_info.hthrd_count = cpus_c;
                        cpu_info.llc_count = 0;
                        struct cpu_map_entry_t * entries = alloc(cpu_info.hthrd_count);
                        for(i; cpu_info.hthrd_count) {
                                entries[i].self  = i;
                                entries[i].start = 0;
                                entries[i].count = cpu_info.hthrd_count;
                                entries[i].cache = 0;
                        }
                        cpu_info.llc_map = entries;

                        // the cpu list is not needed past this point
                        free(cpus);
                        return;
                }

                // Count actual cache levels
                unsigned cache_levels = 0;
                unsigned llc = 0;

                // 'prev' starts at the largest value so the first level always passes
                unsigned char prev = -1u;
                void first(unsigned idx, unsigned char level, const char * map, size_t len) {
                        /* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
                        llc = max(llc, level);
                        prev = level;
                        cache_levels++;
                }
                foreach_cacheidx(0, idxs, first);

                // Read in raw data
                raw_cache_instance ** raw = build_raw_cache_table(cpus_c, cpus, idxs, cache_levels);

                // Find number of distinct cache instances
                llc_map_t * maps;
                size_t map_cnt;
                [maps, map_cnt] =  distinct_llcs(cpus, cache_levels - llc, raw);

                #if defined(__CFA_WITH_VERIFY__)
                // Verify that the caches cover all the cpus
                {
                        unsigned width1 = 0;
                        unsigned width2 = 0;
                        for(i; map_cnt) {
                                const char * _;
                                width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
                                width2 += maps[i].raw->width;
                        }
                        verify(width1 == cpus_c);
                        verify(width2 == cpus_c);
                }
                #endif

                // Get mappings from cpu to cache instance
                cpu_pairing_t * pairings = get_cpu_pairings(cpus_c, cpus, raw, maps, map_cnt);

                // Sort by cache instance
                qsort(pairings, cpus_c);

                // Give each cache a contiguous block of slots, as wide as the
                // cache, in the order the caches are first encountered
                {
                        unsigned it = 0;
                        RangeIter rc = { cpus };
                        while(moveNext(rc)) {
                                unsigned i = rc.com;
                                unsigned llc_id = pairings[i].id;
                                if(maps[llc_id].start == -1u) {
                                        maps[llc_id].start = it;
                                        it += maps[llc_id].raw->width;
                                        /* paranoid */ verify(maps[llc_id].start < it);
                                        /* paranoid */ verify(it != -1u);
                                }
                        }
                        /* paranoid */ verify(it == cpus_c);
                }

                // From the mappings build the actual cpu map we want
                struct cpu_map_entry_t * entries = alloc(cpus_c);
                for(i; cpus_c) { entries[i].count = 0; }

                // the iterator is scoped so that it is gone before the cpu list is freed
                {
                        RangeIter rc = { cpus };
                        while(moveNext(rc)) {
                                unsigned i = rc.com;
                                /* paranoid */ verify(pairings[i].id < map_cnt);
                                unsigned c = pairings[i].cpu;
                                unsigned llc_id = pairings[i].id;
                                unsigned start = maps[llc_id].start;
                                entries[c].count = maps[llc_id].raw->width;
                                entries[c].start = start;
                                // each cpu takes the next free slot of its cache
                                entries[c].self  = start + (maps[llc_id].count++);
                                entries[c].cache = llc_id;
                        }
                }

                // get rid of the temporary data
                free(maps);
                free(pairings);
                free(cpus);

                for(i; cpus_c) {
                        if( raw[i] ) for(j; cache_levels) {
                                ^(raw[i][j]){};
                        }
                        free(raw[i]);
                }
                free(raw);

                cpu_info.llc_map = entries;
                cpu_info.hthrd_count = cpus_c;
                cpu_info.llc_count = map_cnt;
        }

        // Release the cpu map built by __cfaabi_device_startup
        void __cfaabi_device_shutdown( void ) {
                free(cpu_info.llc_map);
        }
}
 | 
|---|
 | 473 | 
 | 
|---|
| [032234bd] | 474 | libcfa_public cpu_info_t cpu_info;
 | 
|---|