[96f002c1] | 1 | //
|
---|
| 2 | // Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
|
---|
| 3 | //
|
---|
| 4 | // The contents of this file are covered under the licence agreement in the
|
---|
| 5 | // file "LICENCE" distributed with Cforall.
|
---|
| 6 | //
|
---|
| 7 | // topology.cfa -- read the cpu and cache topology of the machine from sysfs
|
---|
| 8 | //
|
---|
| 9 | // Author : Thierry Delisle
|
---|
| 10 | // Created On : Thu Jun 10 16:13:07 2021
|
---|
| 11 | // Last Modified By :
|
---|
| 12 | // Last Modified On :
|
---|
| 13 | // Update Count :
|
---|
| 14 | //
|
---|
| 15 |
|
---|
| 16 | #include "device/cpu.hfa"
|
---|
| 17 |
|
---|
| 18 | #include <math.hfa>
|
---|
| 19 | #include <stdlib.hfa>
|
---|
| 20 |
|
---|
| 21 | #include <errno.h>
|
---|
| 22 | #include <stdio.h>
|
---|
| 23 | #include <string.h>
|
---|
| 24 | #include <unistd.h>
|
---|
| 25 |
|
---|
| 26 | extern "C" {
|
---|
| 27 | #include <dirent.h>
|
---|
| 28 | #include <sys/types.h>
|
---|
| 29 | #include <sys/stat.h>
|
---|
| 30 | #include <fcntl.h>
|
---|
| 31 | }
|
---|
| 32 |
|
---|
// Search 'str' for the first occurrence of 'character', looking at most at
// 'len' chars and never past the terminating NUL.
// Returns a pointer to the match, or a null pointer if there is none.
static const char * strnchr(const char * str, int character, size_t len) {
	// only the chars that are actually part of the string are searched
	size_t span = strnlen(str, len);
	const void * hit = memchr(str, character, span);
	return (const char *)hit;
}
|
---|
| 38 |
|
---|
// Check whether the 'have' string starts with the 'want' string.
// Any chars of 'have' beyond the length of 'want' are ignored.
static bool strmatch(const char * want, char * have) {
	// walk both strings until 'want' is exhausted; a NUL in 'have' is
	// necessarily a mismatch so 'have' is never read past its terminator
	for(; *want != '\0'; want++, have++) {
		if(*want != *have) return false;
	}
	return true;
}
|
---|
| 45 |
|
---|
// Textual list of indexes: comma separated values or 'A-B' ranges
typedef const char * idx_range_t;

// Evaluate the base 10 number written in the first 'len' chars of 'map'.
// 'end' receives the position where the conversion stopped, like strtoul;
// the paranoid checks make sure exactly 'len' chars were consumed.
static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
	unsigned long parsed = strtoul(map, (char**)end, 10);
	/* paranoid */ __attribute__((unused)) size_t consumed = (*end - map);
	/* paranoid */ verifyf(consumed <= len, "String '%s' passed with inconsistent length %zu", map, len);
	/* paranoid */ verifyf(consumed == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - consumed);
	return parsed;
}
|
---|
| 57 |
|
---|
// Evaluate the width of a comma separated list of idx
// for example 'A-B,C-D,E,F' has a width of '(B-A+1) + (D-C+1) + 1 + 1'
// Also has a (non-optional) end ptr like strtoul and friends
//
// NOTE(review): an earlier FIXME stated that only 1 comma is supported, but
// the right-hand side of a comma is evaluated recursively, which appears to
// handle any number of commas -- confirm before relying on it.
static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
	// A comma splits the list in two: the width is the sum of both sides
	const char * comma = strnchr(map, ',', len);
	if(comma != 0p) {
		const char * unused_end;
		size_t head = comma - map;
		unsigned first = read_width(map, head, &unused_end);
		unsigned rest  = read_width(comma + 1, len - head - 1, end);
		return first + rest;
	}

	// No comma left, a dash means an inclusive range 'low-high'
	const char * dash = strnchr(map, '-', len);
	if(dash != 0p) {
		const char * unused_end;
		size_t head = dash - map;
		unsigned low  = read_value(map, head, &unused_end);
		unsigned high = read_value(dash + 1, len - head - 1, end);
		return high - low + 1;
	}

	// Neither a list nor a range: a single value
	// Its width is 1 and everything is consumed
	/* paranoid */ verifyf( ({strtoul(map, (char**)end, 10); *end == (map + len); }), "Value in range '%.*s' not a number", (int)len, map);
	*end = map + len;
	return 1;
}
|
---|
| 91 |
|
---|
// Go through the directory 'path', calling 'fn' on each entry.
// Returns 0 on success; if the directory cannot be opened, 'fn' is never
// called and the errno value set by opendir is returned.
static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
	// open the directory
	DIR *dir = opendir(path);
	// report the actual cause (ENOENT, EACCES, ENOTDIR, ...) rather than
	// always claiming the path is not a directory
	if(dir == 0p) { return errno; }

	// call fn for each entry
	struct dirent * ent;
	while ((ent = readdir(dir)) != 0p) {
		fn( ent );
	}

	// no longer need this
	closedir(dir);
	return 0;
}
|
---|
| 108 |
|
---|
// Count the number of directories in 'path' with the specified prefix.
// The directories counted have the form '[prefix]N' where prefix is the
// parameter and N is a base 10 integer made only of digits.
// A paranoid check makes sure the N found are consistent with the count,
// i.e., that the highest N is 'count - 1'.
static int count_prefix_dirs(const char * path, const char * prefix) {
	int max = -1;
	int count = 0;
	size_t prefix_len = strlen(prefix);
	void lambda(struct dirent * ent) {
		// we are looking for prefixN, where N is a number
		// check that the name starts with the prefix
		if(strncmp(ent->d_name, prefix, prefix_len) != 0) { return; }

		// check that the next part is a number
		// it must start with a digit: strtol alone would accept an empty
		// suffix (as 0), leading white space and a sign
		const char * s = ent->d_name + prefix_len;
		if(*s < '0' || *s > '9') { return; }
		char * end;
		long int val = strtol(s, &end, 10);
		if(*end != '\0' || val < 0) { return; }

		// check that it's a directory
		if(ent->d_type != DT_DIR) { return; }

		// it's a match!
		max = max(val, max);
		count++;
	}
	iterate_dir(path, lambda);

	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);

	return count;
}
|
---|
| 143 |
|
---|
// Count number of cpus in the system
// Reads the cpu list in "/sys/devices/system/cpu/present" and returns its
// width; a paranoid check compares it with the number of 'cpuN' directories.
static int count_cpus(void) {
	const char * fpath = "/sys/devices/system/cpu/present";
	// the flags are the second argument of open, the mode (third) is
	// ignored since the file is not created
	int fd = open(fpath, O_RDONLY, 0);
	/* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);

	char buff[128];
	ssize_t nread = read(fd, buff, sizeof(buff));
	/* paranoid */ verifyf(nread > 0, "Could not read file %s", fpath);
	/* paranoid */ verify( buff[nread-1] == '\n' );
	// replace the trailing newline to get a proper string
	buff[nread-1] = '\0';

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);

	const char * _;
	int cnt = read_width(buff, nread - 1, &_);
	/* paranoid */ verify(cnt == count_prefix_dirs("/sys/devices/system/cpu", "cpu"));
	return cnt;
}
|
---|
| 165 |
|
---|
// Count the number of cache *indexes* in the system.
// Cache indexes are distinct from cache levels: a Data and an Instruction
// cache can share a level but not an index.
// PITFALL: assumes all cpus have the same indexes as cpu0
static int count_cache_indexes(void) {
	const char * cache_dir  = "/sys/devices/system/cpu/cpu0/cache";
	const char * dir_prefix = "index";
	return count_prefix_dirs(cache_dir, dir_prefix);
}
|
---|
| 173 |
|
---|
| 174 |
|
---|
// Read information about a specific cache index/cpu file into the output buffer.
// The file read is "/sys/devices/system/cpu/cpu<cpu>/cache/index<idx>/<file>".
// At most 'out_len' bytes are stored in 'out', no terminating NUL is added.
// Returns the number of bytes read.
static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
	// Pick the file we want and read it
	char buf[128];
	/* paranoid */ __attribute__((unused)) int len =
		snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
	// a result that does not fit in buf means the filename was truncated
	/* paranoid */ verifyf(len > 0 && (size_t)len < sizeof(buf), "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);

	// the flags are the second argument of open, the mode (third) is
	// ignored since the file is not created
	int fd = open(buf, O_RDONLY, 0);
	// 0 is a valid file descriptor, only negative values are errors
	/* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);

	ssize_t r = read(fd, out, out_len);
	/* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
	return r;
}
|
---|
| 194 |
|
---|
// Callback for foreach_cacheidx: receives the cache index, its level and the
// list of cpus sharing it ('range', 'len' chars long).
typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);

// Iterate over the cache indexes of a given cpu, from index 'idxs - 1' down
// to index 0, calling 'handle' for each Unified or Data cache.
// The range given to 'handle' lives in a local buffer, it must be copied
// if it is needed after 'handle' returns.
static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
	for(i; idxs) {
		// go through the indexes in decreasing order
		unsigned idx = idxs - 1 - i;
		char buf[32];

		// Type says what kind of cache this is,
		// Options are: Unified, Data, Instruction
		read_cpuidxinfo_into(cpu, idx, "type", buf, sizeof(buf));
		bool wanted = strmatch("Unified", buf) || strmatch("Data", buf);
		// We don't care about instruction caches
		if(!wanted) continue;

		// Level is the cache level: higher means bigger and slower
		read_cpuidxinfo_into(cpu, idx, "level", buf, sizeof(buf));
		char * end;
		unsigned long level = strtoul(buf, &end, 10);
		/* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);

		// shared_cpu_list is a range of cpus that share this particular cache
		// the trailing newline is replaced to get a proper string
		size_t n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, sizeof(buf));
		/* paranoid */ verify( buf[n-1] == '\n' );
		buf[n-1] = '\0';

		// Simply call the functor
		handle(idx, level, buf, n - 1);
	}
}
|
---|
| 225 |
|
---|
| 226 |
|
---|
// Raw description of one cache as seen from one cpu
struct raw_cache_instance {
	idx_range_t range;      // list of the cpus sharing this cache, owned by the instance
	unsigned width;         // number of cpus in 'range'
	unsigned char level;    // cache level
	// FIXME add at least size and type
};

// Default constructor: the instance starts without a range
static void ?{}(raw_cache_instance & this) {
	this.range = 0p;
}

// Destructor: release the range held by the instance
static void ^?{}(raw_cache_instance & this) {
	free(this.range);
}
|
---|
| 236 |
|
---|
// Build the table of raw cache information: one row of 'cache_levels'
// entries for each of the 'cpus' cpus, read from the 'idxs' cache indexes
// of each cpu.
// The entry of a cache of level L is stored at index 'cache_levels - L'.
// The table, its rows and the range strings are allocated here and are
// owned by the caller.
raw_cache_instance ** build_raw_cache_table(unsigned cpus, unsigned idxs, unsigned cache_levels)
{
	raw_cache_instance ** raw = alloc(cpus);
	for(i; cpus) {
		raw[i] = alloc(cache_levels);
		// start from a known state: entries never reported by
		// foreach_cacheidx would otherwise hold an indeterminate range
		// pointer that is later compared and freed
		for(j; cache_levels) {
			raw[i][j].range = 0p;
			raw[i][j].width = 0;
			raw[i][j].level = 0;
		}

		void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
			// level 0 would index one past the end of the row
			/* paranoid */ verifyf(level >= 1 && level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);

			unsigned idx = cache_levels - level;
			raw_cache_instance & r = raw[i][idx];
			// the range passed in is only valid during this call, keep a copy
			r.range = strndup(range, len);
			r.level = level;
			const char * end;
			r.width = read_width(range, len, &end);
		}
		foreach_cacheidx(i, idxs, addcache);
	}

	#if defined(__CFA_WITH_VERIFY__)
		// Check widths are consistent
		for(i; 1~cpus) {
			for(j; cache_levels) {
				verifyf(raw[0][j].width == raw[i][j].width, "Unexpected width %u for cpu %u, index %u. Expected %u.", raw[i][j].width, i, j, raw[0][j].width);
			}
		}
	#endif

	return raw;
}
|
---|
| 266 |
|
---|
// Returns an allocated list of all the distinct last level caches, together
// with the number of elements in the list.
// Caches are told apart by comparing the range string stored at index
// 'llc_idx' of each cpu's row in 'raw'.
// The list itself belongs to the caller but the strings it points to are
// the ones stored in 'raw', they are not copied.
static [*idx_range_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
	// Allocate at least one element and initialize it with cpu 0
	idx_range_t * ranges = alloc();
	size_t range_cnt = 1;
	*ranges = raw[0][llc_idx].range;

	// Go over all other cpus
	for(i; 1~cpus) {
		idx_range_t candidate = raw[i][llc_idx].range;

		// Check if the range is already there
		bool known = false;
		for(j; range_cnt) {
			if(strcmp(candidate, ranges[j]) == 0) {
				known = true;
				break;
			}
		}

		// If the range is already there just go to the next cpu
		if(known) continue;

		// The range wasn't there, add it to the list
		ranges = alloc(range_cnt + 1, ranges`realloc);
		ranges[range_cnt] = candidate;
		range_cnt++;
	}

	// return what we have
	return [ranges, range_cnt];
}
|
---|
| 295 |
|
---|
// Association between a cpu and the last level cache it uses
struct cpu_pairing_t {
	unsigned cpu;       // cpu number
	unsigned llc_id;    // position of the cache in the list of distinct caches
};

// Order pairings by cache id only, the cpu number is not considered
int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
	return rhs.llc_id > lhs.llc_id;
}
|
---|
| 304 |
|
---|
// Returns an allocated array with one pairing per cpu, associating each cpu
// with the position in 'maps' of the range equal to the cpu's own range.
// NOTE(review): the range of a cpu is taken from index 0 of its row in 'raw';
// this presumably is the last level cache -- confirm against the caller.
static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, idx_range_t * maps, size_t map_cnt) {
	cpu_pairing_t * pairings = alloc(cpus);

	for(i; cpus) {
		pairings[i].cpu = i;
		idx_range_t want = raw[i][0].range;

		// look for the map matching this cpu, the first match wins
		bool found = false;
		for(j; map_cnt) {
			if(0 == strcmp(want, maps[j])) {
				pairings[i].llc_id = j;
				found = true;
				break;
			}
		}

		// every cpu is expected to match one of the maps
		if(!found) {
			/* paranoid */ verifyf( false, "Cpu %u map doesn't match", i );
		}
	}

	return pairings;
}
|
---|
| 323 |
|
---|
extern "C" {
	// Read the topology of the machine and fill 'cpu_info' with the number
	// of cpus and, for each cpu, the start and count of the group of cpus
	// sharing its last level cache.
	void __cfaabi_device_startup( void ) {
		int cpus = count_cpus();
		int idxs = count_cache_indexes();

		// Count actual cache levels and find the last level, using cpu 0
		unsigned cache_levels = 0;
		unsigned llc = 0;
		{
			// levels are expected in strictly decreasing order
			unsigned char prev = -1u;
			void first(unsigned idx, unsigned char level, const char * map, size_t len) {
				/* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
				llc = max(llc, level);
				prev = level;
				cache_levels++;
			}
			foreach_cacheidx(0, idxs, first);
		}

		// Read in raw data
		raw_cache_instance ** raw = build_raw_cache_table(cpus, idxs, cache_levels);

		// Position of the last level cache in each row of the raw table
		unsigned llc_idx = cache_levels - llc;

		// Find number of distinct cache instances
		idx_range_t * maps;
		size_t map_cnt;
		[maps, map_cnt] = distinct_llcs(cpus, llc_idx, raw);

		/* paranoid */ verify((map_cnt * raw[0][cache_levels - llc].width) == cpus);

		// Number of cpus sharing each last level cache
		unsigned llc_width = raw[0][llc_idx].width;

		// Get mappings from cpu to cache instance
		cpu_pairing_t * pairings = get_cpu_pairings(cpus, raw, maps, map_cnt);

		// Sort by cache instance
		qsort(pairings, cpus);

		// From the mappings build the actual cpu map we want
		struct cpu_map_entry_t * entries = alloc(cpus);
		for(i; cpus) {
			entries[i].count = 0;
		}
		for(i; cpus) {
			unsigned c = pairings[i].cpu;
			entries[c].start = pairings[i].llc_id * llc_width;
			entries[c].count = llc_width;
		}

		// get rid of the temporary data
		free(maps);
		free(pairings);

		// the raw table owns its range strings, destroy each entry
		// before releasing the rows
		for(i; cpus) {
			for(j; cache_levels) {
				^(raw[i][j]){};
			}
			free(raw[i]);
		}
		free(raw);

		// publish the result
		cpu_info.llc_map = entries;
		cpu_info.hthrd_count = cpus;
	}

	// Release the cpu map stored in 'cpu_info'
	void __cfaabi_device_shutdown( void ) {
		free(cpu_info.llc_map);
	}
}
|
---|