source: libcfa/src/device/cpu.cfa@ 0ebbca4

ADT ast-experimental enum pthread-emulation qualifiedEnum
Last change on this file since 0ebbca4 was 07a1e7a, checked in by Thierry Delisle <tdelisle@…>, 4 years ago

Added cpu info for which cache the map refers to.

  • Property mode set to 100644
File size: 13.2 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// topology.cfa -- read the data structure
8//
9// Author : Thierry Delisle
10// Created On : Thu Jun 10 16:13:07 2021
11// Last Modified By :
12// Last Modified On :
13// Update Count :
14//
15
16#include "device/cpu.hfa"
17
18#include <math.hfa>
19#include <stdlib.hfa>
20
21#include <errno.h>
22#include <stdio.h>
23#include <string.h>
24#include <unistd.h>
25
26extern "C" {
27 #include <dirent.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <fcntl.h>
31}
32
33#include "algorithms/range_iterator.hfa"
34
// Bounded strchr: look for 'character' in 'str' while examining at most 'len'
// chars and never scanning past the terminating NUL.
// Returns a pointer to the first occurrence, or a null pointer if there is none.
static const char * strnchr(const char * str, int character, size_t len) {
	// Shrink the search window to the string's own length when it ends before 'len'
	const char * terminator = (const char *)memchr(str, '\0', len);
	size_t window = len;
	if(terminator) { window = (size_t)(terminator - str); }

	return (const char *)memchr(str, character, window);
}
40
// Prefix comparison: true when 'have' starts with the whole of 'want'.
// Anything in 'have' beyond the length of 'want' is ignored.
static bool strmatch(const char * want, char * have) {
	return 0 == strncmp(want, have, strlen(want));
}
47
// Text form of a list of indexes, written as comma separated values and
// dash separated ranges, for example 'A-B,C-D,E,F'.
typedef const char * idx_range_t;
49
// Parse the base 10 number made of the first 'len' chars of 'map'.
// 'end' receives the position where parsing stopped, like strtoul.
// With verification enabled, checks that exactly 'len' chars were consumed.
static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
	unsigned long parsed = strtoul(map, (char**)end, 10);

	// How many chars strtoul actually consumed
	/* paranoid */ __attribute__((unused)) size_t consumed = (*end - map);
	/* paranoid */ verifyf(consumed <= len, "String '%s' passed with inconsistent length %zu", map, len);
	/* paranoid */ verifyf(consumed == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - consumed);

	return parsed;
}
59
// Compute how many indexes a comma separated list of idx covers.
// For example 'A-B,C-D,E,F' covers '(B-A+1) + (D-C+1) + 1 + 1' indexes.
// Only the first 'len' chars of 'map' are considered.
// 'end' is a mandatory end pointer, in the style of strtoul and friends.
//
// NOTE(review): this used to carry a FIXME saying only 1 comma is supported;
// the right-hand side is evaluated recursively, so longer lists look handled. Confirm.
static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
	// End pointer of sub-expressions we don't report to the caller
	const char * ignored;

	// A comma splits the list in two: the width is the sum of both halves
	const char * comma = strnchr(map, ',', len);
	if(comma != 0p) {
		size_t head_len = comma - map;
		size_t tail_len = len - head_len - 1;
		unsigned head = read_width(map, head_len, &ignored);
		unsigned tail = read_width(comma + 1, tail_len, end);
		return head + tail;
	}

	// No comma left, a dash means this is an inclusive range 'first-last'
	const char * dash = strnchr(map, '-', len);
	if(dash != 0p) {
		size_t first_len = dash - map;
		size_t last_len = len - first_len - 1;
		unsigned first = read_value(map, first_len, &ignored);
		unsigned last = read_value(dash + 1, last_len, end);
		return last - first + 1;
	}

	// Neither comma nor dash: a lone value, which covers exactly one index
	// and consumes the whole input
	/* paranoid */ verifyf( ({strtoul(map, (char**)end, 10); *end == (map + len); }), "Value in range '%.*s' not a number", (int)len, map);
	*end = map + len;
	return 1;
}
93
// Call 'fn' once for every entry readdir reports in the directory 'path'.
// Returns 0 on success and ENOTDIR whenever the directory cannot be opened,
// whatever the underlying reason.
static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
	DIR * handle = opendir(path);
	if(0p == handle) { return ENOTDIR; }

	// hand every entry to the callback
	for(struct dirent * entry = readdir(handle); entry != 0p; entry = readdir(handle)) {
		fn( entry );
	}

	// done with the directory
	closedir(handle);
	return 0;
}
110
// Count the directories in 'path' whose name has the form '[prefix]N', where
// prefix is the parameter and N is a non-negative base 10 integer.
// Returns the number of such directories, or 0 if 'path' cannot be opened.
// With verification enabled, also checks that the largest N is count - 1.
static int count_prefix_dirs(const char * path, const char * prefix) {
	int max = -1;
	int count = 0;
	size_t prefix_len = strlen(prefix);
	void lambda(struct dirent * ent) {
		// we are looking for prefixN, where N is a number
		// check that the name starts with the prefix
		if(strncmp(ent->d_name, prefix, prefix_len) != 0) { return; }

		// check that the next part is a number:
		// it must start with a digit, strtol alone would also accept an empty
		// string, leading whitespace or a sign, and wrongly count such names
		char * s = ent->d_name + prefix_len;
		if(*s < '0' || *s > '9') { return; }

		// and nothing may follow the digits
		char * end;
		long int val = strtol(s, &end, 10);
		if(*end != '\0') { return; }

		// check that it's a directory
		if(ent->d_type != DT_DIR) { return; }

		// it's a match!
		max = max(val, max);
		count++;
	}
	int ret = iterate_dir(path, lambda);
	if(ret == ENOTDIR) return 0;

	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);

	return count;
}
146
// Count the number of cpus in the system from '/sys/devices/system/cpu/online'.
// Returns the width of the list found in that file, together with a copy of
// the list itself (without the trailing newline).
// The copy is allocated with strndup: the caller owns it and must free it.
static [int, const char *] count_cpus(void) {
	const char * fpath = "/sys/devices/system/cpu/online";
	// open takes (path, flags[, mode]); no mode is needed without O_CREAT
	int fd = open(fpath, O_RDONLY);
	/* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);

	char buff[128];
	ssize_t r = read(fd, buff, 128);
	/* paranoid */ verifyf(r > 0, "Could not read file %s", fpath);

	// The content is expected to end with a newline, replace it with a terminator
	/* paranoid */ verify( buff[r-1] == '\n' );
	buff[r-1] = '\0';

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);

	const char * _;
	return [read_width(buff, r - 1, &_), strndup(buff, r - 1)];
}
166
// Count the cache *indexes* of the system, i.e. the 'indexN' directories.
// An index is not the same thing as a level: a Data and an Instruction cache
// can sit at the same level, but each one gets its own index.
// PITFALL: only cpu0 is inspected, all cpus are assumed to have the same indexes
static int count_cache_indexes(void) {
	const char * cpu0_cache_dir = "/sys/devices/system/cpu/cpu0/cache";
	return count_prefix_dirs(cpu0_cache_dir, "index");
}
174
175
// Read the file '/sys/devices/system/cpu/cpu[cpu]/cache/index[idx]/[file]'
// into the output buffer.
// At most 'out_len' bytes are stored in 'out', no terminator is appended.
// Returns the number of bytes read.
static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
	// Build the name of the file we want
	char buf[128];
	/* paranoid */ __attribute__((unused)) int len =
		snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
	/* paranoid */ verifyf(len > 0, "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);
	// snprintf returns the length it wanted to write, anything >= the buffer size was cut short
	/* paranoid */ verifyf((size_t)len < sizeof(buf), "Filename '%s' for cpu %u, index %u was truncated", file, cpu, idx);

	// open takes (path, flags[, mode]); no mode is needed without O_CREAT
	int fd = open(buf, O_RDONLY);
	// 0 is a valid file descriptor, only negative values signal an error
	/* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);

	ssize_t r = read(fd, out, out_len);
	/* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
	return r;
}
195
// Callback for foreach_cacheidx, it receives:
//  - idx   : the cache index being visited
//  - level : the cache level read from the 'level' file
//  - range : the content of 'shared_cpu_list', without the trailing newline
//  - len   : the length of 'range'
typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);

// Iterate over the cache indexes of a given cpu, from index 'idxs - 1' down to 0,
// and call 'handle' for each Unified or Data cache. Instruction caches are skipped.
// The range passed to 'handle' lives in a local buffer that is reused for the
// next index: the callback must copy it if it wants to keep it.
static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
	for(i; idxs) {
		unsigned idx = idxs - 1 - i;

		// Large enough for cpu lists longer than a handful of ranges.
		// The buffer is cleared before each read and one byte is always held back,
		// so the content is a proper NUL terminated string for strtoul and friends.
		char buf[128];

		// Type says what kind of cache this is,
		// Options are: Unified, Data, Instruction
		memset(buf, 0, sizeof(buf));
		read_cpuidxinfo_into(cpu, idx, "type", buf, sizeof(buf) - 1);
		if((!strmatch("Unified", buf)) && (!strmatch("Data", buf))) {
			// We don't care about instruction caches
			continue;
		}

		// Level is the cache level: higher means bigger and slower
		memset(buf, 0, sizeof(buf));
		read_cpuidxinfo_into(cpu, idx, "level", buf, sizeof(buf) - 1);
		char * end;
		unsigned long level = strtoul(buf, &end, 10);
		/* paranoid */ verifyf(end != buf, "Cpu %u index %u has an unreadable cache level", cpu, idx);
		/* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);

		// shared_cpu_list is a range of cpus that share this particular cache
		memset(buf, 0, sizeof(buf));
		size_t n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, sizeof(buf) - 1);
		/* paranoid */ verify( n > 0 && n < sizeof(buf) );
		/* paranoid */ verify( buf[n-1] == '\n' );
		buf[n-1] = '\0';

		// Simply call the functor
		handle(idx, level, buf, n - 1);
	}
}
226
227
// What was read from the system about one cache, as seen from one cpu
struct raw_cache_instance {
	idx_range_t range;    // A text description of the cpus covered, freed on destruction
	unsigned width;       // The number of cpus covered
	unsigned char level;  // the cache level
	// FIXME add at least size and type
};

// A fresh instance has no range yet
static void ?{}(raw_cache_instance & this) {
	this.range = 0p;
}

// Destroying an instance releases its range
static void ^?{}(raw_cache_instance & this) {
	free(this.range);
}
237
// Returns a 2D array of instances of size [cpu count][cache levels]
// where cache level doesn't include instruction caches.
//  - cpus_c       : number of rows to allocate
//  - cpus         : list of the cpus to fill in, rows of other cpus stay null
//  - idxs         : number of cache indexes to visit for each cpu
//  - cache_levels : number of entries in each row
// Within a row, level L is stored at position 'cache_levels - L', so the
// highest level comes first.
// Each filled entry owns a copy of its range, released by the instance destructor.
raw_cache_instance ** build_raw_cache_table(unsigned cpus_c, idx_range_t cpus, unsigned idxs, unsigned cache_levels)
{
	raw_cache_instance ** raw = alloc(cpus_c, '\0'`fill);

	RangeIter rc = { cpus };
	while(moveNext(rc)) {
		unsigned i = rc.com;
		// Zero the row: if some level is never reported for this cpu, its range
		// stays null and can still be handed to free safely later on
		raw[i] = alloc(cache_levels, '\0'`fill);
		void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
			// level 0 would land one past the end of the row
			/* paranoid */ verifyf(level >= 1 && level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);

			unsigned idx = cache_levels - level;
			raw_cache_instance & r = raw[i][idx];
			// the range lives in a buffer owned by the caller, keep our own copy
			r.range = strndup(range, len);
			r.level = level;
			const char * end;
			r.width = read_width(range, len, &end);
		}
		foreach_cacheidx(i, idxs, addcache);
	}

	return raw;
}
263
// One distinct last level cache
struct llc_map_t {
	raw_cache_instance * raw;  // the raw data describing this cache, not owned
	unsigned count;            // number of cpus handed a slot in this cache so far
	unsigned start;            // first slot of this cache in the cpu map, -1u until assigned
};
269
// Build the list of distinct last level caches found among 'cpus'.
// Two cpus refer to the same cache when the ranges of their entry at position
// 'llc_idx' in 'raw' compare equal as text.
// Returns the allocated list and its element count, the caller frees the list.
// Each element points into 'raw' and starts with count 0 and start -1u.
static [*llc_map_t, size_t cnt] distinct_llcs(idx_range_t cpus, unsigned llc_idx, raw_cache_instance ** raw) {
	// There must be at least one cpu in the list
	RangeIter rc = { cpus };
	__attribute__((unused)) bool ret =
		moveNext(rc);
	/* paranoid */ verify( ret );
	/* paranoid */ verify( rc.com >= 0 );

	// The first cpu always provides the first element
	llc_map_t * ranges = alloc();
	size_t range_cnt = 1;
	ranges[0].raw = &raw[rc.com][llc_idx];
	ranges[0].count = 0;
	ranges[0].start = -1u;

	// Every other cpu either matches a known cache or brings a new one
	while(moveNext(rc)) {
		unsigned i = rc.com;
		raw_cache_instance * candidate = &raw[i][llc_idx];

		// Look for the candidate among the ranges collected so far
		bool known = false;
		for(j; range_cnt) {
			if(strcmp(candidate->range, ranges[j].raw->range) == 0) {
				known = true;
				break;
			}
		}
		if(known) continue;

		// First time we see this range, grow the list by one
		ranges = alloc(range_cnt + 1, ranges`realloc);
		ranges[range_cnt].raw = candidate;
		ranges[range_cnt].count = 0;
		ranges[range_cnt].start = -1u;
		range_cnt++;
	}

	return [ranges, range_cnt];
}
309
// Associates a cpu with the cache instance it belongs to
struct cpu_pairing_t {
	unsigned cpu;  // the cpu number
	unsigned id;   // position of the matching cache in the list of distinct caches
};

// Pairings are ordered by cache id only, the cpu number plays no part
int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
	int before = lhs.id < rhs.id;
	return before;
}
318
// For every cpu in 'cpus', find which element of 'maps' describes its cache.
// The cache of a cpu is taken from position 0 of its row in 'raw' and matched
// against the maps by comparing the range text.
// Returns an allocated array of 'cpus_c' pairings indexed by cpu number,
// the caller frees it. Entries of cpus not in 'cpus' are left unset.
static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus_c, idx_range_t cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
	cpu_pairing_t * pairings = alloc(cpus_c);

	RangeIter rc = { cpus };
	while(moveNext(rc)) {
		unsigned i = rc.com;
		pairings[i].cpu = i;
		idx_range_t want = raw[i][0].range;

		// Stop at the first map with the same range
		__attribute__((unused)) bool matched = false;
		for(j; map_cnt) {
			if(0 == strcmp(want, maps[j].raw->range)) {
				pairings[i].id = j;
				matched = true;
				break;
			}
		}

		// Every cpu is expected to match one of the maps
		/* paranoid */ verifyf( matched, "Cpu %u map doesn't match", i );
	}

	return pairings;
}
339
340#include <fstream.hfa>
341
extern "C" {
	// Read the cpu and cache information of the system and fill in 'cpu_info':
	//  - llc_map     : one entry per cpu with the slots [start, start + count) of its
	//                  last level cache, its own slot 'self' and the cache id 'cache'
	//  - hthrd_count : the number of cpus
	//  - llc_count   : the number of distinct last level caches
	// The map is allocated here and released by __cfaabi_device_shutdown.
	void __cfaabi_device_startup( void ) {
		// 'cpus' is an allocated copy of the cpu list, released below once
		// the last iteration over it is done
		int cpus_c;
		const char * cpus;
		[cpus_c, cpus] = count_cpus();
		#if defined(__CFA_WITH_VERIFY__)
		// Verify that the mapping is self consistent:
		// every cpu number must be a valid index in arrays of size cpus_c
		{
			RangeIter rc = { cpus };
			while(moveNext(rc)) {
				unsigned i = rc.com;
				verify(cpus_c > i);
			}
		}
		#endif

		int idxs = count_cache_indexes();

		// Count actual cache levels, using cpu 0 as the reference
		unsigned cache_levels = 0;
		unsigned llc = 0;
		if (idxs != 0) {
			unsigned char prev = -1u;
			void first(unsigned idx, unsigned char level, const char * map, size_t len) {
				// indexes are visited from last to first, levels must strictly decrease
				/* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
				llc = max(llc, level);
				prev = level;
				cache_levels++;
			}
			foreach_cacheidx(0, idxs, first);
		}

		// Read in raw data
		raw_cache_instance ** raw = build_raw_cache_table(cpus_c, cpus, idxs, cache_levels);

		// Find number of distinct cache instances
		llc_map_t * maps;
		size_t map_cnt;
		[maps, map_cnt] = distinct_llcs(cpus, cache_levels - llc, raw);

		#if defined(__CFA_WITH_VERIFY__)
		// Verify that the caches cover all the cpus
		{
			unsigned width1 = 0;
			unsigned width2 = 0;
			for(i; map_cnt) {
				const char * _;
				width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
				width2 += maps[i].raw->width;
			}
			verify(width1 == cpus_c);
			verify(width2 == cpus_c);
		}
		#endif

		// Get mappings from cpu to cache instance
		cpu_pairing_t * pairings = get_cpu_pairings(cpus_c, cpus, raw, maps, map_cnt);

		// Sort by cache instance
		qsort(pairings, cpus_c);

		// Give each cache a contiguous block of slots, in the order the caches
		// show up in the sorted pairings
		{
			unsigned it = 0;
			RangeIter rc = { cpus };
			while(moveNext(rc)) {
				unsigned i = rc.com;
				unsigned llc_id = pairings[i].id;
				if(maps[llc_id].start == -1u) {
					maps[llc_id].start = it;
					it += maps[llc_id].raw->width;
					/* paranoid */ verify(maps[llc_id].start < it);
					/* paranoid */ verify(it != -1u);
				}
			}
			/* paranoid */ verify(it == cpus_c);
		}

		// From the mappings build the actual cpu map we want
		struct cpu_map_entry_t * entries = alloc(cpus_c);
		for(i; cpus_c) { entries[i].count = 0; }

		// The iterator is scoped so that it is gone before the list is released
		{
			RangeIter rc = { cpus };
			while(moveNext(rc)) {
				unsigned i = rc.com;
				/* paranoid */ verify(pairings[i].id < map_cnt);
				unsigned c = pairings[i].cpu;
				unsigned llc_id = pairings[i].id;
				unsigned start = maps[llc_id].start;
				entries[c].count = maps[llc_id].raw->width;
				entries[c].start = start;
				// each cpu takes the next free slot of its cache
				entries[c].self = start + (maps[llc_id].count++);
				entries[c].cache = llc_id;
			}
		}

		// get rid of the temporary data
		free(maps);
		free(pairings);

		// rows of cpus that were not in the list are null and have nothing to destroy
		for(i; cpus_c) {
			if( raw[i] ) for(j; cache_levels) {
				^(raw[i][j]){};
			}
			free(raw[i]);
		}
		free(raw);

		// the cpu list was allocated by count_cpus and nothing refers to it anymore
		free(cpus);

		cpu_info.llc_map = entries;
		cpu_info.hthrd_count = cpus_c;
		cpu_info.llc_count = map_cnt;
	}

	// Release the cpu map allocated by __cfaabi_device_startup
	void __cfaabi_device_shutdown( void ) {
		free(cpu_info.llc_map);
	}
}
457
// The cpu information of the system,
// filled in by __cfaabi_device_startup and released by __cfaabi_device_shutdown
cpu_info_t cpu_info;
Note: See TracBrowser for help on using the repository browser.