source: libcfa/src/device/cpu.cfa@ b51e389c

ADT ast-experimental enum forall-pointer-decay jacob/cs343-translation new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since b51e389c was 1f05c83, checked in by Thierry Delisle <tdelisle@…>, 4 years ago

Added more debug to defice/cpu

  • Property mode set to 100644
File size: 11.4 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// cpu.cfa -- read the cpu and cache topology of the machine from sysfs
8//
9// Author : Thierry Delisle
10// Created On : Thu Jun 10 16:13:07 2021
11// Last Modified By :
12// Last Modified On :
13// Update Count :
14//
15
16#include "device/cpu.hfa"
17
18#include <math.hfa>
19#include <stdlib.hfa>
20
21#include <errno.h>
22#include <stdio.h>
23#include <string.h>
24#include <unistd.h>
25
26extern "C" {
27 #include <dirent.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <fcntl.h>
31}
32
// Bounded strchr: look for 'character' in 'str' while examining at most 'len'
// chars and never going past the terminating NUL.
// Returns a pointer to the first match, or a null pointer when there is none.
static const char * strnchr(const char * str, int character, size_t len) {
	// Measure how much of the string may be searched
	size_t span = 0;
	while(span < len && str[span] != '\0') {
		span++;
	}

	return (const char *)memchr(str, character, span);
}
38
// Prefix test: true when 'have' begins with the whole of 'want'.
// Whatever follows in 'have' past the length of 'want' is ignored.
static bool strmatch(const char * want, char * have) {
	for(; *want != '\0'; want++, have++) {
		// a shorter 'have' fails here too: its NUL differs from *want
		if(*want != *have) { return false; }
	}
	return true;
}
45
// Textual list of indexes, e.g. 'A-B,C-D,E,F'
typedef const char * idx_range_t;

// Parse the base 10 number at the start of 'map'.
// 'len' is the number of chars the number is expected to span and '*end'
// receives the first char that was not consumed, like strtoul and friends.
// When verification is enabled, checks that exactly 'len' chars were consumed.
static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
	char * stop;
	unsigned long parsed = strtoul(map, &stop, 10);
	*end = stop;

	/* paranoid */ __attribute__((unused)) size_t used = (*end - map);
	/* paranoid */ verifyf(used <= len, "String '%s' passed with inconsistent length %zu", map, len);
	/* paranoid */ verifyf(used == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - used);
	return parsed;
}
57
// Evaluate the width of a comma separated list of idx, i.e., how many
// indexes the list names.
// A range 'A-B' counts both of its ends, so 'A-B,C-D,E,F' has a width of
// '(B-A+1) + (D-C+1) + 1 + 1'
// Also has a (non-optional) end ptr like strtoul and friends
//
// FIXME : the current implementation only supports 1 comma
// NOTE(review): the list is consumed one element at a time, so any number
// of commas looks supported -- confirm before dropping the FIXME
static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
	unsigned total = 0;
	for(;;) {
		// The current element stops at the next comma or at the end of the list
		const char * comma = strnchr(map, ',', len);
		size_t span = len;
		if(comma != 0p) { span = comma - map; }

		// Only the end pointer of the last element is reported to the caller
		const char * ignored;
		const char ** stop = end;
		if(comma != 0p) { stop = &ignored; }

		const char * dash = strnchr(map, '-', span);
		if(dash != 0p) {
			// A range: both bounds are part of it
			const char * _;
			size_t split = dash - map;
			unsigned lo = read_value(map, split, &_);
			unsigned hi = read_value(dash + 1, span - split - 1, stop);
			total += hi - lo + 1;
		}
		else {
			// A single value: its width is 1 and everything is consumed
			/* paranoid */ verifyf( ({strtoul(map, (char**)stop, 10); *stop == (map + span); }), "Value in range '%.*s' not a number", (int)span, map);
			*stop = map + span;
			total += 1;
		}

		// Last element handled, we are done
		if(comma == 0p) { return total; }

		// Move past the comma to the next element
		len -= span + 1;
		map  = comma + 1;
	}
}
91
// Go through the directory at 'path', calling 'fn' on each entry.
// No entry is filtered out here: 'fn' sees everything readdir returns.
// Returns 0 on success, or the errno value left by opendir when the directory
// cannot be opened (so the caller can tell a missing path from a permission
// problem instead of always getting ENOTDIR).
static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
	// open the directory
	DIR *dir = opendir(path);
	if(dir == 0p) { return errno; }

	// call fn for each
	struct dirent * ent;
	while ((ent = readdir(dir)) != 0p) {
		fn( ent );
	}

	// no longer need this
	closedir(dir);
	return 0;
}
108
// Count the number of directories in 'path' with the specified prefix.
// The directories counted have the form '[prefix]N' where prefix is the
// parameter and N is a non-negative base 10 integer, e.g., 'cpu0', 'cpu1'.
// Entries that only start with the prefix ('cpufreq') or that are the prefix
// alone are not counted.
// When verification is enabled, checks that the count equals the highest N
// seen plus one.
static int count_prefix_dirs(const char * path, const char * prefix) {
	const size_t prefix_len = strlen(prefix);

	// read the directory, count the matches
	// and make sure everything is as expected
	int max = -1;
	int count = 0;
	void lambda(struct dirent * ent) {
		// we are looking for prefixX, where X is a number
		// check that the name starts with the prefix
		if(strncmp(ent->d_name, prefix, prefix_len) != 0) { return; }

		// check that the next part is a number
		// strtol consumes nothing when there are no digits, in which case
		// 'end == s': without that test a directory named exactly like the
		// prefix would be counted as N = 0
		char * s = ent->d_name + prefix_len;
		char * end;
		long int val = strtol(s, &end, 10);
		if(end == s || *end != '\0' || val < 0) { return; }

		// check that it's a directory
		if(ent->d_type != DT_DIR) { return; }

		// it's a match!
		max = max(val, max);
		count++;
	}
	iterate_dir(path, lambda);

	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);

	return count;
}
143
// Count number of cpus in the system.
// Reads the index list in /sys/devices/system/cpu/present and returns its
// width as computed by read_width.
// When verification is enabled, the result is cross-checked against the
// number of 'cpuN' directories in /sys/devices/system/cpu.
static int count_cpus(void) {
	const char * fpath = "/sys/devices/system/cpu/present";
	// open takes the access flags as its second argument, a mode is only
	// meaningful when creating a file
	int fd = open(fpath, O_RDONLY);
	/* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);

	char buff[128];
	ssize_t r = read(fd, buff, sizeof(buff));
	/* paranoid */ verifyf(r > 0, "Could not read file %s", fpath);
	/* paranoid */ verify( buff[r-1] == '\n' );
	// replace the trailing newline to get a proper string
	buff[r-1] = '\0';

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);

	const char * _;
	int cnt = read_width(buff, r - 1, &_);
	/* paranoid */ verify(cnt == count_prefix_dirs("/sys/devices/system/cpu", "cpu"));
	return cnt;
}
165
// Count number of cache *indexes* in the system, i.e., the number of
// 'indexN' directories listed for cpu0.
// Cache indexes are distinct from cache levels: a Data and an Instruction
// cache can share a level but not an index.
// PITFALL: assumes all cpus have the same indexes as cpu0
static int count_cache_indexes(void) {
	const char * cache_dir = "/sys/devices/system/cpu/cpu0/cache";
	const char * entry_prefix = "index";
	return count_prefix_dirs(cache_dir, entry_prefix);
}
173
174
// Read information about a specific cache index/cpu file into the output buffer.
// The file read is /sys/devices/system/cpu/cpu[cpu]/cache/index[idx]/[file].
// At most 'out_len' bytes are stored in 'out', exactly as returned by read:
// no NUL terminator is appended.
// Returns the number of bytes read.
static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
	// Pick the file we want and read it
	char buf[128];
	/* paranoid */ __attribute__((unused)) int len =
		snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
	// snprintf returns the length it wanted to write: anything that does not
	// fit in the buffer means the path was truncated
	/* paranoid */ verifyf(len > 0 && (size_t)len < sizeof(buf), "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);

	// open takes the access flags as its second argument
	int fd = open(buf, O_RDONLY);
	// 0 is a valid file descriptor, only negative values signal a failure
	/* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);

	ssize_t r = read(fd, out, out_len);
	/* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
	return r;
}
194
// Iterate over the cache indexes of a given cpu, from index 'idxs - 1' down
// to index 0, and call 'handle' for each Unified or Data cache.
// 'handle' receives the index, the cache level, the shared_cpu_list text
// (newline stripped, NUL terminated) and the length of that text.
// The text lives in a local buffer: 'handle' must copy it to keep it.
typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);
static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
	for(i; idxs) {
		unsigned idx = idxs - 1 - i;
		// Large enough for long cpu lists; one byte is always held back
		// so the content can be used as a string
		char buf[128];

		// Type says what kind of cache this is,
		// Options are: Unified, Data, Instruction
		// read does not terminate what it stores, so start from a zeroed buffer
		memset(buf, 0, sizeof(buf));
		read_cpuidxinfo_into(cpu, idx, "type", buf, sizeof(buf) - 1);
		if((!strmatch("Unified", buf)) && (!strmatch("Data", buf))) {
			// We don't care about instruction caches
			continue;
		}

		// Level is the cache level: higher means bigger and slower
		memset(buf, 0, sizeof(buf));
		read_cpuidxinfo_into(cpu, idx, "level", buf, sizeof(buf) - 1);
		char * end;
		unsigned long level = strtoul(buf, &end, 10);
		/* paranoid */ verifyf(end != buf, "Cache level of cpu %u, index %u is not a number", cpu, idx);
		/* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);

		// shared_cpu_list is a range of cpus that share this particular cache
		size_t n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, sizeof(buf) - 1);
		/* paranoid */ verify( buf[n-1] == '\n' );
		buf[n-1] = '\0';

		// Simply call the functor
		handle(idx, level, buf, n - 1);
	}
}
225
226
// One cache as seen from one cpu
struct raw_cache_instance {
	idx_range_t range;      // text list of indexes, released by the destructor
	unsigned width;         // width of that list
	unsigned char level;    // cache level
	// FIXME add at least size and type
};

// Default constructor: no range yet
static void ?{}(raw_cache_instance & this) {
	this.range = 0p;
}

// Destructor: release the range text
static void ^?{}(raw_cache_instance & this) {
	free(this.range);
}
236
// Build the table of caches of every cpu: raw[cpu] is an array of
// 'cache_levels' entries where the cache of level L is stored at position
// 'cache_levels - L', so the highest level comes first.
// Each entry gets its own copy of the shared_cpu_list text (strndup) and
// the width of that list.
// Both the table and the per-cpu arrays are allocated here; the caller is
// responsible for releasing them and the range strings.
raw_cache_instance ** build_raw_cache_table(unsigned cpus, unsigned idxs, unsigned cache_levels)
{
	raw_cache_instance ** raw = alloc(cpus);
	for(i; cpus) {
		raw[i] = alloc(cache_levels);

		// alloc does not initialize the entries: give them a known state so
		// a level that is never reported for this cpu does not leave a
		// garbage pointer to free or a garbage width to compare
		for(j; cache_levels) {
			raw[i][j].range = 0p;
			raw[i][j].width = 0;
			raw[i][j].level = 0;
		}

		void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
			/* paranoid */ verifyf(level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);

			unsigned idx = cache_levels - level;
			raw_cache_instance & r = raw[i][idx];
			// 'range' points into a buffer of the caller, keep a copy
			r.range = strndup(range, len);
			r.level = level;
			const char * end;
			r.width = read_width(range, len, &end);
		}
		foreach_cacheidx(i, idxs, addcache);
	}

	#if defined(__CFA_WITH_VERIFY__)
		// Check widths are consistent
		for(i; 1~cpus) {
			for(j; cache_levels) {
				verifyf(raw[0][j].width == raw[i][j].width, "Unexpected width %u for cpu %u, index %u. Expected %u.", raw[i][j].width, i, j, raw[0][j].width);
			}
		}
	#endif

	return raw;
}
266
// Returns an allocated list of all the distinct ranges found at position
// 'llc_idx' of the raw table, together with the number of elements.
// The list only borrows the strings of 'raw': it must be freed, they must not.
static [*idx_range_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
	// Start with a single element: the range of cpu 0
	idx_range_t * found = alloc();
	found[0] = raw[0][llc_idx].range;
	size_t found_cnt = 1;

	// Go over all other cpus
	for(cpu; 1~cpus) {
		idx_range_t candidate = raw[cpu][llc_idx].range;

		// Is the range already listed
		bool known = false;
		for(k; found_cnt) {
			if(strcmp(candidate, found[k]) == 0) {
				known = true;
				break;
			}
		}
		if(known) { continue; }

		// It is not, grow the list by one and append it
		found = alloc(found_cnt + 1, found`realloc);
		found[found_cnt] = candidate;
		found_cnt += 1;
	}

	// return what we have
	return [found, found_cnt];
}
295
// Associates a cpu with the id of a last level cache
struct cpu_pairing_t {
	unsigned cpu;
	unsigned llc_id;
};

// Ordering of pairings: only the llc id is compared
int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
	return rhs.llc_id > lhs.llc_id;
}
304
// Build, for each cpu, the pairing between the cpu and the position in 'maps'
// of the range matching the cpu's range at row 0 of 'raw'.
// Returns an allocated array of 'cpus' pairings, to be freed by the caller.
// NOTE(review): row 0 is hard-coded here, presumably it is the last level
// cache -- confirm it always agrees with the index the caller computes
static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, idx_range_t * maps, size_t map_cnt) {
	cpu_pairing_t * pairings = alloc(cpus);

	for(i; cpus) {
		pairings[i].cpu = i;
		idx_range_t want = raw[i][0].range;

		// Look for the first map with the same text
		size_t j = 0;
		while(j < map_cnt && 0 != strcmp(want, maps[j])) {
			j++;
		}

		if(j < map_cnt) {
			pairings[i].llc_id = j;
		}
		else {
			/* paranoid */ verifyf( false, "Cpu %u map doesn't match", i );
		}
	}

	return pairings;
}
323
324extern "C" {
// Read the cpu/cache topology and publish it in 'cpu_info':
// 'llc_map' gets one entry per cpu giving the start and count of the group of
// cpus paired with the same last level cache, 'hthrd_count' gets the cpu count.
void __cfaabi_device_startup( void ) {
	int cpus = count_cpus();
	int idxs = count_cache_indexes();

	// Count actual cache levels by walking the caches of cpu 0,
	// and remember the highest level seen
	unsigned cache_levels = 0;
	unsigned llc = 0;
	{
		unsigned char prev = -1u;
		void first(unsigned idx, unsigned char level, const char * map, size_t len) {
			// levels are expected in strictly decreasing order
			/* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
			llc = max(llc, level);
			prev = level;
			cache_levels++;
		}
		foreach_cacheidx(0, idxs, first);
	}

	// Read in raw data
	raw_cache_instance ** raw = build_raw_cache_table(cpus, idxs, cache_levels);

	// Position of the last level cache in each raw[cpu] array and its width
	unsigned llc_idx = cache_levels - llc;
	unsigned llc_width = raw[0][llc_idx].width;

	// Find number of distinct cache instances
	idx_range_t * maps;
	size_t map_cnt;
	[maps, map_cnt] = distinct_llcs(cpus, llc_idx, raw);

	/* paranoid */ verify((map_cnt * llc_width) == cpus);

	// Get mappings from cpu to cache instance, sorted by cache instance
	cpu_pairing_t * pairings = get_cpu_pairings(cpus, raw, maps, map_cnt);
	qsort(pairings, cpus);

	// From the mappings build the actual cpu map we want
	struct cpu_map_entry_t * entries = alloc(cpus);
	for(i; cpus) { entries[i].count = 0; }
	for(i; cpus) {
		cpu_pairing_t & p = pairings[i];
		entries[p.cpu].start = p.llc_id * llc_width;
		entries[p.cpu].count = llc_width;
	}

	// get rid of the temporary data
	// 'maps' only borrows the strings owned by 'raw', free the array alone
	free(maps);
	free(pairings);

	for(i; cpus) {
		for(j; cache_levels) {
			^(raw[i][j]){};
		}
		free(raw[i]);
	}
	free(raw);

	cpu_info.llc_map = entries;
	cpu_info.hthrd_count = cpus;
}
385
// Release the cpu map stored in 'cpu_info.llc_map'
void __cfaabi_device_shutdown( void )
{
	free( cpu_info.llc_map );
}
389}
Note: See TracBrowser for help on using the repository browser.