1 | // |
---|
2 | // Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo |
---|
3 | // |
---|
4 | // The contents of this file are covered under the licence agreement in the |
---|
5 | // file "LICENCE" distributed with Cforall. |
---|
6 | // |
---|
7 | // topology.cfa -- read the data structure |
---|
8 | // |
---|
9 | // Author : Thierry Delisle |
---|
10 | // Created On : Thu Jun 10 16:13:07 2021 |
---|
11 | // Last Modified By : |
---|
12 | // Last Modified On : |
---|
13 | // Update Count : |
---|
14 | // |
---|
15 | |
---|
16 | #include "device/cpu.hfa" |
---|
17 | |
---|
18 | #include <math.hfa> |
---|
19 | #include <stdlib.hfa> |
---|
20 | |
---|
21 | #include <errno.h> |
---|
22 | #include <stdio.h> |
---|
23 | #include <string.h> |
---|
24 | #include <unistd.h> |
---|
25 | |
---|
26 | extern "C" { |
---|
27 | #include <dirent.h> |
---|
28 | #include <sys/types.h> |
---|
29 | #include <sys/stat.h> |
---|
30 | #include <fcntl.h> |
---|
31 | } |
---|
32 | |
---|
33 | #include "bits/defs.hfa" |
---|
34 | #include "algorithms/range_iterator.hfa" |
---|
35 | |
---|
// Bounded strchr: returns a pointer to the first 'character' found in 'str',
// examining at most 'len' chars and never looking at or past the terminating NUL.
// Yields a null pointer when the character is not present in that window.
static const char * strnchr(const char * str, int character, size_t len) {
	// measure the searchable prefix: stop at the NUL or at len, whichever comes first
	size_t span = 0;
	while(span < len && str[span] != '\0') {
		span++;
	}
	return (const char *)memchr(str, character, span);
}
---|
41 | |
---|
// Prefix test: true when 'have' begins with the whole of 'want'.
// Whatever follows the first strlen(want) chars of 'have' is never examined.
static bool strmatch(const char * want, char * have) {
	for(size_t i = 0; want[i] != '\0'; i++) {
		// a shorter 'have' fails here: its NUL differs from the wanted char
		if(have[i] != want[i]) { return false; }
	}
	return true;
}
---|
48 | |
---|
// Text describing a set of indexes, e.g. '0-3,8-11'
typedef const char * idx_range_t;

// Parse the base 10 number found at the start of 'map'
// 'end' receives the address just past the last char consumed, as with strtoul
// The paranoid checks require that exactly 'len' chars were consumed
static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
	unsigned long parsed = strtoul(map, (char**)end, 10);
	/* paranoid */ __attribute__((unused)) size_t consumed = (*end - map);
	/* paranoid */ verifyf(consumed <= len, "String '%s' passed with inconsistent length %zu", map, len);
	/* paranoid */ verifyf(consumed == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - consumed);
	return parsed;
}
---|
60 | |
---|
// Evaluate the width of a comma separated list of idx
// for example 'A-B,C-D,E,F' has a width of '(B-A+1) + (D-C+1) + 1 + 1'
// Also has a (non-optional) end ptr like strtoul and friends
//
// The list is split at its first comma and the right-hand remainder is
// evaluated recursively, so any number of commas is handled.
// Only the first 'len' chars of 'map' are considered.
static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
	// Do we have a comma
	const char * comma = strnchr(map, ',', len);
	if(comma != 0p) {
		// We do! recurse and sum the widths
		const char * _; // end of the left part, the caller only wants the overall end
		size_t split = comma - map;
		unsigned lhs = read_width(map, split, &_);
		unsigned rhs = read_width(comma + 1, len - split - 1, end);
		return lhs + rhs;
	}

	// No commas, check for a range
	const char * dash = strnchr(map, '-', len);
	if(dash != 0p) {
		const char * _;
		size_t split = dash - map;
		unsigned lhs = read_value(map, split, &_);
		unsigned rhs = read_value(dash + 1, len - split - 1, end);
		// an inverted range would wrap around in the unsigned subtraction below
		/* paranoid */ verifyf(lhs <= rhs, "Range '%.*s' is inverted", (int)len, map);
		return rhs - lhs + 1;
	}

	// No range, no comma, just a single value
	// It's width is 1 and we can consume everything
	/* paranoid */ verifyf( ({strtoul(map, (char**)end, 10); *end == (map + len); }), "Value in range '%.*s' not a number", (int)len, map);
	*end = map + len;
	return 1;
}
---|
94 | |
---|
// Invoke 'fn' once for every entry that readdir reports in directory 'path'
// Returns ENOTDIR when the directory cannot be opened, 0 otherwise
static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
	DIR * handle = opendir(path);
	if(handle == 0p) { return ENOTDIR; }

	// hand each entry to the callback, in the order readdir yields them
	for(struct dirent * entry = readdir(handle); entry != 0p; entry = readdir(handle)) {
		fn( entry );
	}

	// the directory stream is not needed past this point
	closedir(handle);
	return 0;
}
---|
111 | |
---|
// count the number of directories with the specified prefix
// the directories counted have the form '[prefix]N' where prefix is the parameter
// and N is a non-negative base 10 integer.
// Returns 0 when 'path' cannot be opened as a directory.
// The paranoid check expects the numbers found to be exactly 0 .. count-1.
static int count_prefix_dirs(const char * path, const char * prefix) {
	const size_t prefix_len = strlen(prefix);

	// highest N seen and number of matching entries,
	// both updated by the nested callback below
	int max = -1;
	int count = 0;
	void lambda(struct dirent * ent) {
		// we are looking for prefixN, where N is a number
		// check that the name starts with the prefix
		if(strncmp(ent->d_name, prefix, prefix_len) != 0) { return; }

		// check that the rest is a number and nothing else
		// 'end == s' means no digits at all, i.e. the name is just the prefix
		const char * s = ent->d_name + prefix_len;
		char * end;
		long int val = strtol(s, &end, 10);
		if(end == s || *end != '\0' || val < 0) { return; }

		// check that it's a directory
		if(ent->d_type != DT_DIR) { return; }

		// it's a match!
		max = max(val, max);
		count++;
	}
	int ret = iterate_dir(path, lambda);
	if(ret == ENOTDIR) return 0;

	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);

	return count;
}
---|
147 | |
---|
// Count number of cpus in the system
// Reads the list of online cpus from sysfs and returns
//  - the width of that list (see read_width)
//  - a copy of the list text without its trailing newline
// The copy is allocated with strndup, the caller is responsible for freeing it.
static [int, const char *] count_cpus(void) {
	const char * fpath = "/sys/devices/system/cpu/online";
	// open takes (path, flags): no mode is needed since nothing is created
	int fd = open(fpath, O_RDONLY);
	/* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);

	char buff[128];
	ssize_t r = read(fd, buff, sizeof(buff));
	/* paranoid */ verifyf(r > 0, "Could not read file %s", fpath);
	/* paranoid */ verify( buff[r-1] == '\n' );
	// replace the trailing newline to get a proper C string
	buff[r-1] = '\0';

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);

	const char * _;
	return [read_width(buff, r - 1, &_), strndup(buff, r - 1)];
}
---|
167 | |
---|
// Number of cache *indexes* listed for cpu0
// An index is not the same thing as a level: Data and Instruction caches
// can sit at the same level, yet each gets its own index
// PITFALL: every other cpu is assumed to expose the same indexes as cpu0
static int count_cache_indexes(void) {
	const char * cache_dir = "/sys/devices/system/cpu/cpu0/cache";
	const char * entry_prefix = "index";
	return count_prefix_dirs(cache_dir, entry_prefix);
}
---|
175 | |
---|
176 | |
---|
// read information about a specific cache index/cpu file into the output buffer
// 'file' is the name of the sysfs attribute inside the index directory
// At most 'out_len' bytes are stored in 'out', the data is NOT NUL-terminated
// Returns the number of bytes read
static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
	// Pick the file we want and read it
	char buf[128];
	/* paranoid */ __attribute__((unused)) int len =
		snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
	// snprintf returns the length it wanted to write, a value >= the buffer size means truncation
	/* paranoid */ verifyf(len > 0 && (size_t)len < sizeof(buf), "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);

	// open takes (path, flags): no mode is needed since nothing is created
	int fd = open(buf, O_RDONLY);
	// 0 is a valid file descriptor, only negative values signal failure
	/* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);

	ssize_t r = read(fd, out, out_len);
	/* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);

	/* paranoid */ __attribute__((unused)) int ret =
		close(fd);
	/* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
	return r;
}
---|
196 | |
---|
// Iterate over the cache indexes of a given cpu
// Indexes are visited from the highest (idxs - 1) down to 0
// 'handle' is called for Unified and Data caches only, with
//  - idx   : the index visited
//  - level : the cache level read from sysfs
//  - range : the list of cpus sharing that cache, newline removed
//  - len   : the length of that list
// 'range' points into a local buffer that is reused, it must be copied to be kept
typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);
static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
	for(i; idxs) {
		unsigned idx = idxs - 1 - i;
		char buf[32];
		size_t n;

		// Type says what kind of cache this is,
		// Options are: Unified, Data, Instruction
		// keep one byte spare so the text can be terminated before it is parsed
		n = read_cpuidxinfo_into(cpu, idx, "type", buf, sizeof(buf) - 1);
		buf[n] = '\0';
		if((!strmatch("Unified", buf)) && (!strmatch("Data", buf))) {
			// We don't care about instruction caches
			continue;
		}

		// Level is the cache level: higher means bigger and slower
		n = read_cpuidxinfo_into(cpu, idx, "level", buf, sizeof(buf) - 1);
		buf[n] = '\0';
		char * end;
		unsigned long level = strtoul(buf, &end, 10);
		/* paranoid */ verifyf(end != buf, "Cpu %u index %u does not report a numeric cache level", cpu, idx);
		/* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);

		// shared_cpu_list is a range of cpus that share this particular cache
		// the trailing newline is overwritten by the terminator
		n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, sizeof(buf));
		/* paranoid */ verify( buf[n-1] == '\n' );
		buf[n-1] = '\0';

		// Simply call the functor
		handle(idx, level, buf, n - 1);
	}
}
---|
227 | |
---|
228 | |
---|
// One cache as seen from one cpu
struct raw_cache_instance {
	idx_range_t range;    // text list of the cpus covered, released with free by the destructor
	unsigned width;       // how many cpus that list covers
	unsigned char level;  // the cache level
	// FIXME add at least size and type
};

// Default construction: no range text yet, width and level are left unset
static void ?{}(raw_cache_instance & this) {
	this.range = 0p;
}

// Destruction: release the range text
static void ^?{}(raw_cache_instance & this) {
	free(this.range);
}
---|
238 | |
---|
// Returns a 2D array of instances of size [cpu count][cache levels]
// where cache level doesn't include instruction caches
//
// Only the cpus listed in 'cpus' get a row, the other rows stay null.
// Inside a row, the cache of level L is stored at position 'cache_levels - L'.
// Rows are zero-filled, so a slot that no cache index fills in has a null range
// and can still be safely destroyed.
// The caller owns the table: each filled slot holds a strndup'ed range.
raw_cache_instance ** build_raw_cache_table(unsigned cpus_c, idx_range_t cpus, unsigned idxs, unsigned cache_levels)
{
	raw_cache_instance ** raw = alloc(cpus_c, '\0'`fill);

	RangeIter rc = { cpus };
	while(moveNext(rc)) {
		unsigned i = rc.com;
		raw[i] = alloc(cache_levels, '\0'`fill);
		void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
			// level 0 would land one past the end of the row
			/* paranoid */ verifyf(0 < level && level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);

			unsigned idx = cache_levels - level;
			raw_cache_instance & r = raw[i][idx];
			// 'range' lives in a buffer owned by foreach_cacheidx, keep a private copy
			r.range = strndup(range, len);
			r.level = level;
			const char * end;
			r.width = read_width(range, len, &end);
		}
		foreach_cacheidx(i, idxs, addcache);
	}

	return raw;
}
---|
264 | |
---|
// Book-keeping for one distinct last level cache
struct llc_map_t {
	raw_cache_instance * raw;  // points to an entry of the raw cache table describing this cache
	unsigned count;            // set to 0 by distinct_llcs, bumped for every cpu placed in this cache
	unsigned start;            // set to -1u by distinct_llcs, i.e. not assigned yet
};
---|
270 | |
---|
// Build the list of distinct last level caches
// Two cpus share a cache when the range text at position 'llc_idx' of their rows is equal
// Returns the allocated list together with its length, the caller frees the list
// Each element starts with a count of 0 and a start of -1u
static [*llc_map_t, size_t cnt] distinct_llcs(idx_range_t cpus, unsigned llc_idx, raw_cache_instance ** raw) {
	// There is always at least one element
	llc_map_t * found = alloc();
	size_t found_cnt = 1;

	RangeIter rc = { cpus };
	__attribute__((unused)) bool ret =
		moveNext(rc);
	/* paranoid */ verify( ret );
	/* paranoid */ verify( rc.com >= 0 );

	// The first cpu of the list seeds the result
	found[0].raw = &raw[rc.com][llc_idx];
	found[0].count = 0;
	found[0].start = -1u;

	// Every remaining cpu either matches a known cache or adds a new one
	while(moveNext(rc)) {
		unsigned i = rc.com;
		raw_cache_instance * candidate = &raw[i][llc_idx];

		// Look for the same range text among the caches collected so far
		bool known = false;
		for(j; found_cnt) {
			if(strcmp(candidate->range, found[j].raw->range) == 0) {
				known = true;
				break;
			}
		}
		if(known) { continue; }

		// First time this range is seen, grow the list by one
		found = alloc(found_cnt + 1, found`realloc);
		found[found_cnt].raw = candidate;
		found[found_cnt].count = 0;
		found[found_cnt].start = -1u;
		found_cnt++;
	}

	return [found, found_cnt];
}
---|
310 | |
---|
// Associates a cpu with the position of its cache in the list of distinct caches
struct cpu_pairing_t {
	unsigned cpu;  // the cpu number
	unsigned id;   // index of the matching llc_map_t
};

// Ordering used when sorting pairings: only the cache id is compared
int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
	int before = lhs.id < rhs.id;
	return before;
}
---|
319 | |
---|
// For every cpu listed in 'cpus', find which of the 'map_cnt' caches in 'maps' it uses
// The match is done on the range text stored at position 0 of the cpu's row in 'raw'
// Returns an allocated array of 'cpus_c' pairings indexed by cpu number, the caller frees it
static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus_c, idx_range_t cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
	cpu_pairing_t * pairings = alloc(cpus_c);

	RangeIter rc = { cpus };
	while(moveNext(rc)) {
		unsigned i = rc.com;
		pairings[i].cpu = i;
		idx_range_t want = raw[i][0].range;

		// the first cache whose range text is identical wins
		bool matched = false;
		for(j; map_cnt) {
			if(strcmp(want, maps[j].raw->range) == 0) {
				pairings[i].id = j;
				matched = true;
				break;
			}
		}
		if(matched) { continue; }

		// every cpu is expected to belong to one of the caches
		/* paranoid */ verifyf( false, "Cpu %u map doesn't match", i );
	}

	return pairings;
}
---|
340 | |
---|
341 | #include <fstream.hfa> |
---|
342 | |
---|
extern "C" {
	// Build cpu_info from what sysfs reports:
	//  1. read the list of online cpus
	//  2. count the cache indexes and the data/unified cache levels of cpu 0
	//  3. read the caches of every cpu and find the distinct last level caches
	//  4. give each distinct cache a contiguous block of slots and each cpu a slot in its block
	// All temporary data is released before returning, only the final map is kept in cpu_info.
	// NOTE(review): when no cache index directory is found (idxs == 0) the raw rows are empty
	// but distinct_llcs still looks at position 0 of each row -- confirm this cannot happen.
	void __cfaabi_device_startup( void ) {
		int cpus_c;
		const char * cpus;
		[cpus_c, cpus] = count_cpus();
		#if defined(__CFA_WITH_VERIFY__)
		// Verify that the mapping is self consistent.
		{
			RangeIter rc = { cpus };
			while(moveNext(rc)) {
				unsigned i = rc.com;
				verify(cpus_c > i);
			}
		}
		#endif

		int idxs = count_cache_indexes();

		// Count actual cache levels
		unsigned cache_levels = 0;
		unsigned llc = 0;
		if (idxs != 0) {
			// indexes are visited from the highest down, levels are expected to strictly decrease
			unsigned char prev = -1u;
			void first(unsigned idx, unsigned char level, const char * map, size_t len) {
				/* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
				llc = max(llc, level);
				prev = level;
				cache_levels++;
			}
			foreach_cacheidx(0, idxs, first);
		}

		// Read in raw data
		raw_cache_instance ** raw = build_raw_cache_table(cpus_c, cpus, idxs, cache_levels);

		// Find number of distinct cache instances
		llc_map_t * maps;
		size_t map_cnt;
		[maps, map_cnt] = distinct_llcs(cpus, cache_levels - llc, raw);

		#if defined(__CFA_WITH_VERIFY__)
		// Verify that the caches cover all the cpus
		{
			unsigned width1 = 0;
			unsigned width2 = 0;
			for(i; map_cnt) {
				const char * _;
				width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
				width2 += maps[i].raw->width;
			}
			verify(width1 == cpus_c);
			verify(width2 == cpus_c);
		}
		#endif

		// Get mappings from cpu to cache instance
		cpu_pairing_t * pairings = get_cpu_pairings(cpus_c, cpus, raw, maps, map_cnt);

		// Sort by cache instance
		qsort(pairings, cpus_c);

		// Give every cache a contiguous block of slots, as wide as the cache,
		// the first time one of its cpus is met in the sorted pairings
		{
			unsigned it = 0;
			RangeIter rc = { cpus };
			while(moveNext(rc)) {
				unsigned i = rc.com;
				unsigned llc_id = pairings[i].id;
				if(maps[llc_id].start == -1u) {
					maps[llc_id].start = it;
					it += maps[llc_id].raw->width;
					/* paranoid */ verify(maps[llc_id].start < it);
					/* paranoid */ verify(it != -1u);
				}
			}
			/* paranoid */ verify(it == cpus_c);
		}

		// From the mappings build the actual cpu map we want
		struct cpu_map_entry_t * entries = alloc(cpus_c);
		for(i; cpus_c) { entries[i].count = 0; }

		// scoped so the iterator is gone before the cpu list text is released
		{
			RangeIter rc = { cpus };
			while(moveNext(rc)) {
				unsigned i = rc.com;
				/* paranoid */ verify(pairings[i].id < map_cnt);
				unsigned c = pairings[i].cpu;
				unsigned llc_id = pairings[i].id;
				unsigned start = maps[llc_id].start;
				entries[c].count = maps[llc_id].raw->width;
				entries[c].start = start;
				// each cpu takes the next free slot inside the block of its cache
				entries[c].self = start + (maps[llc_id].count++);
				entries[c].cache = llc_id;
			}
		}

		// get rid of the temporary data
		free(maps);
		free(pairings);

		for(i; cpus_c) {
			if( raw[i] ) for(j; cache_levels) {
				^(raw[i][j]){};
			}
			free(raw[i]);
		}
		free(raw);

		// the cpu list text was allocated by count_cpus with strndup
		free(cpus);

		cpu_info.llc_map = entries;
		cpu_info.hthrd_count = cpus_c;
		cpu_info.llc_count = map_cnt;
	}

	// Release the map allocated by __cfaabi_device_startup
	void __cfaabi_device_shutdown( void ) {
		free(cpu_info.llc_map);
	}
}
---|
458 | |
---|
// The global cpu information: llc_map, hthrd_count and llc_count are assigned
// by __cfaabi_device_startup, llc_map is freed by __cfaabi_device_shutdown
libcfa_public cpu_info_t cpu_info;
---|