source: libcfa/src/device/cpu.cfa @ ddd2ec9

Branches: ADT, ast-experimental, enum, forall-pointer-decay, pthread-emulation, qualifiedEnum
Last change on this file since ddd2ec9 was ddd2ec9, checked in by Thierry Delisle <tdelisle@…>, 2 years ago

Fix cache detection when only some cpus are present.

  • Property mode set to 100644
File size: 12.2 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// topology.cfa -- read the data structure
8//
9// Author           : Thierry Delisle
10// Created On       : Thu Jun 10 16:13:07 2021
11// Last Modified By :
12// Last Modified On :
13// Update Count     :
14//
15
16#include "device/cpu.hfa"
17
18#include <math.hfa>
19#include <stdlib.hfa>
20
21#include <errno.h>
22#include <stdio.h>
23#include <string.h>
24#include <unistd.h>
25
26extern "C" {
27        #include <dirent.h>
28        #include <sys/types.h>
29        #include <sys/stat.h>
30        #include <fcntl.h>
31}
32
// Locate the first occurrence of 'character' in 'str', examining at most
// 'len' characters and never looking past the terminating NUL.
// Returns a pointer to the match, or a null pointer when there is none.
static const char * strnchr(const char * str, int character, size_t len) {
        // Clamp the search window to the real string length first
        size_t window = strnlen(str, len);
        const void * hit = memchr(str, character, window);
        return (const char *)hit;
}
38
// Prefix test: true when 'have' begins with the whole of 'want'.
// Characters of 'have' beyond strlen(want) are never compared.
static bool strmatch(const char * want, char * have) {
        const size_t prefix_len = strlen(want);
        if(strncmp(want, have, prefix_len) != 0) { return false; }
        return true;
}
45
// Text holding a list of indexes, e.g. 'A-B,C'
typedef const char * idx_range_t;

// Parse the base 10 number that starts at 'map'.
//   map : first character of the number
//   len : how many characters the number is expected to span
//   end : receives the position where parsing stopped, as with strtoul
// The paranoid checks require that exactly 'len' characters were consumed.
static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
        unsigned long parsed = strtoul(map, (char**)end, 10);
        /* paranoid */ __attribute__((unused)) size_t consumed = (*end - map);
        /* paranoid */ verifyf(consumed <= len, "String '%s' passed with inconsistent length %zu", map, len);
        /* paranoid */ verifyf(consumed == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - consumed);
        return parsed;
}
57
// Compute how many indexes a comma separated list of indexes/ranges covers.
// For example 'A-B,C-D,E,F' has a width of '(B-A+1) + (D-C+1) + 1 + 1'.
//   map : first character of the list
//   len : number of characters that make up the list
//   end : (non-optional) receives the position where parsing stopped,
//         like strtoul and friends
//
// NOTE(review): the original comment carried a FIXME saying only 1 comma is
// supported; the right-hand side is parsed recursively, which looks like it
// handles any number of commas -- confirm before relying on it.
static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
        // Scratch end pointer for the left-hand parses, whose end is not reported
        const char * ignored;

        // A comma splits the list in two: sum the width of both halves
        const char * comma = strnchr(map, ',', len);
        if(comma != 0p) {
                size_t head = comma - map;
                size_t tail = len - head - 1;
                unsigned total = read_width(map, head, &ignored);
                total += read_width(comma + 1, tail, end);
                return total;
        }

        // No comma left, a dash means an inclusive range 'first-last'
        const char * dash = strnchr(map, '-', len);
        if(dash != 0p) {
                size_t head = dash - map;
                unsigned first = read_value(map, head, &ignored);
                unsigned last  = read_value(dash + 1, len - head - 1, end);
                return last - first + 1;
        }

        // Neither comma nor dash: a lone value, which covers exactly one index
        // and consumes all 'len' characters
        /* paranoid */ verifyf( ({strtoul(map, (char**)end, 10); *end == (map + len); }), "Value in range '%.*s' not a number", (int)len, map);
        *end = map + len;
        return 1;
}
91
// Call 'fn' once for every entry of the directory at 'path'
// (every entry readdir reports is forwarded, nothing is filtered here).
// Returns 0 on success, or the errno value reported by opendir when the
// directory could not be opened.
static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
        // open the directory
        DIR *dir = opendir(path);
        // report the actual failure cause rather than always claiming ENOTDIR
        if(dir == 0p) { return errno; }

        // call fn for each entry until readdir runs out
        struct dirent * ent;
        while ((ent = readdir(dir)) != 0p) {
                fn( ent );
        }

        // no longer need this
        closedir(dir);
        return 0;
}
108
// Count the directories under 'path' whose name has the form '[prefix]N',
// where prefix is the parameter and N is a non-negative base 10 integer.
// Returns the number of matching directories; the paranoid check expects
// them to be numbered 0 .. count-1 without gaps.
static int count_prefix_dirs(const char * path, const char * prefix) {
        // highest N seen and number of matches, both updated by the callback
        int max = -1;
        int count = 0;
        size_t prefix_len = strlen(prefix);
        void lambda(struct dirent * ent) {
                // we are looking for prefixN, where N is a number
                // check that the name starts with the prefix
                if(strncmp(ent->d_name, prefix, prefix_len) != 0) { return; }

                // check that the rest is a number: at least one digit must be
                // consumed, otherwise a bare 'prefix' would be read as N == 0
                const char * digits = ent->d_name + prefix_len;
                char * end;
                long int val = strtol(digits, &end, 10);
                if(end == digits || *end != '\0' || val < 0) { return; }

                // check that it's a directory
                if(ent->d_type != DT_DIR) { return; }

                // it's a match!
                max = max(val, max);
                count++;
        }
        iterate_dir(path, lambda);

        /* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);

        return count;
}
143
// Count the cpus listed in /sys/devices/system/cpu/online.
// The file holds a single line with a list of indexes/ranges; the result is
// the width of that list as computed by read_width.
static int count_cpus(void) {
        const char * fpath = "/sys/devices/system/cpu/online";
        // open takes (path, flags[, mode]); no mode is needed without O_CREAT
        int fd = open(fpath, O_RDONLY);
        /* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);

        char buff[128];
        ssize_t r = read(fd, buff, sizeof(buff));
        /* paranoid */ verifyf(r > 0, "Could not read file %s", fpath);
        // the line is expected to end with a newline, replace it with a terminator
        /* paranoid */ verify( buff[r-1] == '\n' );
        buff[r-1] = '\0';

        /* paranoid */ __attribute__((unused)) int ret =
        close(fd);
        /* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);

        // the end pointer is mandatory for read_width but not needed here
        const char * _;
        return read_width(buff, r - 1, &_);
}
163
// Number of cache *indexes* exposed for cpu0.
// An index is not the same thing as a level: Data and Instruction caches
// can share a level but each gets its own index.
// PITFALL: only cpu0 is inspected, every other cpu is assumed to match it
static int count_cache_indexes(void) {
        const char * cache_dir = "/sys/devices/system/cpu/cpu0/cache";
        const char * entry_prefix = "index";
        return count_prefix_dirs(cache_dir, entry_prefix);
}
171
172
// Read information about a specific cache index/cpu file into the output buffer.
//   cpu, idx : select /sys/devices/system/cpu/cpu<cpu>/cache/index<idx>/
//   file     : name of the file inside that directory
//   out      : destination buffer, filled with the raw bytes of the file
//              (no terminator is appended)
//   out_len  : maximum number of bytes to store in 'out'
// Returns the number of bytes read.
static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
        // Pick the file we want and read it
        char buf[128];
        /* paranoid */ __attribute__((unused)) int len =
        snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
        // snprintf returns the length it wanted to write: anything that does
        // not fit in buf means the path was truncated
        /* paranoid */ verifyf(len > 0 && len < (int)sizeof(buf), "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);

        // open takes (path, flags[, mode]); no mode is needed without O_CREAT
        int fd = open(buf, O_RDONLY);
        // 0 is a valid descriptor, only negative values signal failure
        /* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);

        ssize_t r = read(fd, out, out_len);
        /* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);

        /* paranoid */ __attribute__((unused)) int ret =
        close(fd);
        /* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
        return r;
}
192
// Iterate over the cache indexes of a given cpu, from the highest index down
// to index 0, calling 'handle' for every Unified or Data cache found.
//   cpu    : cpu whose cache directory is inspected
//   idxs   : number of indexes to visit
//   handle : called with the index, the cache level, and the text of
//            shared_cpu_list (without its trailing newline) and its length.
//            The text lives in a local buffer that is reused for the next
//            index: the handler must copy it if it needs to keep it.
typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);
static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
        for(i; idxs) {
                unsigned idx = idxs - 1 - i;
                char buf[128];

                // The buffer is cleared before every read and the last byte is
                // never written, so the string functions below always see a
                // terminated string, whatever the file contained.

                // Type says what kind of cache this is,
                // Options are: Unified, Data, Instruction
                memset(buf, 0, sizeof(buf));
                read_cpuidxinfo_into(cpu, idx, "type", buf, sizeof(buf) - 1);
                if((!strmatch("Unified", buf)) && (!strmatch("Data", buf))) {
                        // We don't care about instruction caches
                        continue;
                }

                // Level is the cache level: higher means bigger and slower
                memset(buf, 0, sizeof(buf));
                read_cpuidxinfo_into(cpu, idx, "level", buf, sizeof(buf) - 1);
                char * end;
                unsigned long level = strtoul(buf, &end, 10);
                /* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);

                // shared_cpu_list is a range of cpus that share this particular cache
                memset(buf, 0, sizeof(buf));
                size_t n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, sizeof(buf) - 1);
                /* paranoid */ verify( buf[n-1] == '\n' );
                buf[n-1] = '\0';

                // Simply call the functor
                handle(idx, level, buf, n - 1);
        }
}
223
224
// What is known about one cache of one cpu
struct raw_cache_instance {
        // text listing the cpus sharing this cache; released with free by the destructor
        idx_range_t range;
        // number of cpus covered by 'range'
        unsigned width;
        // cache level as read from sysfs
        unsigned char level;
        // FIXME add at least size and type
};

// Default constructor: only 'range' is set (to null), width and level are left untouched
static void  ?{}(raw_cache_instance & this) { this.range = 0p;}
// Destructor: releases the range text
static void ^?{}(raw_cache_instance & this) { free(this.range);}
234
// Build a cpus x cache_levels table describing the caches of every cpu.
//   cpus         : number of rows, cpu i is read from .../cpu<i>/cache
//   idxs         : number of cache indexes to inspect for each cpu
//   cache_levels : number of columns; a cache of level L is stored in
//                  column (cache_levels - L)
// Returns the allocated table. The caller owns it: each entry's range, each
// row and the table itself must be released.
raw_cache_instance ** build_raw_cache_table(unsigned cpus, unsigned idxs, unsigned cache_levels)
{
        raw_cache_instance ** raw = alloc(cpus);

        // TODO: this loop is broken, it only works if the present cpu start at 0 and are contiguous which is not guaranteed.
        for(i; cpus) {
                raw[i] = alloc(cache_levels);

                // alloc hands back uninitialised memory: give every entry a
                // known state so that releasing an entry no index filled in
                // frees a null pointer instead of garbage
                for(j; cache_levels) {
                        raw[i][j].range = 0p;
                        raw[i][j].width = 0;
                        raw[i][j].level = 0;
                }

                void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
                        /* paranoid */ verifyf(level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);

                        unsigned idx = cache_levels - level;
                        raw_cache_instance & r = raw[i][idx];
                        // drop any text stored by an earlier index that mapped
                        // to the same column (no-op while the entry is null)
                        free(r.range);
                        // 'range' points into the caller's buffer, keep our own copy
                        r.range = strndup(range, len);
                        r.level = level;
                        const char * end;
                        r.width = read_width(range, len, &end);
                }
                foreach_cacheidx(i, idxs, addcache);
        }

        return raw;
}
257
// One distinct last level cache and the block of slots assigned to it
struct llc_map_t {
        // entry of the raw cache table that describes this cache (not owned)
        raw_cache_instance * raw;
        // number of cpus placed in this cache's block so far
        unsigned count;
        // first slot of the block, -1u until one has been assigned
        unsigned start;
};
263
// Build the list of distinct last level caches found in the raw table.
// Two cpus share a cache when the range text stored in column 'llc_idx'
// is identical for both.
// Returns the allocated list together with its length; the entries point
// into 'raw' and start out with count 0 and start -1u.
static [*llc_map_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
        // Seed the list with the cache of cpu 0: there is nothing to compare it to
        size_t found = 1;
        llc_map_t * list = alloc();
        list[0].raw = &raw[0][llc_idx];
        list[0].count = 0;
        list[0].start = -1u;

        // Every other cpu either matches a known cache or adds a new one
        for(cpu; 1~cpus) {
                raw_cache_instance * cand = &raw[cpu][llc_idx];

                // Look for a known cache with the same range text
                bool known = false;
                for(k; found) {
                        if(strcmp(cand->range, list[k].raw->range) == 0) {
                                known = true;
                                break;
                        }
                }
                if(known) continue;

                // First time we see this range: grow the list by one entry
                list = alloc(found + 1, list`realloc);
                list[found].raw = cand;
                list[found].count = 0;
                list[found].start = -1u;
                found += 1;
        }

        // hand back both the list and its length
        return [list, found];
}
296
// Associates a cpu with the cache instance it belongs to
struct cpu_pairing_t {
        // cpu number
        unsigned cpu;
        // position of the matching entry in the list of distinct caches
        unsigned id;
};

// Order pairings by cache id only; the cpu number is not compared
int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
        return lhs.id < rhs.id;
}
305
// For every cpu, find which entry of 'maps' has the same range text as the
// cpu's cache in column 0 of the raw table.
// Returns an allocated array of 'cpus' pairings, indexed by cpu number.
// NOTE(review): column 0 is hard-coded here while distinct_llcs receives its
// column as a parameter -- confirm both always refer to the same column.
static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
        cpu_pairing_t * result = alloc(cpus);

        for(cpu; cpus) {
                result[cpu].cpu = cpu;
                idx_range_t wanted = raw[cpu][0].range;

                // stop at the first map entry with the same range text
                bool matched = false;
                for(m; map_cnt) {
                        if(strcmp(wanted, maps[m].raw->range) == 0) {
                                result[cpu].id = m;
                                matched = true;
                                break;
                        }
                }

                // every cpu is expected to match one of the maps
                if(!matched) {
                        /* paranoid */ verifyf( false, "Cpu %u map doesn't match", cpu );
                }
        }

        return result;
}
324
325#include <fstream.hfa>
326
extern "C" {
        // Fill in cpu_info: reads the cpu and cache information from sysfs and
        // builds, for every cpu, an entry giving the block of slots of its
        // last level cache and the cpu's own slot inside that block.
        void __cfaabi_device_startup( void ) {
                // number of cpus in the 'online' list and number of cache indexes of cpu0
                int cpus = count_cpus();
                int idxs = count_cache_indexes();

                // Count actual cache levels
                // (instruction caches are skipped by foreach_cacheidx)
                unsigned cache_levels = 0;
                unsigned llc = 0;
                {
                        // -1u stored in an unsigned char: larger than any level accepted by foreach_cacheidx
                        unsigned char prev = -1u;
                        void first(unsigned idx, unsigned char level, const char * map, size_t len) {
                                // indexes are visited from highest to lowest, levels must strictly decrease
                                /* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
                                llc = max(llc, level);
                                prev = level;
                                cache_levels++;
                        }
                        foreach_cacheidx(0, idxs, first);
                }

                // Read in raw data
                raw_cache_instance ** raw = build_raw_cache_table(cpus, idxs, cache_levels);

                // Find number of distinct cache instances
                // (cache_levels - llc is the column where the table stores level 'llc')
                llc_map_t * maps;
                size_t map_cnt;
                [maps, map_cnt] =  distinct_llcs(cpus, cache_levels - llc, raw);

                #if defined(__CFA_WITH_VERIFY__)
                // Verify that the caches cover the all the cpus
                // (once by re-parsing the range text, once from the stored widths)
                {
                        unsigned width1 = 0;
                        unsigned width2 = 0;
                        for(i; map_cnt) {
                                const char * _;
                                width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
                                width2 += maps[i].raw->width;
                        }
                        verify(width1 == cpus);
                        verify(width2 == cpus);
                }
                #endif

                // Get mappings from cpu to cache instance
                cpu_pairing_t * pairings = get_cpu_pairings(cpus, raw, maps, map_cnt);

                // Sort by cache instance
                qsort(pairings, cpus);

                // Give every cache instance a block of 'width' consecutive slots,
                // in the order the instances show up in the sorted pairings
                {
                        unsigned it = 0;
                        for(i; cpus) {
                                unsigned llc_id = pairings[i].id;
                                // -1u marks an instance that has no block yet
                                if(maps[llc_id].start == -1u) {
                                        maps[llc_id].start = it;
                                        it += maps[llc_id].raw->width;
                                        /* paranoid */ verify(maps[llc_id].start < it);
                                        /* paranoid */ verify(it != -1u);
                                }
                        }
                        /* paranoid */ verify(it == cpus);
                }

                // From the mappings build the actual cpu map we want
                struct cpu_map_entry_t * entries = alloc(cpus);
                for(i; cpus) { entries[i].count = 0; }
                for(i; cpus) {
                        /* paranoid */ verify(pairings[i].id < map_cnt);
                        unsigned c = pairings[i].cpu;
                        unsigned llc_id = pairings[i].id;
                        unsigned width = maps[llc_id].raw->width;
                        unsigned start = maps[llc_id].start;
                        // next unused slot inside the block of this cache instance
                        unsigned self  = start + (maps[llc_id].count++);
                        entries[c].count = width;
                        entries[c].start = start;
                        entries[c].self  = self;
                }

                // get rid of the temporary data
                free(maps);
                free(pairings);

                // the raw table owns the range text of every entry: destroy the
                // entries before releasing each row and then the table
                for(i; cpus) {
                        for(j; cache_levels) {
                                ^(raw[i][j]){};
                        }
                        free(raw[i]);
                }
                free(raw);

                // publish the result
                cpu_info.llc_map = entries;
                cpu_info.hthrd_count = cpus;
        }

        // Release the map allocated by __cfaabi_device_startup
        void __cfaabi_device_shutdown( void ) {
                free(cpu_info.llc_map);
        }
}
424
// Global cpu information; llc_map and hthrd_count are filled in by __cfaabi_device_startup
cpu_info_t cpu_info;
Note: See TracBrowser for help on using the repository browser.