source: libcfa/src/device/cpu.cfa @ 8fceae2

Last change on this file since 8fceae2 was 72b5805e, checked in by Thierry Delisle <tdelisle@…>, 2 years ago

Fixed device/cpu to support systems without caches

  • Property mode set to 100644
File size: 13.6 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
// cpu.cfa -- read the cpu and cache topology of the system from sysfs
8//
9// Author           : Thierry Delisle
10// Created On       : Thu Jun 10 16:13:07 2021
11// Last Modified By :
12// Last Modified On :
13// Update Count     :
14//
15
16#include "device/cpu.hfa"
17
18#include <math.hfa>
19#include <stdlib.hfa>
20
21#include <errno.h>
22#include <stdio.h>
23#include <string.h>
24#include <unistd.h>
25
26extern "C" {
27        #include <dirent.h>
28        #include <sys/types.h>
29        #include <sys/stat.h>
30        #include <fcntl.h>
31}
32
33#include "bits/defs.hfa"
34#include "algorithms/range_iterator.hfa"
35
// Bounded strchr: look for 'character' in str while examining at most len
// chars and never going past the terminating '\0'.
// Returns a pointer to the first occurence, or a null pointer if there is none.
static const char * strnchr(const char * str, int character, size_t len) {
        const size_t span = strnlen(str, len);
        const void * hit = memchr(str, character, span);
        return (const char *)hit;
}
41
// Prefix test: true when 'have' begins with the whole of 'want'.
// Whatever 'have' holds past strlen(want) characters is not looked at.
static bool strmatch(const char * want, char * have) {
        return 0 == strncmp(want, have, strlen(want));
}
48
// Textual list of indexes, e.g. 'A-B,C-D,E,F': comma separated elements that
// are either a single value or a dash separated range.
typedef const char * idx_range_t;
50
// Parse the base 10 unsigned number found in the first len chars of map.
// *end receives the position where strtoul stopped parsing; the paranoid
// checks require that position to be exactly map + len.
static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
        unsigned long parsed = strtoul(map, (char**)end, 10);
        /* paranoid */ __attribute__((unused)) size_t consumed = (*end - map);
        /* paranoid */ verifyf(consumed <= len, "String '%s' passed with inconsistent length %zu", map, len);
        /* paranoid */ verifyf(consumed == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - consumed);
        return parsed;
}
60
// Compute how many indexes a comma separated list of idx covers.
// Each element is either a single value (width 1) or an inclusive range
// 'A-B' (width B - A + 1), so 'A-B,C-D,E,F' has a width of
// '(B-A+1) + (D-C+1) + 1 + 1'.
// Like strtoul and friends there is a (non-optional) end ptr, it is set while
// parsing the last element of the list.
static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
        unsigned total = 0;
        for(;;) {
                // The current element stops at the next comma, or with the list itself
                const char * comma = strnchr(map, ',', len);
                const bool last = comma == 0p;
                const size_t elem_len = last ? len : (size_t)(comma - map);

                // Only the last element reports its end position to the caller
                const char * ignored;
                const char ** elem_end = last ? end : &ignored;

                const char * dash = strnchr(map, '-', elem_len);
                if(dash != 0p) {
                        // Inclusive range 'lo-hi'
                        const char * lo_end;
                        size_t split = dash - map;
                        unsigned lo = read_value(map, split, &lo_end);
                        unsigned hi = read_value(dash + 1, elem_len - split - 1, elem_end);
                        total += hi - lo + 1;
                }
                else {
                        // Single value: its width is 1 and the whole element is consumed
                        /* paranoid */ verifyf( ({strtoul(map, (char**)elem_end, 10); *elem_end == (map + elem_len); }), "Value in range '%.*s' not a number", (int)elem_len, map);
                        *elem_end = map + elem_len;
                        total += 1;
                }

                if(last) return total;

                // Step over the element and the comma that follows it
                len -= elem_len + 1;
                map = comma + 1;
        }
}
94
// Call fn on every entry of the directory found at path.
// Returns 0 once the walk is over, or ENOTDIR when the directory could not
// be opened (whatever the actual reason was).
static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
        DIR * handle = opendir(path);
        if(handle == 0p) return ENOTDIR;

        // the walk ends as soon as readdir has nothing more to give
        for(struct dirent * entry = readdir(handle); entry != 0p; entry = readdir(handle)) {
                fn( entry );
        }

        // done with the directory
        closedir(handle);
        return 0;
}
111
// count the number of directories with the specified prefix
// the directories counted have the form '[prefix]N' where prefix is the parameter
// and N is a non-negative base 10 integer.
// Returns 0 if path could not be opened as a directory.
// The paranoid check expects the directories to be numbered 0 to count - 1.
static int count_prefix_dirs(const char * path, const char * prefix) {
        // read the directory and find the highest number and the count
        // and make sure everything is as expected
        int max = -1;
        int count = 0;
        const size_t prefix_len = strlen(prefix);
        void lambda(struct dirent * ent) {
                // we are looking for prefixN, where N is a number
                // check that the name starts with the prefix
                if(0 != strncmp(ent->d_name, prefix, prefix_len)) { return; }

                // check that the next part is a number
                // it must start with a digit: on its own strtol also accepts
                // an empty string (parsed as 0), leading spaces and a sign
                char * s = ent->d_name + prefix_len;
                if(*s < '0' || *s > '9') { return; }
                char * end;
                long int val = strtol(s, &end, 10);
                if(*end != '\0' || val < 0) { return; }

                // check that it's a directory
                if(ent->d_type != DT_DIR) { return; }

                // it's a match!
                max = max(val, max);
                count++;
        }
        int ret = iterate_dir(path, lambda);
        if(ret == ENOTDIR) return 0;

        /* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);

        return count;
}
147
// Count number of cpus in the system
// Reads the list in /sys/devices/system/cpu/online and returns
// [the width of that list, a copy of the list without its trailing newline]
// The copy is made with strndup, it is up to the caller to free it.
static [int, const char *] count_cpus(void) {
        const char * fpath = "/sys/devices/system/cpu/online";
        // open takes the access flags as second argument,
        // the third (mode) argument is only meaningful when creating a file
        int fd = open(fpath, O_RDONLY);
        /* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);

        char buff[128];
        ssize_t r = read(fd, buff, sizeof(buff));
        /* paranoid */ verifyf(r > 0, "Could not read file %s", fpath);
        /* paranoid */ verify( buff[r-1] == '\n' );
        // replace the trailing newline to get a proper string
        buff[r-1] = '\0';

        /* paranoid */ __attribute__((unused)) int ret =
        close(fd);
        /* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);

        const char * _;
        return [read_width(buff, r - 1, &_), strndup(buff, r - 1)];
}
167
// Count the cache *indexes* of the system, i.e., the 'indexN' directories
// found in the cache directory of cpu0.
// An index is not a level: a Data and an Instruction cache can share a level
// but never an index.
// PITFALL: only cpu0 is looked at, all cpus are assumed to have the same indexes
static int count_cache_indexes(void) {
        const char * cache_dir = "/sys/devices/system/cpu/cpu0/cache";
        const char * idx_prefix = "index";
        return count_prefix_dirs(cache_dir, idx_prefix);
}
175
176
// read information about a specific cache index/cpu file into the output buffer
// i.e., read /sys/devices/system/cpu/cpu[cpu]/cache/index[idx]/[file]
// At most out_len bytes are written to out and NO terminating '\0' is added.
// Returns the number of bytes read.
static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
        // Pick the file we want and read it
        char buf[128];
        /* paranoid */ __attribute__((unused)) int len =
        snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
        // snprintf returns the length it needed, so anything that does not fit in buf was cut short
        /* paranoid */ verifyf(len > 0 && (size_t)len < sizeof(buf), "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);

        // open takes the access flags as second argument,
        // the third (mode) argument is only meaningful when creating a file
        int fd = open(buf, O_RDONLY);
        // 0 is a valid file descriptor, only negative values report a failure
        /* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);

        ssize_t r = read(fd, out, out_len);
        /* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);

        /* paranoid */ __attribute__((unused)) int ret =
        close(fd);
        /* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
        return r;
}
196
// Iterate over the cache indexes of a given cpu, from index idxs - 1 down to index 0
// Instruction caches are skipped, for every other index handle is called with
//   idx   : the index
//   level : the cache level read from the 'level' file
//   range : the content of 'shared_cpu_list' without its trailing newline
//   len   : the length of range
// range points into a local buffer, the handler must copy it to keep it.
typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);
static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
        for(i; idxs) {
                unsigned idx = idxs - 1 - i;
                // Big enough for long cpu lists. The reads never touch the last byte
                // and the buffer is cleared before each of them, which keeps the
                // content a proper '\0' terminated string.
                char buf[128];

                // Type says what kind of cache this is,
                // Options are: Unified, Data, Instruction
                memset(buf, 0, sizeof(buf));
                read_cpuidxinfo_into(cpu, idx, "type", buf, sizeof(buf) - 1);
                if((!strmatch("Unified", buf)) && (!strmatch("Data", buf))) {
                        // We don't care about instruction caches
                        continue;
                }

                // Level is the cache level: higher means bigger and slower
                memset(buf, 0, sizeof(buf));
                read_cpuidxinfo_into(cpu, idx, "level", buf, sizeof(buf) - 1);
                char * end;
                unsigned long level = strtoul(buf, &end, 10);
                /* paranoid */ verifyf(end != buf, "Could not parse the cache level of cpu %u, index %u", cpu, idx);
                /* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);

                // shared_cpu_list is a range of cpus that share this particular cache
                memset(buf, 0, sizeof(buf));
                size_t n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, sizeof(buf) - 1);
                /* paranoid */ verify( buf[n-1] == '\n' );
                buf[n-1] = '\0';

                // Simply call the functor
                handle(idx, level, buf, n - 1);
        }
}
227
228
// What is known about one cache of one cpu
struct raw_cache_instance {
        idx_range_t range;      // A text description of the cpus covered
        unsigned width;         // The number of cpus covered
        unsigned char level;    // the cache level
        // FIXME add at least size and type
};

// A new instance has no range yet
static void  ?{}(raw_cache_instance & this) {
        this.range = 0p;
}

// The instance owns its range string and frees it on destruction
static void ^?{}(raw_cache_instance & this) {
        free(this.range);
}
238
// Returns a 2D array of instances of size [cpu count][cache levels]
// where cache level doesn't include instruction caches
// Only the cpus listed in 'cpus' get a row, the other rows are null.
// Inside a row, the cache of level L is stored at position cache_levels - L.
// Every range string is a copy (strndup) owned by its instance.
raw_cache_instance ** build_raw_cache_table(unsigned cpus_c, idx_range_t cpus, unsigned idxs, unsigned cache_levels)
{
        raw_cache_instance ** raw = alloc(cpus_c, '\0'`fill);

        RangeIter rc = { cpus };
        while(moveNext(rc)) {
                unsigned i = rc.com;
                // Zero-fill the row: a position that no cache index fills in must hold
                // a null range, because every instance of the table is later destroyed
                // and its destructor frees the range.
                raw[i] = alloc(cache_levels, '\0'`fill);
                void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
                        /* paranoid */ verifyf(level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);

                        // highest level first
                        unsigned idx = cache_levels - level;
                        raw_cache_instance & r = raw[i][idx];
                        // range lives in a buffer of the caller, keep a copy
                        r.range = strndup(range, len);
                        r.level = level;
                        const char * end;
                        r.width = read_width(range, len, &end);
                }
                foreach_cacheidx(i, idxs, addcache);
        }

        return raw;
}
264
// One distinct last level cache and the bookkeeping needed to place its cpus
struct llc_map_t {
        raw_cache_instance * raw;       // entry of the raw cache table describing this cache (not owned)
        unsigned count;                 // number of cpus placed in this cache so far, starts at 0
        unsigned start;                 // first slot given to this cache, -1u as long as none was given
};
270
// Returns an allocated list of all the distinct last level caches, and its size.
// Two caches are the same one when their range strings are equal.
// Each element points into raw (column llc_idx), has a count of 0 and a
// start of -1u. The list must be freed by the caller.
static [*llc_map_t, size_t cnt] distinct_llcs(idx_range_t cpus, unsigned llc_idx, raw_cache_instance ** raw) {
        // There is always at least one element
        llc_map_t* ranges = alloc();
        size_t range_cnt = 1;

        RangeIter rc = { cpus };
        __attribute__((unused)) bool ret =
        moveNext(rc);
        /* paranoid */ verify( ret );
        /* paranoid */ verify( rc.com >= 0 );

        // The first cpu of the list provides element 0
        ranges[0].raw = &raw[rc.com][llc_idx];
        ranges[0].count = 0;
        ranges[0].start = -1u;

        // Any other cpu adds an element only if its range was never seen
        while(moveNext(rc)) {
                unsigned i = rc.com;
                raw_cache_instance * candidate = &raw[i][llc_idx];

                bool known = false;
                for(j; range_cnt) {
                        if(0 == strcmp(candidate->range, ranges[j].raw->range)) {
                                known = true;
                                break;
                        }
                }
                // Already listed, move on to the next cpu
                if(known) continue;

                // New range, grow the list by one and append it
                ranges = alloc(range_cnt + 1, ranges`realloc);
                llc_map_t & fresh = ranges[range_cnt];
                fresh.raw = candidate;
                fresh.count = 0;
                fresh.start = -1u;
                range_cnt++;
        }

        // return what we have
        return [ranges, range_cnt];
}
310
// Pairs a cpu with the last level cache it belongs to
struct cpu_pairing_t {
        unsigned cpu;   // the cpu number
        unsigned id;    // position of the cache in the list of distinct llcs (set by get_cpu_pairings)
};

// Order pairings by cache id only
// NOTE(review): presumably the comparison picked up by qsort(pairings, cpus_c) in __cfaabi_device_startup -- confirm
int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
        return lhs.id < rhs.id;
}
319
// Build the array of [cpu, cache id] pairings, one entry per cpu of 'cpus'.
// The cache id of a cpu is the position, in maps, of the element whose range
// string equals the range of column 0 of the cpu's row in raw.
// The array holds cpus_c entries and must be freed by the caller.
static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus_c, idx_range_t cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
        cpu_pairing_t * pairings = alloc(cpus_c);

        RangeIter rc = { cpus };
        while(moveNext(rc)) {
                unsigned i = rc.com;
                pairings[i].cpu = i;
                idx_range_t want = raw[i][0].range;

                // Look for the first map with the same range
                bool matched = false;
                for(j; map_cnt) {
                        if(0 == strcmp(want, maps[j].raw->range)) {
                                pairings[i].id = j;
                                matched = true;
                                break;
                        }
                }

                if(!matched) {
                        /* paranoid */ verifyf( false, "Cpu %u map doesn't match", i );
                }
        }

        return pairings;
}
340
341#include <fstream.hfa>
342
343extern "C" {
344        void __cfaabi_device_startup( void ) {
345                int cpus_c;
346                const char * cpus;
347                [cpus_c, cpus] = count_cpus();
348                #if defined(__CFA_WITH_VERIFY__)
349                // Verify that the mapping is self consistant.
350                {
351                        RangeIter rc = { cpus };
352                        while(moveNext(rc)) {
353                                unsigned i = rc.com;
354                                verify(cpus_c > i);
355                        }
356                }
357                #endif
358
359                int idxs = count_cache_indexes();
360
361                // Do we actually have a cache?
362                if(idxs == 0) {
363                        // if not just fake the data structure, it makes things easier.
364                        cpu_info.hthrd_count = cpus_c;
365                        cpu_info.llc_count = 0;
366                        struct cpu_map_entry_t * entries = alloc(cpu_info.hthrd_count);
367                        for(i; cpu_info.hthrd_count) {
368                                entries[i].self  = i;
369                                entries[i].start = 0;
370                                entries[i].count = cpu_info.hthrd_count;
371                                entries[i].cache = 0;
372                        }
373                        cpu_info.llc_map = entries;
374                        return;
375                }
376
377                // Count actual cache levels
378                unsigned cache_levels = 0;
379                unsigned llc = 0;
380
381                unsigned char prev = -1u;
382                void first(unsigned idx, unsigned char level, const char * map, size_t len) {
383                        /* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
384                        llc = max(llc, level);
385                        prev = level;
386                        cache_levels++;
387                }
388                foreach_cacheidx(0, idxs, first);
389
390                // Read in raw data
391                raw_cache_instance ** raw = build_raw_cache_table(cpus_c, cpus, idxs, cache_levels);
392
393                // Find number of distinct cache instances
394                llc_map_t * maps;
395                size_t map_cnt;
396                [maps, map_cnt] =  distinct_llcs(cpus, cache_levels - llc, raw);
397
398                #if defined(__CFA_WITH_VERIFY__)
399                // Verify that the caches cover the all the cpus
400                {
401                        unsigned width1 = 0;
402                        unsigned width2 = 0;
403                        for(i; map_cnt) {
404                                const char * _;
405                                width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
406                                width2 += maps[i].raw->width;
407                        }
408                        verify(width1 == cpus_c);
409                        verify(width2 == cpus_c);
410                }
411                #endif
412
413                // Get mappings from cpu to cache instance
414                cpu_pairing_t * pairings = get_cpu_pairings(cpus_c, cpus, raw, maps, map_cnt);
415
416                // Sort by cache instance
417                qsort(pairings, cpus_c);
418
419                {
420                        unsigned it = 0;
421                        RangeIter rc = { cpus };
422                        while(moveNext(rc)) {
423                                unsigned i = rc.com;
424                                unsigned llc_id = pairings[i].id;
425                                if(maps[llc_id].start == -1u) {
426                                        maps[llc_id].start = it;
427                                        it += maps[llc_id].raw->width;
428                                        /* paranoid */ verify(maps[llc_id].start < it);
429                                        /* paranoid */ verify(it != -1u);
430                                }
431                        }
432                        /* paranoid */ verify(it == cpus_c);
433                }
434
435                // From the mappings build the actual cpu map we want
436                struct cpu_map_entry_t * entries = alloc(cpus_c);
437                for(i; cpus_c) { entries[i].count = 0; }
438
439                RangeIter rc = { cpus };
440                while(moveNext(rc)) {
441                        unsigned i = rc.com;
442                        /* paranoid */ verify(pairings[i].id < map_cnt);
443                        unsigned c = pairings[i].cpu;
444                        unsigned llc_id = pairings[i].id;
445                        unsigned start = maps[llc_id].start;
446                        entries[c].count = maps[llc_id].raw->width;
447                        entries[c].start = start;
448                        entries[c].self  = start + (maps[llc_id].count++);
449                        entries[c].cache = llc_id;
450                }
451
452                // get rid of the temporary data
453                free(maps);
454                free(pairings);
455
456                for(i; cpus_c) {
457                        if( raw[i] ) for(j; cache_levels) {
458                                ^(raw[i][j]){};
459                        }
460                        free(raw[i]);
461                }
462                free(raw);
463
464                cpu_info.llc_map = entries;
465                cpu_info.hthrd_count = cpus_c;
466                cpu_info.llc_count = map_cnt;
467        }
468
        // Release the cpu map stored in cpu_info.llc_map by __cfaabi_device_startup
        void __cfaabi_device_shutdown( void ) {
                free(cpu_info.llc_map);
        }
472}
473
// Description of the cpus and last level caches of the system,
// filled in by __cfaabi_device_startup.
libcfa_public cpu_info_t cpu_info;
Note: See TracBrowser for help on using the repository browser.