source: libcfa/src/concurrency/kernel/cluster.cfa @ ffac259

Last change on this file since ffac259 was a8667ab, checked in by Peter A. Buhr <pabuhr@…>, 21 months ago

expunge all rseq code from runtime

  • Property mode set to 100644
File size: 15.4 KB
RevLine 
[c42b8a1]1//
2// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
[708ae38]7// cluster.cfa -- file that includes helpers for subsystem that need cluster wide support
[c42b8a1]8//
9// Author           : Thierry Delisle
[708ae38]10// Created On       : Fri Mar 11 12:39:24 2022
[c42b8a1]11// Last Modified By :
12// Last Modified On :
13// Update Count     :
14//
15
16#define __cforall_thread__
17
18#include "bits/defs.hfa"
19#include "device/cpu.hfa"
[708ae38]20#include "kernel/cluster.hfa"
21#include "kernel/private.hfa"
[c42b8a1]22
23#include "stdlib.hfa"
24#include "limits.hfa"
25#include "math.hfa"
26
27#include "ready_subqueue.hfa"
[78a580d]28#include "io/types.hfa"
[c42b8a1]29
30#include <errno.h>
31#include <unistd.h>
32
33extern "C" {
34        #include <sys/syscall.h>  // __NR_xxx
35}
36
37// No overriding function, no environment variable, no define:
38// fall back to a magic number
39#ifndef __CFA_MAX_PROCESSORS__
40        #define __CFA_MAX_PROCESSORS__ 1024
41#endif
42
43#if !defined(__CFA_NO_STATISTICS__)
44        #define __STATS(...) __VA_ARGS__
45#else
46        #define __STATS(...)
47#endif
48
// Determine the maximum number of processors the cluster RWLock must support.
// The CFA_MAX_PROCESSORS environment variable wins when it holds a valid
// decimal in [1, 65535]; otherwise fall back to __CFA_MAX_PROCESSORS__.
// Declared weak so an application can override it entirely.
__attribute__((weak)) unsigned __max_processors() libcfa_public {
	const char * env_val = getenv("CFA_MAX_PROCESSORS");
	if(!env_val) {
		__cfadbg_print_nolock(ready_queue, "No CFA_MAX_PROCESSORS in ENV\n");
		return __CFA_MAX_PROCESSORS__;
	}

	char * parse_end = 0p;
	long int parsed = strtol(env_val, &parse_end, 10);
	if(parsed < 1 || parsed > 65535) {
		// Out of range also catches "no digits at all" (strtol yields 0).
		__cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS out of range : %ld\n", parsed);
		return __CFA_MAX_PROCESSORS__;
	}
	if('\0' != *parse_end) {
		// Trailing garbage after the digits: reject the whole value.
		__cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS not a decimal number : %s\n", env_val);
		return __CFA_MAX_PROCESSORS__;
	}

	return parsed;
}
70
71//=======================================================================
72// Cluster wide reader-writer lock
73//=======================================================================
// Construct the cluster-wide scheduler reader-writer lock: size the array of
// per-processor lock handles to the maximum supported processor count and
// start with zero slots allocated ('alloc') and zero slots published ('ready').
74void  ?{}(__scheduler_RWLock_t & this) {
[741e22c]75	this.lock.max   = __max_processors();
76	this.lock.alloc = 0;
77	this.lock.ready = 0;
78	this.lock.data  = alloc(this.lock.max);
79	this.lock.write_lock  = false;
[c42b8a1]80
	// 'alloc' and 'ready' are manipulated with __atomic builtins (see
	// register_proc_id); verify the hardware supports them lock-free.
[741e22c]81	/*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.lock.alloc), &this.lock.alloc));
82	/*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.lock.ready), &this.lock.ready));
[c42b8a1]83
84}
// Destructor: release the per-processor handle array allocated in the ctor.
85void ^?{}(__scheduler_RWLock_t & this) {
[741e22c]86	free(this.lock.data);
[c42b8a1]87}
88
89
90//=======================================================================
91// Lock-Free registering/unregistering of threads
// Register the calling processor with the cluster-wide RWLock and return its
// slot index in the lock's handle array. Lock-free: first tries to reclaim a
// vacated slot among the already-published entries, otherwise fetch-and-adds
// a fresh slot and publishes it by advancing 'ready'.
unsigned register_proc_id( void ) with(__scheduler_lock.lock) {
	bool * handle = (bool *)&kernelTLS().sched_lock;

	// Step - 1 : check if there is already space in the data
	uint_fast32_t s = ready;

	// Check among all the ready
	for(uint_fast32_t i = 0; i < s; i++) {
		bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems
		/* paranoid */ verify( handle != *cell );

		bool * null = 0p; // Re-write every loop since compare thrashes it
		if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null
			&& __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
			/* paranoid */ verify(i < ready);
			/* paranoid */ verify( (kernelTLS().sched_id = i, true) );
			return i;
		}
	}

	// FIX: format specifier was "%ud", which prints the value then a literal 'd'; "%u" is intended.
	if(max <= alloc) abort("Trying to create more than %u processors", __scheduler_lock.lock.max);

	// Step - 2 : F&A to get a new spot in the array.
	uint_fast32_t n = __atomic_fetch_add(&alloc, 1, __ATOMIC_SEQ_CST);
	if(max <= n) abort("Trying to create more than %u processors", __scheduler_lock.lock.max);

	// Step - 3 : Mark space as used and then publish it.
	data[n] = handle;
	// 'ready' must advance in slot order, so spin until all lower slots are
	// published before publishing ours.
	while() {
		unsigned copy = n;
		if( __atomic_load_n(&ready, __ATOMIC_RELAXED) == n
			&& __atomic_compare_exchange_n(&ready, &copy, n + 1, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
			break;
		Pause();
	}

	// Return new spot.
	/* paranoid */ verify(n < ready);
	/* paranoid */ verify( (kernelTLS().sched_id = n, true) );
	return n;
}
133
// Remove the calling processor from the RWLock: null its slot with a RELEASE
// store so a later register_proc_id can reclaim it via the null-cell scan.
[cd3fc46]134void unregister_proc_id( unsigned id ) with(__scheduler_lock.lock) {
[c42b8a1]135	/* paranoid */ verify(id < ready);
136	/* paranoid */ verify(id == kernelTLS().sched_id);
137	/* paranoid */ verify(data[id] == &kernelTLS().sched_lock);
138
139	bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems
140
141	__atomic_store_n(cell, 0p, __ATOMIC_RELEASE);
142}
143
144//-----------------------------------------------------------------------
145// Writer side : acquire when changing the ready queue, e.g. adding more
146//  queues or removing them.
// Acquire the cluster RWLock for writing: take the global write lock, then
// acquire every published per-processor reader lock. Returns the number of
// per-processor locks taken; the caller must hand this value back to
// ready_mutate_unlock so exactly those locks are released.
[cd3fc46]147uint_fast32_t ready_mutate_lock( void ) with(__scheduler_lock.lock) {
[c42b8a1]148	/* paranoid */ verify( ! __preemption_enabled() );
149
150	// Step 1 : lock global lock
151	// It is needed to avoid processors that register mid Critical-Section
152	//   to simply lock their own lock and enter.
153	__atomic_acquire( &write_lock );
154
155	// Make sure we won't deadlock ourself
156	// Checking before acquiring the writer lock isn't safe
157	// because someone else could have locked us.
158	/* paranoid */ verify( ! kernelTLS().sched_lock );
159
160	// Step 2 : lock per-proc lock
161	// Processors that are currently being registered aren't counted
162	//   but can't be in read_lock or in the critical section.
163	// All other processors are counted
164	uint_fast32_t s = ready;
165	for(uint_fast32_t i = 0; i < s; i++) {
166		volatile bool * llock = data[i];
167		if(llock) __atomic_acquire( llock );   // skip vacated (null) slots
168	}
169
170	/* paranoid */ verify( ! __preemption_enabled() );
171	return s;
172}
173
// Release the cluster RWLock after a write: release the 'last_s'
// per-processor locks taken by ready_mutate_lock, then the global write lock.
[cd3fc46]174void ready_mutate_unlock( uint_fast32_t last_s ) with(__scheduler_lock.lock) {
[c42b8a1]175	/* paranoid */ verify( ! __preemption_enabled() );
176
177	// Step 1 : release local locks
178	// This must be done while the global lock is held to avoid
179	//   threads that were created mid critical section
180	//   to race to lock their local locks and have the writer
181	//   immediately unlock them
182	// Alternative solution : return s in write_lock and pass it to write_unlock
183	for(uint_fast32_t i = 0; i < last_s; i++) {
184		volatile bool * llock = data[i];
185		if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);
186	}
187
188	// Step 2 : release global lock
189	/*paranoid*/ assert(true == write_lock);
190	__atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);
191
192	/* paranoid */ verify( ! __preemption_enabled() );
193}
194
195//=======================================================================
196// Cluster growth
// Lower bound on the number of ready-queue shards a cluster keeps, even when
// it has no processors (see ready_queue_grow/ready_queue_shrink).
197static const unsigned __readyq_single_shard = 2;
198
// Timestamp ctor: zero the raw timestamp ('tv') and 'ma' (presumably a
// moving average -- TODO confirm; fix_times resets both the same way).
[31c967b]199void  ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; }
200void ^?{}(__timestamp_t &) {}
201
[c42b8a1]202//-----------------------------------------------------------------------
203// Check that all the intrusive queues in the data structure are still consistent
// Walk every ready-queue lane and assert the intrusive-list invariants:
// no lane locked, and the anchor/prev pointers consistent with the lane's
// empty/non-empty state. Compiles to nothing unless __CFA_WITH_VERIFY__.
[884f3f67]204static void check_readyQ( cluster * cltr ) with (cltr->sched) {
[c42b8a1]205	#if defined(__CFA_WITH_VERIFY__)
206		{
[884f3f67]207			const unsigned lanes_count = readyQ.count;
208			for( idx ; lanes_count ) {
209				__intrusive_lane_t & sl = readyQ.data[idx];
[2af1943]210				assert(!readyQ.data[idx].l.lock);
[c42b8a1]211
212					if(is_empty(sl)) {
[2af1943]213						assert( sl.l.anchor.next == 0p );
214						assert( sl.l.anchor.ts   == MAX );
215						assert( mock_head(sl)  == sl.l.prev );
[c42b8a1]216					} else {
[2af1943]217						assert( sl.l.anchor.next != 0p );
218						assert( sl.l.anchor.ts   != MAX );
219						assert( mock_head(sl)  != sl.l.prev );
[c42b8a1]220					}
221			}
222		}
223	#endif
224}
225
226// Call this function if the intrusive list was moved using memcpy;
227// it fixes the list so that the pointers back to the anchor aren't left dangling
228static inline void fix(__intrusive_lane_t & ll) {
[708ae38]229        if(is_empty(ll)) {
[2af1943]230                verify(ll.l.anchor.next == 0p);
231                ll.l.prev = mock_head(ll);
[708ae38]232        }
[c42b8a1]233}
234
// Hand out ready-queue (and, with io_uring, io completion-queue) shard ids to
// the first 'count' processors of 'list', advancing the id counters by the
// shard factors. Also resets each processor's rdq/io 'target' to the
// UINT_MAX sentinel.
[1756e08]235static void assign_list(unsigned & valrq, unsigned & valio, dlist(struct processor) & list, unsigned count) {
236	struct processor * it = &list`first;
[c42b8a1]237	for(unsigned i = 0; i < count; i++) {
238		/* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
[adb3ea1]239		it->rdq.id = valrq;
[b035046]240		it->rdq.target = UINT_MAX;
[adb3ea1]241		valrq += __shard_factor.readyq;
[1a567d0]242		#if defined(CFA_HAVE_LINUX_IO_URING_H)
243			it->io.ctx->cq.id = valio;
[b035046]244			it->io.target = UINT_MAX;
[1a567d0]245			valio += __shard_factor.io;
246		#endif
[c42b8a1]247		it = &(*it)`next;
248	}
249}
250
// Renumber shard ids across all of the cluster's processors, actives first
// then idles, so the ids form a single contiguous sequence from 0.
251static void reassign_cltr_id(struct cluster * cltr) {
[adb3ea1]252	unsigned prefrq = 0;
253	unsigned prefio = 0;
254	assign_list(prefrq, prefio, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
255	assign_list(prefrq, prefio, cltr->procs.idles  , cltr->procs.idle );
256}
257
#if defined(CFA_HAVE_LINUX_IO_URING_H)
	// Publish each processor's io_uring context into the cluster-wide io
	// context array, indexed by the context's completion-queue id.
	static void assign_io(io_context$ ** data, size_t count, dlist(struct processor) & list) {
		struct processor * it = &list`first;
		while(it) {
			/* paranoid */ verifyf( it, "Unexpected null iterator\n");
			// FIX: the diagnostic used to print it->rdq.id, which is not the value being checked.
			/* paranoid */ verifyf( it->io.ctx->cq.id < count, "Processor %p has id %u above count %zu\n", it, it->io.ctx->cq.id, count);
			data[it->io.ctx->cq.id] = it->io.ctx;
			it = &(*it)`next;
		}
	}

	// Rebuild the cluster-wide io context array from every processor,
	// active and idle alike.
	static void reassign_cltr_io(struct cluster * cltr) {
		assign_io(cltr->sched.io.data, cltr->sched.io.count, cltr->procs.actives);
		assign_io(cltr->sched.io.data, cltr->sched.io.count, cltr->procs.idles  );
	}
#else
	// Without io_uring there are no io contexts to distribute.
	static void reassign_cltr_io(struct cluster *) {}
#endif
[c42b8a1]276
// (Re)allocate the timestamp array to 'count' entries and reset every entry:
// 'tv' to the current cycle counter, 'ma' to zero.
[884f3f67]277static void fix_times( __timestamp_t * volatile & tscs, unsigned count ) {
278	tscs = alloc(count, tscs`realloc);
279	for(i; count) {
[2af1943]280		tscs[i].t.tv = rdtscl();
281		tscs[i].t.ma = 0;
[c42b8a1]282	}
283}
284
[c42b8a1]285// Grow the ready queue
// Resize the cluster's ready-queue shards and io arrays to match the current
// processor total. Must be called with the ready-queue write lock held
// (verified on entry and exit).
286void ready_queue_grow(struct cluster * cltr) {
287	int target = cltr->procs.total;
288
289	/* paranoid */ verify( ready_mutate_islocked() );
290	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
291
292	// Make sure that everything is consistent
[884f3f67]293	/* paranoid */ check_readyQ( cltr );
[c42b8a1]294
295
[884f3f67]296	// Find new count
297	// Make sure we always have atleast 1 list
298	size_t ocount = cltr->sched.readyQ.count;
299	size_t ncount = max(target * __shard_factor.readyq, __readyq_single_shard);
[c42b8a1]300
[884f3f67]301	// Do we have to do anything?
302	if( ocount != ncount ) {
303
304		// grow the ready queue
305		with( cltr->sched ) {
306
307			// Allocate new array (uses realloc and memcpies the data)
308			readyQ.data = alloc( ncount, readyQ.data`realloc );
[c42b8a1]309
[884f3f67]310			// Fix the moved data (empty lanes' self-pointers now dangle)
311			for( idx; ocount ) {
312				fix(readyQ.data[idx]);
313			}
314
315			// Construct new data
316			for( idx; ocount ~ ncount) {
317				(readyQ.data[idx]){};
318			}
319
320			// Update original count
321			readyQ.count = ncount;
[c42b8a1]322		}
323
324
[884f3f67]325		fix_times(cltr->sched.readyQ.tscs, cltr->sched.readyQ.count);
[c42b8a1]326	}
327
[708ae38]328	// Fix the io times (io shard count tracks the processor count directly,
	// with no minimum, unlike the ready queue above)
[adb3ea1]329	cltr->sched.io.count = target * __shard_factor.io;
[708ae38]330	fix_times(cltr->sched.io.tscs, cltr->sched.io.count);
331
[884f3f67]332	// realloc the caches
333	cltr->sched.caches = alloc( target, cltr->sched.caches`realloc );
[c42b8a1]334
[884f3f67]335	// reassign the clusters.
[c42b8a1]336	reassign_cltr_id(cltr);
337
[adb3ea1]338	cltr->sched.io.data = alloc( cltr->sched.io.count, cltr->sched.io.data`realloc );
339	reassign_cltr_io(cltr);
340
[c42b8a1]341	// Make sure that everything is consistent
[884f3f67]342	/* paranoid */ check_readyQ( cltr );
343	/* paranoid */ verify( (target == 0) == (cltr->sched.caches == 0p) );
[c42b8a1]344
345	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
346
347	/* paranoid */ verify( ready_mutate_islocked() );
348}
349
350// Shrink the ready queue
// Reduce the cluster's ready-queue shards to match the (smaller) processor
// total, re-pushing any threads stranded in the removed lanes. Must be called
// with the ready-queue write lock held.
351void ready_queue_shrink(struct cluster * cltr) {
352	/* paranoid */ verify( ready_mutate_islocked() );
353	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
354
355	// Make sure that everything is consistent
[884f3f67]356	/* paranoid */ check_readyQ( cltr );
[c42b8a1]357
358	int target = cltr->procs.total;
359
[884f3f67]360	with( cltr->sched ) {
[c42b8a1]361		// Remember old count
[884f3f67]362		size_t ocount = readyQ.count;
[c42b8a1]363
364		// Find new count
365		// Make sure we always have atleast 1 list
[884f3f67]366		size_t ncount = max(target * __shard_factor.readyq, __readyq_single_shard);
367		/* paranoid */ verifyf( ocount >= ncount, "Error in shrinking size calculation, %zu >= %zu", ocount, ncount );
368		/* paranoid */ verifyf( ncount == target * __shard_factor.readyq || ncount == __readyq_single_shard,
[bfb9bf5]369		/* paranoid */          "Error in shrinking size calculation, expected %u or %u, got %zu", target * __shard_factor.readyq, __readyq_single_shard, ncount );
[884f3f67]370
		// Publish the smaller count first so push() below targets only lanes
		// that survive the shrink.
371		readyQ.count = ncount;
[c42b8a1]372
373		// for printing count the number of displaced threads
374		#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
375			__attribute__((unused)) size_t displaced = 0;
376		#endif
377
378		// redistribute old data
[884f3f67]379		for( idx; ncount ~ ocount) {
[c42b8a1]380			// Lock is not strictly needed but makes checking invariants much easier
[2af1943]381			__attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].l.lock);
[c42b8a1]382			verify(locked);
383
384			// As long as we can pop from this lane to push the threads somewhere else in the queue
[884f3f67]385			while(!is_empty(readyQ.data[idx])) {
[c42b8a1]386				struct thread$ * thrd;
387				unsigned long long _;
[884f3f67]388				[thrd, _] = pop(readyQ.data[idx]);
[c42b8a1]389
390				push(cltr, thrd, true);
391
392				// for printing count the number of displaced threads
393				#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
394					displaced++;
395				#endif
396			}
397
398			// Unlock the lane
[2af1943]399			__atomic_unlock(&readyQ.data[idx].l.lock);
[c42b8a1]400
401			// TODO print the queue statistics here
402
			// Destroy the drained lane before the array is shrunk.
[884f3f67]403			^(readyQ.data[idx]){};
[c42b8a1]404		}
405
406		__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
407
408		// Allocate new array (uses realloc and memcpies the data)
[884f3f67]409		readyQ.data = alloc( ncount, readyQ.data`realloc );
[c42b8a1]410
411		// Fix the moved data
[884f3f67]412		for( idx; ncount ) {
413			fix(readyQ.data[idx]);
[c42b8a1]414		}
415
[884f3f67]416		fix_times(readyQ.tscs, ncount);
[c42b8a1]417	}
[884f3f67]418	cltr->sched.caches = alloc( target, cltr->sched.caches`realloc );
[c42b8a1]419
[708ae38]420	// Fix the io times
[adb3ea1]421	cltr->sched.io.count = target * __shard_factor.io;
[708ae38]422	fix_times(cltr->sched.io.tscs, cltr->sched.io.count);
[c42b8a1]423
424	reassign_cltr_id(cltr);
425
[adb3ea1]426	cltr->sched.io.data = alloc( cltr->sched.io.count, cltr->sched.io.data`realloc );
427	reassign_cltr_io(cltr);
428
[c42b8a1]429	// Make sure that everything is consistent
[884f3f67]430	/* paranoid */ verify( (target == 0) == (cltr->sched.caches == 0p) );
431	/* paranoid */ check_readyQ( cltr );
[c42b8a1]432
433	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
434	/* paranoid */ verify( ready_mutate_islocked() );
435}
436
// Tear down the cluster's scheduling arrays when the cluster is destroyed.
[884f3f67]437void ready_queue_close(struct cluster * cltr) {
438	free( cltr->sched.readyQ.data );
439	free( cltr->sched.readyQ.tscs );
440	cltr->sched.readyQ.data = 0p;
441	cltr->sched.readyQ.tscs = 0p;
442	cltr->sched.readyQ.count = 0;
443
	// NOTE(review): cltr->sched.io.data is never freed here, and the io
	// pointers/count are not reset -- confirm io.data is released elsewhere
	// (e.g. by the io subsystem) or this leaks on cluster destruction.
444	free( cltr->sched.io.tscs );
445	free( cltr->sched.caches );
446}
447
// offsetof variant that accepts a nested member path (e.g. l.anchor), used
// only by the assertions in the lane ctor below and #undef'd after.
[2af1943]448#define nested_offsetof(type, field) ((off_t)(&(((type*)0)-> field)))
449
[c42b8a1]450// Ctor
// Initialise an empty lane: unlocked, anchor cleared, and 'prev' pointing at
// the lane's own pseudo head node (mock_head).
451void ?{}( __intrusive_lane_t & this ) {
[2af1943]452	this.l.lock = false;
453	this.l.prev = mock_head(this);
454	this.l.anchor.next = 0p;
455	this.l.anchor.ts   = MAX;
[c42b8a1]456	#if !defined(__CFA_NO_STATISTICS__)
[2af1943]457		this.l.cnt  = 0;
[c42b8a1]458	#endif
459
460	// We add a boat-load of assertions here because the anchor code is very fragile
	// The anchor must overlay a thread$'s rdy_link exactly, so that the
	// pseudo head computed by mock_head behaves like a real list node.
[15c93d8]461	/* paranoid */ _Static_assert( offsetof( thread$, rdy_link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
462	/* paranoid */ verify( offsetof( thread$, rdy_link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
463	/* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, rdy_link )) == (uintptr_t)(&this.l.anchor) );
464	/* paranoid */ verify( &mock_head(this)->rdy_link.next == &this.l.anchor.next );
465	/* paranoid */ verify( &mock_head(this)->rdy_link.ts   == &this.l.anchor.ts   );
466	/* paranoid */ verify( mock_head(this)->rdy_link.next == 0p );
467	/* paranoid */ verify( mock_head(this)->rdy_link.ts   == MAX );
[2af1943]468	/* paranoid */ verify( mock_head(this) == this.l.prev );
469	/* paranoid */ verify( __alignof__(__intrusive_lane_t) == 64 );
470	/* paranoid */ verify( __alignof__(this) == 64 );
471	/* paranoid */ verifyf( ((intptr_t)(&this) % 64) == 0, "Expected address to be aligned %p %% 64 == %zd", &this, ((intptr_t)(&this) % 64) );
[c42b8a1]472}
473
[2af1943]474#undef nested_offsetof
475
[c42b8a1]476// Dtor is trivial
// It only asserts the lane is empty and still in its pristine state; there
// is nothing to release.
477void ^?{}( __intrusive_lane_t & this ) {
478	// Make sure the list is empty
[2af1943]479	/* paranoid */ verify( this.l.anchor.next == 0p );
480	/* paranoid */ verify( this.l.anchor.ts   == MAX );
481	/* paranoid */ verify( mock_head(this)    == this.l.prev );
[c42b8a1]482}
Note: See TracBrowser for help on using the repository browser.