source: libcfa/src/concurrency/kernel_private.hfa @ 6ddef36

Last change on this file since 6ddef36 was 059ad16, checked in by Thierry Delisle <tdelisle@…>, 2 years ago

Flush now supports blocking until at least one I/O op terminates.

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Wed Aug 12 08:21:33 2020
// Update Count     : 9
//

#pragma once

#if !defined(__cforall_thread__)
        #error kernel_private.hfa should only be included in libcfathread source
#endif

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"
#include "stats.hfa"

extern "C" {
#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
        #include <rseq/rseq.h>
#elif defined(CFA_HAVE_LINUX_RSEQ_H)
        #include <linux/rseq.h>
#else
        #ifndef _GNU_SOURCE
        #error kernel_private requires gnu_source
        #endif
        #include <sched.h>
#endif
}

// #define IO_URING_IDLE

//-----------------------------------------------------------------------------
// Scheduler
extern "C" {
        void disable_interrupts() OPTIONAL_THREAD;
        void enable_interrupts( bool poll = true );
}

void schedule_thread$( thread$ *, unpark_hint hint ) __attribute__((nonnull (1)));

extern bool __preemption_enabled();

// release/wake-up the given thread's resources
void __thread_finish( thread$ * thrd );

//-----------------------------------------------------------------------------
// Hardware

#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
        // No data needed
#elif defined(CFA_HAVE_LINUX_RSEQ_H)
        extern "Cforall" {
                extern __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq;
        }
#else
        // No data needed
#endif

static inline int __kernel_getcpu() {
        /* paranoid */ verify( ! __preemption_enabled() );
#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
        return rseq_current_cpu();
#elif defined(CFA_HAVE_LINUX_RSEQ_H)
        int r = __cfaabi_rseq.cpu_id;
        /* paranoid */ verify( r >= 0 );
        return r;
#else
        return sched_getcpu();
#endif
}

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * __create_pthread( pthread_t *, void * (*)(void *), void * );
void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );

extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
extern "C" {
        void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

__cfaabi_dbg_debug_do(
        extern void __cfaabi_dbg_thread_register  ( thread$ * thrd );
        extern void __cfaabi_dbg_thread_unregister( thread$ * thrd );
)

#define TICKET_BLOCKED (-1) // thread is blocked
#define TICKET_RUNNING ( 0) // thread is running
#define TICKET_UNBLOCK ( 1) // thread should ignore next block

//-----------------------------------------------------------------------------
// Utils
void doregister( struct cluster * cltr, struct thread$ & thrd );
void unregister( struct cluster * cltr, struct thread$ & thrd );

//-----------------------------------------------------------------------------
// I/O
$io_arbiter * create(void);
void destroy($io_arbiter *);

//=======================================================================
// Cluster lock API
//=======================================================================
// Lock-Free registering/unregistering of threads
// Register a processor and get its unique id in return
unsigned register_proc_id( void );

// Unregister a processor using its id
void unregister_proc_id( unsigned );
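
// Illustrative pairing only (a sketch, not additional API): the id handed out
// by register_proc_id is the value unregister_proc_id expects back, e.g.
//
//      unsigned id = register_proc_id();
//      ... participate in the scheduler RW lock as reader `id` ...
//      unregister_proc_id( id );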

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
                        Pause();
        }
        /* paranoid */ verify(*ll);
}

// Non-Blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
        /* paranoid */ verify(*ll);
        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
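
// Usage sketch (illustrative only; `flag` is a hypothetical spin flag):
//
//      static volatile bool flag = false;
//
//      __atomic_acquire( &flag );              // spin until ownership is gained
//      ... critical section ...
//      __atomic_unlock( &flag );               // release ownership
//
//      if( __atomic_try_acquire( &flag ) ) {   // non-blocking attempt
//              ... critical section ...
//              __atomic_unlock( &flag );
//      }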

//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// While this lock is mostly generic, some aspects have been
// hard-coded for the ready-queue for simplicity and performance.
struct __scheduler_RWLock_t {
        // total cachelines allocated
        unsigned int max;

        // cachelines currently in use
        volatile unsigned int alloc;

        // cachelines ready to iterate over
        // (!= to alloc when thread is in second half of doregister)
        volatile unsigned int ready;

        // writer lock
        volatile bool write_lock;

        // data pointer
        volatile bool * volatile * data;
};

void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

extern __scheduler_RWLock_t * __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
        /* paranoid */ verify( ! __preemption_enabled() );
        /* paranoid */ verify( ! kernelTLS().in_sched_lock );
        /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
        /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );

        // Step 1 : make sure no writers are in the middle of the critical section
        while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED))
                Pause();

        // Fence needed because we don't want to start trying to acquire the lock
        // before we read a false.
        // Not needed on x86
        // std::atomic_thread_fence(std::memory_order_seq_cst);

        // Step 2 : acquire our local lock
        __atomic_acquire( &kernelTLS().sched_lock );
        /* paranoid */ verify(kernelTLS().sched_lock);

        #ifdef __CFA_WITH_VERIFY__
                // Debug, check if this is owned for reading
                kernelTLS().in_sched_lock = true;
        #endif
}

static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
        /* paranoid */ verify( ! __preemption_enabled() );
        /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
        /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
        /* paranoid */ verify( kernelTLS().sched_lock );
        /* paranoid */ verify( kernelTLS().in_sched_lock );
        #ifdef __CFA_WITH_VERIFY__
                // Debug, check if this is owned for reading
                kernelTLS().in_sched_lock = false;
        #endif
        __atomic_unlock(&kernelTLS().sched_lock);
}
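
// Typical reader-side sequence (sketch; per the paranoid checks above,
// preemption must already be disabled when taking the read lock):
//
//      disable_interrupts();
//      ready_schedule_lock();
//      ... read-side work on the ready-queues, e.g. push/pop ...
//      ready_schedule_unlock();
//      enable_interrupts();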

#ifdef __CFA_WITH_VERIFY__
        static inline bool ready_schedule_islocked(void) {
                /* paranoid */ verify( ! __preemption_enabled() );
                /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock );
                return kernelTLS().sched_lock;
        }

        static inline bool ready_mutate_islocked() {
                return __scheduler_lock->write_lock;
        }
#endif

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
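
// Typical writer-side sequence (sketch): the value returned by the lock must
// be passed back to the matching unlock.
//
//      uint_fast32_t last = ready_mutate_lock();
//      ... add or remove ready-queues ...
//      ready_mutate_unlock( last );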

//-----------------------------------------------------------------------
// Lock-Free registering/unregistering of threads
// Register a processor and get its unique id in return
// For convenience, also acquires the lock
static inline [unsigned, uint_fast32_t] ready_mutate_register() {
        unsigned id = register_proc_id();
        uint_fast32_t last = ready_mutate_lock();
        return [id, last];
}

// Unregister a processor using its id
// assumes the lock is acquired
static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) {
        ready_mutate_unlock( last_s );
        unregister_proc_id( id );
}

//-----------------------------------------------------------------------
// Cluster idle lock/unlock
static inline void lock(__cluster_proc_list & this) {
        /* paranoid */ verify( ! __preemption_enabled() );

        // Start by locking the global RWlock so that we know no-one is
        // adding/removing processors while we mess with the idle lock
        ready_schedule_lock();

        lock( this.lock __cfaabi_dbg_ctx2 );

        /* paranoid */ verify( ! __preemption_enabled() );
}

static inline bool try_lock(__cluster_proc_list & this) {
        /* paranoid */ verify( ! __preemption_enabled() );

        // Start by locking the global RWlock so that we know no-one is
        // adding/removing processors while we mess with the idle lock
        ready_schedule_lock();

        if(try_lock( this.lock __cfaabi_dbg_ctx2 )) {
                // success
                /* paranoid */ verify( ! __preemption_enabled() );
                return true;
        }

        // failed to lock
        ready_schedule_unlock();

        /* paranoid */ verify( ! __preemption_enabled() );
        return false;
}

static inline void unlock(__cluster_proc_list & this) {
        /* paranoid */ verify( ! __preemption_enabled() );

        unlock(this.lock);

        // Release the global lock, which we acquired when locking
        ready_schedule_unlock();

        /* paranoid */ verify( ! __preemption_enabled() );
}

//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint);

//-----------------------------------------------------------------------
// pop thread from the local queues of a cluster
// returns 0p if empty
// May return 0p spuriously
__attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr);

//-----------------------------------------------------------------------
// pop thread from any ready queue of a cluster
// returns 0p if empty
// May return 0p spuriously
__attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr);

//-----------------------------------------------------------------------
// search all ready queues of a cluster for any thread
// returns 0p if empty
// guaranteed to find any threads added before this call
__attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr);
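
// Illustrative fallback chain (a sketch, not necessarily the scheduler's
// actual policy): try the cheap local pop first, then the other queues, and
// only search exhaustively once both have returned 0p.
//
//      thread$ * thrd = pop_fast( cltr );
//      if( 0p == thrd ) thrd = pop_slow( cltr );
//      if( 0p == thrd ) thrd = pop_search( cltr );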

//-----------------------------------------------------------------------
// get the preferred ready queue for a new thread
unsigned ready_queue_new_preferred();

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr);


// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //