source: libcfa/src/concurrency/kernel_private.hfa @ 920dca3

Last change on this file since 920dca3 was f00b26d4, checked in by Thierry Delisle <tdelisle@…>, 4 years ago

Re-worked IO to use epoll and support multiple io_contexts per cluster.
Also redid how cluster options are handled.
Changed how iofwd calls are passed to support future features and io_contexts rework.

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Sat Nov 30 19:25:02 2019
// Update Count     : 8
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"
#include "stats.hfa"

#include "bits/random.hfa"


//-----------------------------------------------------------------------------
// Scheduler

struct __attribute__((aligned(128))) __scheduler_lock_id_t;

extern "C" {
    void disable_interrupts() OPTIONAL_THREAD;
    void enable_interrupts_noPoll();
    void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void __schedule_thread( struct __processor_id_t *, $thread * )
#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
    __attribute__((nonnull (2)))
#endif
;

// Block current thread and release/wake-up the following resources
void __leave_thread() __attribute__((noreturn));

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * __create_pthread( pthread_t *, void * (*)(void *), void * );



struct event_kernel_t {
    alarm_list_t alarms;
    __spinlock_t lock;
};

extern event_kernel_t * event_kernel;

struct __cfa_kernel_preemption_state_t {
    bool enabled;
    bool in_progress;
    unsigned short disable_count;
};

extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));

extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
extern "C" {
      void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

__cfaabi_dbg_debug_do(
    extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
    extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
)

// KERNEL ONLY unpark without disabling interrupts
void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );

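// Note (added commentary, inferred from the code below): `single_sem` appears to
// encode three states in its `ptr` field: 0p for "empty", 1p for "signalled with
// no waiter", and any other value for a pointer to the parked thread waiting on
// it. __post either marks the semaphore signalled or wakes the waiter via
// __unpark, and returns true only when a thread was actually woken.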
static inline bool __post(single_sem & this, struct __processor_id_t * id) {
    for() {
        struct $thread * expected = this.ptr;
        if(expected == 1p) return false;
        if(expected == 0p) {
            if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                return false;
            }
        }
        else {
            if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                __unpark( id, expected __cfaabi_dbg_ctx2 );
                return true;
            }
        }
    }
}

//-----------------------------------------------------------------------------
// Utils
#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
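// Illustrative usage sketch (added commentary; the constructor call is a
// placeholder): the macro reserves raw, suitably aligned static storage so the
// object can be constructed explicitly once the runtime is ready.
//     KERNEL_STORAGE(cluster, mainCluster);            // static char storage_mainCluster[sizeof(cluster)]
//     ...
//     mainCluster = (cluster *)&storage_mainCluster;   // later, construct in place
//     (*mainCluster){ /* ... */ };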

static inline uint64_t __tls_rand() {
    #if defined(__SIZEOF_INT128__)
        return __lehmer64( kernelTLS.rand_seed );
    #else
        return __xorshift64( kernelTLS.rand_seed );
    #endif
}
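// Note (added commentary): both generators come from bits/random.hfa; __lehmer64
// needs 128-bit arithmetic, hence the __SIZEOF_INT128__ guard, with __xorshift64
// as the portable fallback. The seed lives in kernelTLS, so each processor
// advances its own, non-cryptographic random stream.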


void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

void doregister( struct cluster * cltr, struct $thread & thrd );
void unregister( struct cluster * cltr, struct $thread & thrd );

//-----------------------------------------------------------------------------
// I/O
void __kernel_io_startup     ();
void __kernel_io_shutdown    ();

static inline io_context * __get_io_context( void ) {
    cluster * cltr = active_cluster();
    /* paranoid */ verifyf( cltr, "No active cluster for io operation\n");
    assertf( cltr->io.cnt > 0, "Cluster %p has no default io contexts and no context was specified\n", cltr );
    /* paranoid */ verifyf( cltr->io.ctxs, "default io contexts for cluster %p are missing\n", cltr);
    return &cltr->io.ctxs[ __tls_rand() % cltr->io.cnt ];
}
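// Minimal usage sketch (added commentary; the submission step is hypothetical):
// an I/O operation that was not given an explicit context picks one of the
// cluster's contexts at random, spreading submissions across them.
//     io_context & ctx = *__get_io_context();
//     // ... submit the request through ctx ...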

void ^?{}(io_context & this, bool );

//=======================================================================
// Cluster lock API
//=======================================================================
// Cells used by the reader-writer lock
// while not generic, it only relies on an opaque pointer
struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    // Spin lock used as the underlying lock
    volatile bool lock;

    // Handle pointing to the proc owning this cell
    // Used for allocating cells and debugging
    __processor_id_t * volatile handle;

    #ifdef __CFA_WITH_VERIFY__
        // Debug, check if this is owned for reading
        bool owned;
    #endif
};

static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
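// Note (added commentary): the 128-byte alignment together with the static_assert
// above gives each cell its own cache-line-sized slot, presumably so a processor
// spinning on its own `lock` field does not false-share with neighbouring cells.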

// Lock-Free registering/unregistering of threads
// Register a processor to a given cluster and get its unique id in return
unsigned doregister( struct __processor_id_t * proc );

// Unregister a processor from a given cluster using its id, getting back the original pointer
void     unregister( struct __processor_id_t * proc );

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
    while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
        while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
            asm volatile("pause");
    }
    /* paranoid */ verify(*ll);
}

// Non-Blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
    return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
    /* paranoid */ verify(*ll);
    __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
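// Minimal usage sketch (added commentary; the lock variable is hypothetical):
// these helpers implement the classic test-and-test-and-set pattern, spinning on
// a cheap relaxed load until the flag looks free before retrying the exchange.
//     static volatile bool example_lock = false;
//     __atomic_acquire( &example_lock );               // blocking acquire
//     // ... critical section ...
//     __atomic_unlock ( &example_lock );
//     if( __atomic_try_acquire( &example_lock ) ) {    // non-blocking variant
//         // ... critical section ...
//         __atomic_unlock( &example_lock );
//     }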

//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// while this lock is mostly generic, some aspects
// have been hard-coded to the ready-queue for
// simplicity and performance
struct __scheduler_RWLock_t {
    // total cachelines allocated
    unsigned int max;

    // cachelines currently in use
    volatile unsigned int alloc;

    // cachelines ready to iterate over
    // (!= to alloc when thread is in second half of doregister)
    volatile unsigned int ready;

    // writer lock
    volatile bool lock;

    // data pointer
    __scheduler_lock_id_t * data;
};

void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

extern __scheduler_RWLock_t * __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
    unsigned iproc = proc->id;
    /*paranoid*/ verify(data[iproc].handle == proc);
    /*paranoid*/ verify(iproc < ready);

    // Step 1 : make sure no writer is in the middle of the critical section
    while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
        asm volatile("pause");

    // Fence needed because we don't want to start trying to acquire the lock
    // before we read a false.
    // Not needed on x86
    // std::atomic_thread_fence(std::memory_order_seq_cst);

    // Step 2 : acquire our local lock
    __atomic_acquire( &data[iproc].lock );
    /*paranoid*/ verify(data[iproc].lock);

    #ifdef __CFA_WITH_VERIFY__
        // Debug, check if this is owned for reading
        data[iproc].owned = true;
    #endif
}

static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
    unsigned iproc = proc->id;
    /*paranoid*/ verify(data[iproc].handle == proc);
    /*paranoid*/ verify(iproc < ready);
    /*paranoid*/ verify(data[iproc].lock);
    /*paranoid*/ verify(data[iproc].owned);
    #ifdef __CFA_WITH_VERIFY__
        // Debug, check if this is owned for reading
        data[iproc].owned = false;
    #endif
    __atomic_unlock(&data[iproc].lock);
}
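// Minimal usage sketch (added commentary; the caller shown is hypothetical):
// scheduling operations bracket their use of the ready queue with the reader
// side of the lock, where `id` is the __processor_id_t registered above.
//     ready_schedule_lock  ( id );
//     // ... push/pop on the ready queue ...
//     ready_schedule_unlock( id );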

#ifdef __CFA_WITH_VERIFY__
    static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
        return __scheduler_lock->data[proc->id].owned;
    }

    static inline bool ready_mutate_islocked() {
        return __scheduler_lock->lock;
    }
#endif

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

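// Minimal usage sketch (added commentary; the caller shown is hypothetical):
// structural changes, e.g. growing the ready queue when processors are added,
// are bracketed by the writer side of the lock.
//     uint_fast32_t last_size = ready_mutate_lock();
//     // ... e.g. ready_queue_grow( cltr, target ) ...
//     ready_mutate_unlock( last_size );
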
//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// query the ready queue of a cluster
// returns true if it contains at least one thread
__attribute__((hot)) bool query(struct cluster * cltr);

//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// returns true if the list was previously empty, false otherwise
__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
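// Minimal usage sketch (added commentary; the wake-up policy shown is a
// hypothetical caller, not part of this API): the return value of push can be
// used to decide whether an idle processor should be notified.
//     ready_schedule_lock( id );
//     bool was_empty = push( cltr, thrd );
//     ready_schedule_unlock( id );
//     if( was_empty ) { /* e.g. wake an idle processor */ }
// and symmetrically, a processor picks its next thread with:
//     struct $thread * nxt = pop( cltr );   // 0p when the queue is empty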

//-----------------------------------------------------------------------
// remove thread from the ready queue of a cluster
// returns false if it wasn't found
bool remove_head(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr, int target);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr, int target);

//-----------------------------------------------------------------------
// IO user data
struct __io_user_data_t {
    int32_t result;
    $thread * thrd;
};

//-----------------------------------------------------------------------
// Statistics: helpers called at the end of each thread to register statistics
#if !defined(__CFA_NO_STATISTICS__)
    static inline struct __stats_t * __tls_stats() {
        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
        /* paranoid */ verify( kernelTLS.this_stats );
        return kernelTLS.this_stats;
    }

    #define __STATS__(in_kernel, ...) { \
        if( !(in_kernel) ) disable_interrupts(); \
        with( *__tls_stats() ) { \
            __VA_ARGS__ \
        } \
        if( !(in_kernel) ) enable_interrupts( __cfaabi_dbg_ctx ); \
    }
#else
    #define __STATS__(in_kernel, ...)
#endif
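
// Minimal usage sketch (added commentary; the counter name is hypothetical):
// the macro runs its arguments inside `with( *__tls_stats() )`, disabling
// interrupts first when invoked from outside the kernel.
//     __STATS__( false, ready.pick.push.attempt++; )
// When __CFA_NO_STATISTICS__ is defined, the whole block compiles away.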

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //