source: libcfa/src/concurrency/kernel_private.hfa @ 9b1dcc2

Last change on this file was 9b1dcc2, checked in by Thierry Delisle <tdelisle@…>, 3 years ago

Changed scheduling API to adapt to non-Processors scheduling threads.

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Sat Nov 30 19:25:02 2019
// Update Count     : 8
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"


//-----------------------------------------------------------------------------
// Scheduler

struct __attribute__((aligned(64))) __scheduler_lock_id_t;

extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	void enable_interrupts_noPoll();
	void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void __schedule_thread( struct __processor_id_t *, $thread * ) __attribute__((nonnull (2)));

// Block current thread and release/wake-up the following resources
void __leave_thread() __attribute__((noreturn));

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * __create_pthread( pthread_t *, void * (*)(void *), void * );



struct event_kernel_t {
	alarm_list_t alarms;
	__spinlock_t lock;
};

extern event_kernel_t * event_kernel;

struct __cfa_kernel_preemption_state_t {
	bool enabled;
	bool in_progress;
	unsigned short disable_count;
};

extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
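
// Illustrative pairing sketch (not part of the original header; the nesting behaviour
// described here is an assumption based on the declarations above). disable_interrupts /
// enable_interrupts are expected to nest, with preemption_state.disable_count tracking
// the nesting depth:
//    disable_interrupts();                    // enter a kernel-only, non-preemptible region
//    /* ... manipulate kernel data structures ... */
//    enable_interrupts( __cfaabi_dbg_ctx );   // leave the region; __cfaabi_dbg_ctx is assumed to be
//                                             // the argument counterpart of __cfaabi_dbg_ctx_param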

extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
extern "C" {
      void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
	extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
)
// KERNEL ONLY: unpark without disabling interrupts
void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );

//-----------------------------------------------------------------------------
// I/O
void __kernel_io_startup     ( cluster &, unsigned, bool );
void __kernel_io_finish_start( cluster & );
void __kernel_io_prepare_stop( cluster & );
void __kernel_io_shutdown    ( cluster &, bool );

//-----------------------------------------------------------------------------
// Utils
#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
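// Illustrative usage sketch (not part of the original header; 'foo' and 'some_object'
// are hypothetical names). KERNEL_STORAGE only reserves raw, suitably aligned static
// storage; the object is expected to be constructed in place later, e.g.:
//    KERNEL_STORAGE(foo, some_object);                 // declares: static char storage_some_object[sizeof(foo)]
//    foo * some_object = (foo *)&storage_some_object;  // constructed/initialized during kernel startup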

// Thread-local pseudo-random number generator
// (xorshift-style: three shift/xor steps on kernelTLS.rand_seed)
static inline uint32_t __tls_rand() {
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
	return kernelTLS.rand_seed;
}


void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

void doregister( struct cluster * cltr, struct $thread & thrd );
void unregister( struct cluster * cltr, struct $thread & thrd );

void doregister( struct cluster * cltr, struct processor * proc );
void unregister( struct cluster * cltr, struct processor * proc );

//=======================================================================
// Cluster lock API
//=======================================================================
// Cells used by the reader-writer lock;
// while not generic, it relies only on an opaque pointer
struct __attribute__((aligned(64))) __scheduler_lock_id_t {
	__processor_id_t * volatile handle;
	volatile bool lock;
};

// Lock-Free registering/unregistering of threads
// Register a processor with a given cluster and get its unique id in return
unsigned doregister( struct __processor_id_t * proc );

// Unregister a processor, releasing the unique id it was assigned
void     unregister( struct __processor_id_t * proc );
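
// Illustrative lifecycle sketch (not part of the original header; 'self' is hypothetical,
// and whether doregister or the caller stores the returned id is an assumption):
//    __processor_id_t self;
//    self.id = doregister( &self );   // claim a cell in the scheduler reader-writer lock
//    /* ... use ready_schedule_lock/unlock( &self ) while scheduling ... */
//    unregister( &self );             // give the cell back during shutdown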

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			asm volatile("pause");
	}
	/* paranoid */ verify(*ll);
}

// Non-Blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
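
// Illustrative usage sketch (not part of the original header; 'flag' is hypothetical).
// The acquire loop is a test-and-test-and-set spin: it retries the exchange only after
// relaxed loads observe the lock as free, pausing between loads.
//    static volatile bool flag = false;
//    __atomic_acquire( &flag );          // spin until ownership is obtained
//    /* ... short critical section ... */
//    __atomic_unlock ( &flag );          // store-release hands the lock back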

//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// while this lock is mostly generic, some aspects
// have been hard-coded for the ready-queue for
// simplicity and performance
struct __scheduler_RWLock_t {
	// total cachelines allocated
	unsigned int max;

	// cachelines currently in use
	volatile unsigned int alloc;

	// cachelines ready to iterate over
	// (!= to alloc when thread is in second half of doregister)
	volatile unsigned int ready;

	// writer lock
	volatile bool lock;

	// data pointer
	__scheduler_lock_id_t * data;
};

void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

extern __scheduler_RWLock_t * __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  when creating/destroying queues
static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);

	// Step 1 : make sure no writer is in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
		asm volatile("pause");

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);
}

static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].lock);
	__atomic_unlock(&data[iproc].lock);
}

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
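
// Illustrative pairing sketch (not part of the original header): the value returned by
// ready_mutate_lock() is handed back to ready_mutate_unlock(), e.g.:
//    uint_fast32_t last = ready_mutate_lock();      // writer side: excludes all readers
//    /* ... add or remove ready-queue lanes ... */
//    ready_mutate_unlock( last );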

//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// returns true if the list was previously empty, false otherwise
__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
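
// Illustrative scheduling sketch (not part of the original header; 'id', 'cltr' and
// 'thrd' are hypothetical). push/pop are assumed to run under the reader side of the
// scheduler lock, per the reader-side comment above:
//    ready_schedule_lock  ( id );
//    bool was_empty = push( cltr, thrd );   // enqueue onto the cluster's ready-queue
//    ready_schedule_unlock( id );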

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr);

//-----------------------------------------------------------------------
// Statistics: called at the end of each thread to tally its statistics
#if !defined(__CFA_NO_STATISTICS__)
void stats_tls_tally(struct cluster * cltr);
#else
static inline void stats_tls_tally(struct cluster * cltr) {}
#endif

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //