source: libcfa/src/concurrency/kernel_private.hfa @ 37ba662

Last change on this file since 37ba662 was 37ba662, checked in by Thierry Delisle <tdelisle@…>, 4 years ago

Minor improvements to alignments and memory layout

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Sat Nov 30 19:25:02 2019
// Update Count     : 8
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"
#include "stats.hfa"


//-----------------------------------------------------------------------------
// Scheduler

struct __attribute__((aligned(128))) __scheduler_lock_id_t;

extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	void enable_interrupts_noPoll();
	void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void __schedule_thread( struct __processor_id_t *, $thread * ) __attribute__((nonnull (2)));

// Block current thread and release/wake-up the following resources
void __leave_thread() __attribute__((noreturn));

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * __create_pthread( pthread_t *, void * (*)(void *), void * );


struct event_kernel_t {
	alarm_list_t alarms;
	__spinlock_t lock;
};

extern event_kernel_t * event_kernel;

struct __cfa_kernel_preemption_state_t {
	bool enabled;
	bool in_progress;
	unsigned short disable_count;
};

extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
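
// The preemption state above is driven by disable_interrupts / enable_interrupts
// (declared earlier, implemented in the preemption machinery). A minimal sketch of
// the intended nesting discipline, assuming disable_count is a plain nesting counter:
//
//	disable_interrupts();                     // disable_count 0 -> 1, enabled becomes false
//	disable_interrupts();                     // disable_count 1 -> 2, nested calls are allowed
//	enable_interrupts_noPoll();               // disable_count 2 -> 1, still disabled, no preemption poll
//	enable_interrupts( __cfaabi_dbg_ctx );    // disable_count 1 -> 0, enabled becomes true,
//	                                          // (debug-context argument is empty in non-debug builds)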

extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
extern "C" {
	void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
	extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
)

// KERNEL ONLY unpark without disabling interrupts
void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );

//-----------------------------------------------------------------------------
// I/O
void __kernel_io_startup     ( cluster &, unsigned, bool );
void __kernel_io_finish_start( cluster & );
void __kernel_io_prepare_stop( cluster & );
void __kernel_io_shutdown    ( cluster &, bool );

//-----------------------------------------------------------------------------
// Utils
#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
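
// KERNEL_STORAGE reserves raw, suitably aligned static storage for a T named
// storage_X, so the object can be constructed in place later. A minimal usage
// sketch (the name mainCluster is illustrative only):
//
//	KERNEL_STORAGE(cluster, mainCluster);
//	// expands to: __attribute((aligned(__alignof__(cluster)))) static char storage_mainCluster[sizeof(cluster)]
//
//	cluster * c = (cluster *)&storage_mainCluster;   // bytes stay raw until a constructor runs
//	(*c){ /* cluster constructor arguments */ };     // Cforall in-place construction when the kernel boots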

// Pseudo-random number generator (xorshift-style) using a seed stored in kernel TLS
static inline uint32_t __tls_rand() {
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
	return kernelTLS.rand_seed;
}


void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

void doregister( struct cluster * cltr, struct $thread & thrd );
void unregister( struct cluster * cltr, struct $thread & thrd );

void doregister( struct cluster * cltr, struct processor * proc );
void unregister( struct cluster * cltr, struct processor * proc );

//=======================================================================
// Cluster lock API
//=======================================================================
// Cells used by the reader-writer lock;
// while not generic, it only relies on an opaque pointer
struct __attribute__((aligned(128))) __scheduler_lock_id_t {
	__processor_id_t * volatile handle;
	volatile bool lock;
};

// Lock-Free registering/unregistering of threads
// Register a processor to a given cluster and get its unique id in return
unsigned doregister( struct __processor_id_t * proc );

// Unregister a processor from a given cluster using its id, getting back the original pointer
void     unregister( struct __processor_id_t * proc );
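
// A minimal sketch of the intended pairing, assuming `id` below is the
// __processor_id_t embedded in a processor (names are illustrative):
//
//	unsigned slot = doregister( &id );   // a cell in __scheduler_lock->data now refers to &id
//	// ... the index identifies this processor's cell for the reader-side lock below ...
//	unregister( &id );                   // the cell is released and may be recycled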

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			asm volatile("pause");
	}
	/* paranoid */ verify(*ll);
}

// Non-Blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
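
// A minimal usage sketch of these primitives on a standalone flag
// (the variable `flag` is illustrative, not part of this header):
//
//	static volatile bool flag = false;
//	__atomic_acquire( &flag );              // spins (with pause) until the flag is owned
//	// ... critical section ...
//	__atomic_unlock( &flag );               // store-release; the verify checks the lock was held
//
//	if( __atomic_try_acquire( &flag ) ) {   // non-blocking: true means the lock was taken
//		// ... critical section ...
//		__atomic_unlock( &flag );
//	}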

//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// while this lock is mostly generic, some aspects
// have been hard-coded for the ready-queue for
// simplicity and performance
struct __scheduler_RWLock_t {
	// total cachelines allocated
	unsigned int max;

	// cachelines currently in use
	volatile unsigned int alloc;

	// cachelines ready to iterate over
	// (!= to alloc when thread is in second half of doregister)
	volatile unsigned int ready;

	// writer lock
	volatile bool lock;

	// data pointer
	__scheduler_lock_id_t * data;
};

void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

extern __scheduler_RWLock_t * __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);

	// Step 1 : make sure no writer is in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
		asm volatile("pause");

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);
}

static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].lock);
	__atomic_unlock(&data[iproc].lock);
}
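
// A minimal sketch of the reader-side protocol around a scheduling operation
// (`proc_id` is the caller's registered __processor_id_t; push is part of the
// Ready-Queue API declared below):
//
//	ready_schedule_lock( proc_id );      // wait out any writer, then take our own cell's lock
//	push( cltr, thrd );                  // safe to use the ready queue for scheduling
//	ready_schedule_unlock( proc_id );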

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
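
// A minimal sketch of the writer-side protocol, e.g. around a resize
// (ready_queue_grow is declared below; the value returned by the lock must be
// handed back to the matching unlock):
//
//	uint_fast32_t last = ready_mutate_lock();   // excludes all readers and other writers
//	ready_queue_grow( cltr );                   // structural change under exclusive access
//	ready_mutate_unlock( last );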

//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// returns true if the list was previously empty, false otherwise
__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
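
// A minimal sketch of how a scheduler loop might drive these two calls,
// typically inside ready_schedule_lock/unlock above (0p is the Cforall null pointer):
//
//	bool was_empty = push( cltr, thrd );   // enqueue a thread that became ready
//	// ...
//	$thread * nxt = pop( cltr );           // dequeue the next thread to run
//	if( nxt == 0p ) { /* nothing ready : idle or look elsewhere */ }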

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr);

//-----------------------------------------------------------------------
// Statistics: call at the end of each thread to register statistics
#if !defined(__CFA_NO_STATISTICS__)
static inline struct __stats_t * __tls_stats() {
	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
	/* paranoid */ verify( kernelTLS.this_stats );
	return kernelTLS.this_stats;
}
#endif

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //