source: libcfa/src/concurrency/kernel_private.hfa @ b7d6a36

Last change on this file: b7d6a36, checked in by Thierry Delisle <tdelisle@…>, 4 years ago

Merge branch 'master' into relaxed_ready

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Sat Nov 30 19:25:02 2019
// Update Count     : 8
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"


//-----------------------------------------------------------------------------
// Scheduler

extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	void enable_interrupts_noPoll();
	void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void ScheduleThread( thread_desc * );
static inline void WakeThread( thread_desc * thrd ) {
	if( !thrd ) return;

	verify(thrd->state == Inactive);

	disable_interrupts();
	ScheduleThread( thrd );
	enable_interrupts( __cfaabi_dbg_ctx );
}
thread_desc * nextThread(cluster * this);

// Block the current thread and release/wake up the given resources
void BlockInternal(void);
void BlockInternal(__spinlock_t * lock);
void BlockInternal(thread_desc * thrd);
void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
void BlockInternal(__spinlock_t * locks [], unsigned short count);
void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
void BlockInternal(__finish_callback_fptr_t callback);
void LeaveThread(__spinlock_t * lock, thread_desc * thrd);

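// Example (a sketch, not part of the original header; "example_wait" is a
// hypothetical caller): BlockInternal( lock ) parks the current thread and
// releases the given spinlock as part of blocking.
//
//   void example_wait( __spinlock_t * lock ) {   // caller holds *lock
//       BlockInternal( lock );                   // release *lock and block
//       // execution resumes here once another thread calls
//       // WakeThread()/ScheduleThread() on this thread
//   }
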
//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * create_pthread( pthread_t *, void * (*)(void *), void * );

static inline void wake_fast(processor * this) {
	__cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
	post( this->idleLock );
}

static inline void wake(processor * this) {
	disable_interrupts();
	wake_fast(this);
	enable_interrupts( __cfaabi_dbg_ctx );
}

struct event_kernel_t {
	alarm_list_t alarms;
	__spinlock_t lock;
};

extern event_kernel_t * event_kernel;

struct __cfa_kernel_preemption_state_t {
	bool enabled;
	bool in_progress;
	unsigned short disable_count;
};

extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
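
// Sketch (not part of the original header) of the expected nesting behaviour,
// assuming the usual counting semantics of the interrupt calls declared above:
//
//   disable_interrupts();                    // disable_count 0 -> 1, enabled = false
//   disable_interrupts();                    //               1 -> 2 (nesting allowed)
//   enable_interrupts_noPoll();              //               2 -> 1
//   enable_interrupts( __cfaabi_dbg_ctx );   //               1 -> 0, enabled = true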

//-----------------------------------------------------------------------------
// Threads
extern "C" {
	void CtxInvokeThread(void (*main)(void *), void * this);
}

extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);

__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
	extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
)

//-----------------------------------------------------------------------------
// Utils
#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
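
// Example (a sketch, not part of the original header; "mainIdleCtx" is a
// hypothetical name): reserve suitably aligned static storage for a type
// without running its constructor, so the object can be constructed in place later.
//
//   KERNEL_STORAGE(coroutine_desc, mainIdleCtx);
//   // ... later: construct/use the object through (coroutine_desc *)&storage_mainIdleCtx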

static inline uint32_t tls_rand() {
	// xorshift-style pseudo-random number generator over the kernel thread-local seed
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
	return kernelTLS.rand_seed;
}


void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

void doregister( struct cluster * cltr, struct thread_desc & thrd );
void unregister( struct cluster * cltr, struct thread_desc & thrd );

//=======================================================================
// Cluster lock API
//=======================================================================
struct __attribute__((aligned(64))) __processor_id {
	processor * volatile handle;
	volatile bool lock;
};

// Lock-free registering/unregistering of processors
// Register a processor to a given cluster and get its unique id in return
unsigned doregister( struct cluster * cltr, struct processor * proc );

// Unregister a processor from a given cluster using its id, getting back the original pointer
void     unregister( struct cluster * cltr, struct processor * proc );
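
// Example (a sketch, not part of the original header): a processor registers
// itself with a cluster when it starts running there and unregisters when it
// stops; the returned id indexes its slot in the cluster's reader-writer lock.
//
//   proc->id = doregister( cltr, proc );
//   // ... schedule threads ...
//   unregister( cltr, proc );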

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			asm volatile("pause");
	}
	/* paranoid */ verify(*ll);
}

// Non-blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
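
// Example (a sketch, not part of the original header; "example_lock" and
// "example_update" are hypothetical): guard a short critical section with the
// test-and-test-and-set spinlock above.
//
//   static volatile bool example_lock = false;
//   static void example_update(void) {
//       __atomic_acquire( &example_lock );   // spins until the lock is taken
//       // ... touch the shared state ...
//       __atomic_unlock ( &example_lock );
//   }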

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);

	// Step 1 : make sure no writers are in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
		asm volatile("pause");

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);
}

static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].lock);
	__atomic_unlock(&data[iproc].lock);
}
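
// Example (a sketch, not part of the original header; the body is hypothetical):
// a processor takes the reader side around per-scheduling ready-queue operations
// such as the push/pop declared below.
//
//   ready_schedule_lock  ( cltr, proc );
//   thread_desc * thrd = pop( cltr );        // or: push( cltr, thrd )
//   ready_schedule_unlock( cltr, proc );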

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( struct cluster & cltr );

void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
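
// Example (a sketch, not part of the original header; the body is hypothetical):
// the writer side brackets structural changes to the ready queue and hands the
// value returned by the lock back to the unlock.
//
//   uint_fast32_t last = ready_mutate_lock( *cltr );
//   ready_queue_grow( cltr );                // e.g. add lanes while readers are excluded
//   ready_mutate_unlock( *cltr, last );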

//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// returns true if the list was previously empty, false otherwise
__attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
__attribute__((hot)) thread_desc * pop(struct cluster * cltr);

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr);

//-----------------------------------------------------------------------
// Statistics: called at the end of each thread to tally its statistics into the cluster
#if !defined(__CFA_NO_STATISTICS__)
void stats_tls_tally(struct cluster * cltr);
#else
static inline void stats_tls_tally(struct cluster * cltr) {}
#endif

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //