Context Navigation

kernel_private.hfa @ a76efc8

ADTarm-ehast-experimentalenumforall-pointer-decayjacob/cs343-translationnew-ast-unique-exprpthread-emulationqualifiedEnum

Last change on this file since a76efc8 was 254ad1b, checked in by Thierry Delisle <tdelisle@…>, 3 years ago
Separate schedule_thread from the scheduler lock
Property mode set to `100644`
File size: 10.0 KB

Line
1	//
2	// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3	//
4	// The contents of this file are covered under the licence agreement in the
5	// file "LICENCE" distributed with Cforall.
6	//
7	// kernel_private.hfa --
8	//
9	// Author : Thierry Delisle
10	// Created On : Mon Feb 13 12:27:26 2017
11	// Last Modified By : Peter A. Buhr
12	// Last Modified On : Wed Aug 12 08:21:33 2020
13	// Update Count : 9
14	//
15
16	#pragma once
17
18	#include "kernel.hfa"
19	#include "thread.hfa"
20
21	#include "alarm.hfa"
22	#include "stats.hfa"
23
24	//-----------------------------------------------------------------------------
25	// Scheduler
26
// Forward declaration only; the members are not needed by the prototypes below.
struct __attribute__((aligned(128))) __scheduler_lock_id_t;

// Interrupt control entry points, declared with C linkage.
extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	// 'poll' defaults to true when the argument is omitted.
	void enable_interrupts( bool poll = true );
}

// The thread pointer must not be null (nonnull attribute on argument 1).
void schedule_thread$( $thread * ) __attribute__((nonnull (1)));

// Used by the paranoid checks in this header to assert that preemption is off.
extern bool __preemption_enabled();

//release/wake-up the following resources
void __thread_finish( $thread * thrd );
40
41	//-----------------------------------------------------------------------------
42	// Processor
// 'main' routine taking a processor context.
void main(processorCtx_t *);

// pthread helpers.
// __create_pthread takes the pthread handle to fill in, a routine of the usual
// pthread form 'void * (*)(void *)', and a 'void *' argument.
// NOTE(review): presumably the returned 'void *' is the stack later handed to
// __destroy_pthread -- confirm against the definition.
void * __create_pthread( pthread_t *, void * (*)(void *), void * );
void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );

// Defined elsewhere.
extern cluster * mainCluster;
51
52	//-----------------------------------------------------------------------------
53	// Threads
extern "C" {
	// C-linkage entry point: receives a routine taking one 'void *' and the
	// 'this' pointer that goes with it.
	void __cfactx_invoke_thread(void (*main)(void *), void * this);
}
57
// Declarations passed through __cfaabi_dbg_debug_do
// (presumably only emitted in debug builds -- confirm against the macro definition).
__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
	extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
)
62
// Thread ticket values
#define TICKET_BLOCKED (-1) // thread is blocked
#define TICKET_RUNNING ( 0) // thread is running
#define TICKET_UNBLOCK ( 1) // thread should ignore next block
66
67	//-----------------------------------------------------------------------------
68	// Utils
// Register / unregister a thread with a cluster (thread passed by reference).
void doregister( struct cluster * cltr, struct $thread & thrd );
void unregister( struct cluster * cltr, struct $thread & thrd );
71
72	//-----------------------------------------------------------------------------
73	// I/O
// Create an I/O arbiter, and destroy one obtained from create().
$io_arbiter * create(void);
void destroy($io_arbiter *);
76
77	//=======================================================================
78	// Cluster lock API
79	//=======================================================================
// Lock-Free registering/unregistering of threads
// Register a processor id.
// NOTE(review): nothing is returned; the unique id is presumably stored in the
// pointed-to __processor_id_t (readers in this header use this_proc_id->id) -- confirm.
void register_proc_id( struct __processor_id_t * );

// Unregister a previously registered processor id.
void unregister_proc_id( struct __processor_id_t * proc );
86
87	//=======================================================================
88	// Reader-writer lock implementation
89	// Concurrent with doregister/unregister,
90	// i.e., threads can be added at any point during or between the entry/exit
91
92	//-----------------------------------------------------------------------
93	// simple spinlock underlying the RWLock
// Blocking acquire
// Keeps swapping 'true' into *ll until the exchange reports that the previous
// value was 'false', i.e. until this caller is the one that took the lock.
static inline void __atomic_acquire(volatile bool * ll) {
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		// Lock is currently held: spin on relaxed loads until it reads false,
		// then go back and retry the exchange.
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			Pause();
	}
	/* paranoid */ verify(*ll);
}
102
// Non-Blocking acquire
// Makes a single attempt: swaps 'true' into *ll and returns true only when the
// previous value was false (the lock was free and is now held by the caller).
static inline bool __atomic_try_acquire(volatile bool * ll) {
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}
107
// Release
// The lock must currently be held (checked by the paranoid verify);
// stores 'false' with release ordering.
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
113
// Cells used by the reader-writer lock
// while not generic, it only relies on an opaque pointer
struct __attribute__((aligned(128))) __scheduler_lock_id_t {
	// Spin lock used as the underlying lock
	volatile bool lock;

	// Handle pointing to the proc owning this cell
	// Used for allocating cells and debugging
	__processor_id_t * volatile handle;

	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		bool owned;
	#endif
};

// A cell must not be larger than its own alignment (128 bytes).
static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
131
//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// while this lock is mostly generic, some aspects
// have been hard-coded for the ready-queue for
// simplicity and performance
struct __scheduler_RWLock_t {
	// total cachelines allocated
	unsigned int max;

	// cachelines currently in use
	volatile unsigned int alloc;

	// cachelines ready to iterate over
	// (!= to alloc when thread is in second half of doregister)
	volatile unsigned int ready;

	// writer lock
	volatile bool lock;

	// data pointer
	__scheduler_lock_id_t * data;
};
154
// Constructor and destructor of the reader-writer lock
void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

// Lock instance opened by the inline reader-side functions in this header
extern __scheduler_RWLock_t * __scheduler_lock;
159
//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
// Must be called with preemption disabled and a registered processor id.
static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
	/* paranoid */ verify( ! __preemption_enabled() );
	/* paranoid */ verify( kernelTLS().this_proc_id );

	// Index of this processor's cell in 'data'
	unsigned iproc = kernelTLS().this_proc_id->id;
	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
	/*paranoid*/ verify(iproc < ready);

	// Step 1 : make sure no writers are in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
		Pause();

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);

	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		data[iproc].owned = true;
	#endif
}
189
// Reader side release: clears the debug ownership flag (verify builds only),
// then releases this processor's cell lock.
// Must be called with preemption disabled, by the processor holding the cell lock.
static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
	/* paranoid */ verify( ! __preemption_enabled() );
	/* paranoid */ verify( kernelTLS().this_proc_id );

	// Index of this processor's cell in 'data'
	unsigned iproc = kernelTLS().this_proc_id->id;
	/*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].lock);
	/*paranoid*/ verify(data[iproc].owned);
	#ifdef __CFA_WITH_VERIFY__
		// Debug, check if this is owned for reading
		data[iproc].owned = false;
	#endif
	__atomic_unlock(&data[iproc].lock);
}
205
#ifdef __CFA_WITH_VERIFY__
	// Debug query: is the calling processor's cell marked as owned for reading?
	static inline bool ready_schedule_islocked(void) {
		/* paranoid */ verify( ! __preemption_enabled() );
		/*paranoid*/ verify( kernelTLS().this_proc_id );
		__processor_id_t * proc = kernelTLS().this_proc_id;
		return __scheduler_lock->data[proc->id].owned;
	}

	// Debug query: current value of the writer lock flag
	static inline bool ready_mutate_islocked() {
		return __scheduler_lock->lock;
	}
#endif
218
//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
// The returned value must be handed back to ready_mutate_unlock.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
225
//-----------------------------------------------------------------------
// Lock-Free registering/unregistering of threads
// Registers the processor id, then, for convenience, also acquires the writer lock.
// Returns the value produced by ready_mutate_lock, to be passed to the matching unlock.
static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
	register_proc_id( proc );
	return ready_mutate_lock();
}
234
// Releases the writer lock, then unregisters the processor id.
// Assumes the lock is acquired; 'last_s' is the value returned when it was taken.
static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
	ready_mutate_unlock( last_s );
	unregister_proc_id( proc );
}
241
//-----------------------------------------------------------------------
// Cluster idle lock/unlock
// Takes the reader side of the scheduler lock, then the list's own counting lock.
// Must be called with preemption disabled.
static inline void lock(__cluster_proc_list & this) {
	/* paranoid */ verify( ! __preemption_enabled() );

	// Start by locking the global RWlock so that we know no-one is
	// adding/removing processors while we mess with the idle lock
	ready_schedule_lock();

	// Simple counting lock, acquired by incrementing the counter
	// to an odd number
	for() {
		uint64_t l = this.lock;
		// Only attempt the exchange when the counter is even (lock free);
		// leave the loop (rather than the function) on success so that the
		// check after the loop is actually executed.
		if(
			(0 == (l % 2))
			&& __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
		) break;
		Pause();
	}

	/* paranoid */ verify( ! __preemption_enabled() );
}
264
// Releases the list's counting lock, then the reader side of the scheduler lock.
// Must be called with preemption disabled, by the holder of the lock.
static inline void unlock(__cluster_proc_list & this) {
	/* paranoid */ verify( ! __preemption_enabled() );

	/* paranoid */ verify( 1 == (this.lock % 2) );
	// Simple counting lock, released by incrementing to an even number
	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );

	// Release the global lock, which we acquired when locking
	ready_schedule_unlock();

	/* paranoid */ verify( ! __preemption_enabled() );
}
277
278	//=======================================================================
279	// Ready-Queue API
280	//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// NOTE(review): declared void, so no "was previously empty" result is reported
// to the caller -- confirm against the definition.
__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
// May return 0p spuriously
__attribute__((hot)) struct $thread * pop_fast(struct cluster * cltr);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
// guaranteed to find any threads added before this call
__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr);
304
305
306	// Local Variables: //
307	// mode: c //
308	// tab-width: 4 //
309	// End: //

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format