source: libcfa/src/concurrency/kernel_private.hfa @ 920dca3

Last change on this file since 920dca3 was f00b26d4, checked in by Thierry Delisle <tdelisle@…>, 4 years ago

Re-worked IO to use epoll and support multiple io_contexts per cluster.
Also redid how cluster options are handled.
Changed how iofwd calls are passed to support future features and io_contexts rework.

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Sat Nov 30 19:25:02 2019
// Update Count     : 8
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"
#include "stats.hfa"

#include "bits/random.hfa"


//-----------------------------------------------------------------------------
// Scheduler

struct __attribute__((aligned(128))) __scheduler_lock_id_t;

extern "C" {
    void disable_interrupts() OPTIONAL_THREAD;
    void enable_interrupts_noPoll();
    void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void __schedule_thread( struct __processor_id_t *, $thread * )
#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
    __attribute__((nonnull (2)))
#endif
;

// Block current thread and release/wake-up the following resources
void __leave_thread() __attribute__((noreturn));

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * __create_pthread( pthread_t *, void * (*)(void *), void * );



struct event_kernel_t {
    alarm_list_t alarms;
    __spinlock_t lock;
};

extern event_kernel_t * event_kernel;

struct __cfa_kernel_preemption_state_t {
    bool enabled;
    bool in_progress;
    unsigned short disable_count;
};

extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));

extern cluster * mainCluster;

//-----------------------------------------------------------------------------
// Threads
extern "C" {
      void __cfactx_invoke_thread(void (*main)(void *), void * this);
}

__cfaabi_dbg_debug_do(
    extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
    extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
)

// KERNEL ONLY unpark without disabling interrupts
void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );

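// Note (added commentary, inferred from the code below): `single_sem` appears to
// encode three states in its `ptr` field: 0p for "empty", 1p for "signalled with
// no waiter", and any other value for a pointer to the parked thread waiting on
// it. __post either marks the semaphore signalled or wakes the waiter via
// __unpark, and returns true only when a thread was actually woken.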
static inline bool __post(single_sem & this, struct __processor_id_t * id) {
    for() {
        struct $thread * expected = this.ptr;
        if(expected == 1p) return false;
        if(expected == 0p) {
            if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                return false;
            }
        }
        else {
            if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                __unpark( id, expected __cfaabi_dbg_ctx2 );
                return true;
            }
        }
    }
}

//-----------------------------------------------------------------------------
// Utils
#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
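// Illustrative usage sketch (added commentary; the constructor call is a
// placeholder): the macro reserves raw, suitably aligned static storage so the
// object can be constructed explicitly once the runtime is ready.
//     KERNEL_STORAGE(cluster, mainCluster);            // static char storage_mainCluster[sizeof(cluster)]
//     ...
//     mainCluster = (cluster *)&storage_mainCluster;   // later, construct in place
//     (*mainCluster){ /* ... */ };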

static inline uint64_t __tls_rand() {
    #if defined(__SIZEOF_INT128__)
        return __lehmer64( kernelTLS.rand_seed );
    #else
        return __xorshift64( kernelTLS.rand_seed );
    #endif
}
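// Note (added commentary): both generators come from bits/random.hfa; __lehmer64
// needs 128-bit arithmetic, hence the __SIZEOF_INT128__ guard, with __xorshift64
// as the portable fallback. The seed lives in kernelTLS, so each processor
// advances its own, non-cryptographic random stream.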


void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

void doregister( struct cluster * cltr, struct $thread & thrd );
void unregister( struct cluster * cltr, struct $thread & thrd );

//-----------------------------------------------------------------------------
// I/O
void __kernel_io_startup     ();
void __kernel_io_shutdown    ();

static inline io_context * __get_io_context( void ) {
    cluster * cltr = active_cluster();
    /* paranoid */ verifyf( cltr, "No active cluster for io operation\n");
    assertf( cltr->io.cnt > 0, "Cluster %p has no default io contexts and no context was specified\n", cltr );
    /* paranoid */ verifyf( cltr->io.ctxs, "default io contexts for cluster %p are missing\n", cltr);
    return &cltr->io.ctxs[ __tls_rand() % cltr->io.cnt ];
}
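// Minimal usage sketch (added commentary; the submission step is hypothetical):
// an I/O operation that was not given an explicit context picks one of the
// cluster's contexts at random, spreading submissions across them.
//     io_context & ctx = *__get_io_context();
//     // ... submit the request through ctx ...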

void ^?{}(io_context & this, bool );

//=======================================================================
// Cluster lock API
//=======================================================================
// Cells used by the reader-writer lock
// while not generic, it only relies on an opaque pointer
struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    // Spin lock used as the underlying lock
    volatile bool lock;

    // Handle pointing to the proc owning this cell
    // Used for allocating cells and debugging
    __processor_id_t * volatile handle;

    #ifdef __CFA_WITH_VERIFY__
        // Debug, check if this is owned for reading
        bool owned;
    #endif
};

static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
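// Note (added commentary): the 128-byte alignment together with the static_assert
// above gives each cell its own cache-line-sized slot, presumably so a processor
// spinning on its own `lock` field does not false-share with neighbouring cells.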

// Lock-Free registering/unregistering of threads
// Register a processor to a given cluster and get its unique id in return
unsigned doregister( struct __processor_id_t * proc );

// Unregister a processor from a given cluster using its id, getting back the original pointer
void     unregister( struct __processor_id_t * proc );

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., threads can be added at any point during or between the entry/exit

//-----------------------------------------------------------------------
// simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
    while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
        while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
            asm volatile("pause");
    }
    /* paranoid */ verify(*ll);
}

// Non-Blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
    return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
    /* paranoid */ verify(*ll);
    __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
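// Minimal usage sketch (added commentary; the lock variable is hypothetical):
// these helpers implement the classic test-and-test-and-set pattern, spinning on
// a cheap relaxed load until the flag looks free before retrying the exchange.
//     static volatile bool example_lock = false;
//     __atomic_acquire( &example_lock );               // blocking acquire
//     // ... critical section ...
//     __atomic_unlock ( &example_lock );
//     if( __atomic_try_acquire( &example_lock ) ) {    // non-blocking variant
//         // ... critical section ...
//         __atomic_unlock( &example_lock );
//     }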

//-----------------------------------------------------------------------
// Reader-Writer lock protecting the ready-queues
// while this lock is mostly generic, some aspects
// have been hard-coded to the ready-queue for
// simplicity and performance
struct __scheduler_RWLock_t {
    // total cachelines allocated
    unsigned int max;

    // cachelines currently in use
    volatile unsigned int alloc;

    // cachelines ready to iterate over
    // (!= to alloc when thread is in second half of doregister)
    volatile unsigned int ready;

    // writer lock
    volatile bool lock;

    // data pointer
    __scheduler_lock_id_t * data;
};

void  ?{}(__scheduler_RWLock_t & this);
void ^?{}(__scheduler_RWLock_t & this);

extern __scheduler_RWLock_t * __scheduler_lock;

//-----------------------------------------------------------------------
// Reader side : acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
    unsigned iproc = proc->id;
    /*paranoid*/ verify(data[iproc].handle == proc);
    /*paranoid*/ verify(iproc < ready);

    // Step 1 : make sure no writer is in the middle of the critical section
    while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
        asm volatile("pause");

    // Fence needed because we don't want to start trying to acquire the lock
    // before we read a false.
    // Not needed on x86
    // std::atomic_thread_fence(std::memory_order_seq_cst);

    // Step 2 : acquire our local lock
    __atomic_acquire( &data[iproc].lock );
    /*paranoid*/ verify(data[iproc].lock);

    #ifdef __CFA_WITH_VERIFY__
        // Debug, check if this is owned for reading
        data[iproc].owned = true;
    #endif
}

static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
    unsigned iproc = proc->id;
    /*paranoid*/ verify(data[iproc].handle == proc);
    /*paranoid*/ verify(iproc < ready);
    /*paranoid*/ verify(data[iproc].lock);
    /*paranoid*/ verify(data[iproc].owned);
    #ifdef __CFA_WITH_VERIFY__
        // Debug, check if this is owned for reading
        data[iproc].owned = false;
    #endif
    __atomic_unlock(&data[iproc].lock);
}
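// Minimal usage sketch (added commentary; the caller shown is hypothetical):
// scheduling operations bracket their use of the ready queue with the reader
// side of the lock, where `id` is the __processor_id_t registered above.
//     ready_schedule_lock  ( id );
//     // ... push/pop on the ready queue ...
//     ready_schedule_unlock( id );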

#ifdef __CFA_WITH_VERIFY__
    static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
        return __scheduler_lock->data[proc->id].owned;
    }

    static inline bool ready_mutate_islocked() {
        return __scheduler_lock->lock;
    }
#endif

//-----------------------------------------------------------------------
// Writer side : acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( void );

void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );

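// Minimal usage sketch (added commentary; the caller shown is hypothetical):
// structural changes, e.g. growing the ready queue when processors are added,
// are bracketed by the writer side of the lock.
//     uint_fast32_t last_size = ready_mutate_lock();
//     // ... e.g. ready_queue_grow( cltr, target ) ...
//     ready_mutate_unlock( last_size );
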
//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// query the ready queue of a cluster
// returns true if it contains at least one thread
__attribute__((hot)) bool query(struct cluster * cltr);

//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// returns true if the list was previously empty, false otherwise
__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
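// Minimal usage sketch (added commentary; the wake-up policy shown is a
// hypothetical caller, not part of this API): the return value of push can be
// used to decide whether an idle processor should be notified.
//     ready_schedule_lock( id );
//     bool was_empty = push( cltr, thrd );
//     ready_schedule_unlock( id );
//     if( was_empty ) { /* e.g. wake an idle processor */ }
// and symmetrically, a processor picks its next thread with:
//     struct $thread * nxt = pop( cltr );   // 0p when the queue is empty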

//-----------------------------------------------------------------------
// remove thread from the ready queue of a cluster
// returns false if it wasn't found
bool remove_head(struct cluster * cltr, struct $thread * thrd);

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr, int target);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr, int target);

//-----------------------------------------------------------------------
// IO user data
struct __io_user_data_t {
    int32_t result;
    $thread * thrd;
};

//-----------------------------------------------------------------------
// Statistics: helpers called at the end of each thread to register statistics
#if !defined(__CFA_NO_STATISTICS__)
    static inline struct __stats_t * __tls_stats() {
        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
        /* paranoid */ verify( kernelTLS.this_stats );
        return kernelTLS.this_stats;
    }

    #define __STATS__(in_kernel, ...) { \
        if( !(in_kernel) ) disable_interrupts(); \
        with( *__tls_stats() ) { \
            __VA_ARGS__ \
        } \
        if( !(in_kernel) ) enable_interrupts( __cfaabi_dbg_ctx ); \
    }
#else
    #define __STATS__(in_kernel, ...)
#endif
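
// Minimal usage sketch (added commentary; the counter name is hypothetical):
// the macro runs its arguments inside `with( *__tls_stats() )`, disabling
// interrupts first when invoked from outside the kernel.
//     __STATS__( false, ready.pick.push.attempt++; )
// When __CFA_NO_STATISTICS__ is defined, the whole block compiles away.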

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //