source: libcfa/src/concurrency/kernel_private.hfa @ dca5802

Last change: dca5802, checked in by Thierry Delisle <tdelisle@…>, 4 years ago:

Started doing some of the x86 implementations and some changes after a code review

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel_private.hfa --
//
// Author           : Thierry Delisle
// Created On       : Mon Feb 13 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Sat Nov 30 19:25:02 2019
// Update Count     : 8
//

#pragma once

#include "kernel.hfa"
#include "thread.hfa"

#include "alarm.hfa"


//-----------------------------------------------------------------------------
// Scheduler

extern "C" {
	void disable_interrupts() OPTIONAL_THREAD;
	void enable_interrupts_noPoll();
	void enable_interrupts( __cfaabi_dbg_ctx_param );
}

void ScheduleThread( thread_desc * );
static inline void WakeThread( thread_desc * thrd ) {
	if( !thrd ) return;

	verify(thrd->state == Inactive);

	disable_interrupts();
	ScheduleThread( thrd );
	enable_interrupts( __cfaabi_dbg_ctx );
}
thread_desc * nextThread(cluster * this);

// Block the current thread, atomically releasing/waking the given resources
void BlockInternal(void);
void BlockInternal(__spinlock_t * lock);
void BlockInternal(thread_desc * thrd);
void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
void BlockInternal(__spinlock_t * locks [], unsigned short count);
void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
void BlockInternal(__finish_callback_fptr_t callback);
void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
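
// Example (sketch; 'waitq_lock' and 'ready' are hypothetical names): the
// lock-taking overload releases the lock only after the thread is fully
// blocked, closing the race between testing a condition and blocking on it.
//
//	lock( waitq_lock __cfaabi_dbg_ctx2 );
//	if( !ready ) {
//		BlockInternal( &waitq_lock );  // block, then release waitq_lock
//	} else {
//		unlock( waitq_lock );
//	}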

//-----------------------------------------------------------------------------
// Processor
void main(processorCtx_t *);

void * create_pthread( pthread_t *, void * (*)(void *), void * );

static inline void wake_fast(processor * this) {
	__cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
	post( this->idleLock );
}

static inline void wake(processor * this) {
	disable_interrupts();
	wake_fast(this);
	enable_interrupts( __cfaabi_dbg_ctx );
}

struct event_kernel_t {
	alarm_list_t alarms;
	__spinlock_t lock;
};

extern event_kernel_t * event_kernel;

struct __cfa_kernel_preemption_state_t {
	bool enabled;
	bool in_progress;
	unsigned short disable_count;
};

extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
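
// Sketch of the intended nesting discipline (an assumption from the field
// names; the authoritative logic lives in the preemption implementation):
//
//	disable_interrupts();                  // disable_count: 0 -> 1, enabled = false
//	disable_interrupts();                  // disable_count: 1 -> 2 (nested)
//	enable_interrupts_noPoll();            // disable_count: 2 -> 1, still disabled
//	enable_interrupts( __cfaabi_dbg_ctx ); // 1 -> 0, enabled = true, polls deferred preemption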

//-----------------------------------------------------------------------------
// Threads
extern "C" {
	forall(dtype T | is_thread(T))
	void CtxInvokeThread(T * this);
}

extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);

__cfaabi_dbg_debug_do(
	extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
	extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
)

//-----------------------------------------------------------------------------
// Utils
#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
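
// Example (sketch): KERNEL_STORAGE reserves aligned static storage for an
// object constructed later during kernel boot; 'mainCluster' is an
// illustrative name, not a definition in this file:
//
//	KERNEL_STORAGE(cluster, mainCluster);                    // declares storage_mainCluster
//	cluster * mainCluster = (cluster *)&storage_mainCluster;
//	(*mainCluster){};                                        // CFA in-place construction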

static inline uint32_t tls_rand() {
	// xorshift-style generator over the per-processor seed; no locking
	// needed since the seed lives in kernel TLS
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
	return kernelTLS.rand_seed;
}


void doregister( struct cluster & cltr );
void unregister( struct cluster & cltr );

void doregister( struct cluster * cltr, struct thread_desc & thrd );
void unregister( struct cluster * cltr, struct thread_desc & thrd );

//=======================================================================
// Cluster lock API
//=======================================================================
// One entry per processor; the 64-byte alignment keeps each slot on its
// own cache line to avoid false sharing between processors
struct __attribute__((aligned(64))) __processor_id {
	processor * volatile handle;
	volatile bool lock;
};

// Lock-free registering/unregistering of processors
// Register a processor with a given cluster and get its unique id in return
unsigned doregister( struct cluster * cltr, struct processor * proc );

// Unregister a processor from a given cluster using its id, freeing the slot for reuse
void     unregister( struct cluster * cltr, struct processor * proc );
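
// Example (sketch, assumed from the API; actual call sites live in the
// kernel implementation): a processor claims a slot at startup and
// releases it at shutdown.
//
//	this->id = doregister( cltr, this );  // claim a slot; id indexes data[]
//	/* ... run threads ... */
//	unregister( cltr, this );             // release the slot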

//=======================================================================
// Reader-writer lock implementation
// Concurrent with doregister/unregister,
//    i.e., processors can be registered at any point during or between
//    lock entry/exit

//-----------------------------------------------------------------------
// Simple spinlock underlying the RWLock
// Blocking acquire
static inline void __atomic_acquire(volatile bool * ll) {
	// Test-and-test-and-set: spin on a plain load and only retry the
	// atomic exchange once the lock appears free, reducing cache traffic
	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
			asm volatile("pause");
	}
	/* paranoid */ verify(*ll);
}

// Non-blocking acquire
static inline bool __atomic_try_acquire(volatile bool * ll) {
	return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
}

// Release
static inline void __atomic_unlock(volatile bool * ll) {
	/* paranoid */ verify(*ll);
	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
}
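
// Example (sketch): the three primitives compose like any spinlock; the
// flag below is illustrative only:
//
//	static volatile bool flag = false;
//	__atomic_acquire( &flag );            // blocking acquire
//	/* ... critical section ... */
//	__atomic_unlock( &flag );
//	if( __atomic_try_acquire( &flag ) ) { // non-blocking attempt
//		/* ... critical section ... */
//		__atomic_unlock( &flag );
//	}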

//-----------------------------------------------------------------------
// Reader side: acquire when using the ready queue to schedule but not
//  creating/destroying queues
static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);

	// Step 1 : make sure no writer is in the middle of the critical section
	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
		asm volatile("pause");

	// Fence needed because we don't want to start trying to acquire the lock
	// before we read a false.
	// Not needed on x86
	// std::atomic_thread_fence(std::memory_order_seq_cst);

	// Step 2 : acquire our local lock
	__atomic_acquire( &data[iproc].lock );
	/*paranoid*/ verify(data[iproc].lock);
}

static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
	unsigned iproc = proc->id;
	/*paranoid*/ verify(data[iproc].handle == proc);
	/*paranoid*/ verify(iproc < ready);
	/*paranoid*/ verify(data[iproc].lock);
	__atomic_unlock(&data[iproc].lock);
}
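
// Example (sketch): a reader-side section pins the lane structure while
// scheduling; pairing it with pop is illustrative of intent, not an
// actual call site in this file:
//
//	ready_schedule_lock( cltr, proc );
//	thread_desc * thrd = pop( cltr );     // lanes cannot grow/shrink here
//	ready_schedule_unlock( cltr, proc );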

//-----------------------------------------------------------------------
// Writer side: acquire when changing the ready queue, e.g. adding more
//  queues or removing them.
uint_fast32_t ready_mutate_lock( struct cluster & cltr );

void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
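
// Example (sketch): the writer side excludes all readers; the value
// returned by the lock must be handed back to the unlock:
//
//	uint_fast32_t last = ready_mutate_lock( *cltr );
//	ready_queue_grow( cltr );             // structural change is now safe
//	ready_mutate_unlock( *cltr, last );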

//=======================================================================
// Ready-Queue API
//-----------------------------------------------------------------------
// push thread onto a ready queue for a cluster
// returns true if the list was previously empty, false otherwise
__attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);

//-----------------------------------------------------------------------
// pop thread from the ready queue of a cluster
// returns 0p if empty
__attribute__((hot)) thread_desc * pop(struct cluster * cltr);
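
// Example (sketch): the boolean from push flags the empty-to-non-empty
// transition, a natural trigger for waking an idle processor ('idler' is
// a hypothetical processor, not a name from this file):
//
//	if( push( cltr, thrd ) ) {
//		wake( idler );
//	}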

//-----------------------------------------------------------------------
// Increase the width of the ready queue (number of lanes) by 4
void ready_queue_grow  (struct cluster * cltr);

//-----------------------------------------------------------------------
// Decrease the width of the ready queue (number of lanes) by 4
void ready_queue_shrink(struct cluster * cltr);

//-----------------------------------------------------------------------
// Statistics: called at the end of each thread to tally its thread-local
// counters into the cluster
#if !defined(__CFA_NO_STATISTICS__)
void stats_tls_tally(struct cluster * cltr);
#else
static inline void stats_tls_tally(struct cluster * cltr) {}
#endif

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //