source: libcfa/src/concurrency/kernel.hfa @ b7d6a36

Last change: b7d6a36, checked in by Thierry Delisle <tdelisle@…>: Merge branch 'master' into relaxed_ready

//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel --
//
// Author           : Thierry Delisle
// Created On       : Tue Jan 17 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Tue Feb  4 12:29:26 2020
// Update Count     : 22
//

#pragma once
#include <stdbool.h>

#include "invoke.h"
#include "time_t.hfa"
#include "coroutine.hfa"

extern "C" {
#include <pthread.h>
#include <semaphore.h>
}
//-----------------------------------------------------------------------------
// Locks
struct semaphore {
	__spinlock_t lock;
	int count;
	__queue_t(thread_desc) waiting;
};

void  ?{}(semaphore & this, int count = 1);
void ^?{}(semaphore & this);
void   P (semaphore & this);
void   V (semaphore & this);
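
// Example usage (illustrative sketch, not part of this header; the
// variable name is hypothetical):
//   semaphore sig = { 0 };  // count 0, so the first P blocks
//   P( sig );               // consumer: block until a matching V
//   V( sig );               // producer: wake one waiting thread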

//-----------------------------------------------------------------------------
// Processor
extern struct cluster * mainCluster;

enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule, Callback };

typedef void (*__finish_callback_fptr_t)(void);
//TODO: use a union; many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
struct FinishAction {
	FinishOpCode action_code;
	/*
	// Union of possible actions
	union {
		// Option 1 : locks and threads
		struct {
			// 1 thread or N threads
			union {
				thread_desc * thrd;
				struct {
					thread_desc ** thrds;
					unsigned short thrd_count;
				};
			};
			// 1 lock or N locks
			union {
				__spinlock_t * lock;
				struct {
					__spinlock_t ** locks;
					unsigned short lock_count;
				};
			};
		};
		// Option 2 : action pointer
		__finish_callback_fptr_t callback;
	};
	/*/
	thread_desc * thrd;
	thread_desc ** thrds;
	unsigned short thrd_count;
	__spinlock_t * lock;
	__spinlock_t ** locks;
	unsigned short lock_count;
	__finish_callback_fptr_t callback;
	//*/
};
static inline void ?{}(FinishAction & this) {
	this.action_code = No_Action;
	this.thrd = 0p;
	this.lock = 0p;
}
static inline void ^?{}(FinishAction &) {}
// Processor
coroutine processorCtx_t {
	struct processor * proc;
};

// Wrapper around kernel threads
struct processor {
	// Main state
	// Coroutine context that keeps the state of the processor
	struct processorCtx_t runner;

	// Cluster from which to get threads
	struct cluster * cltr;
	unsigned int id;

	// Name of the processor
	const char * name;

	// Handle to pthreads
	pthread_t kernel_thread;

	// RunThread data
	// Action to take after a thread is run
	struct FinishAction finish;

	// Preemption data
	// Node which is added to the discrete-event simulation
	struct alarm_node_t * preemption_alarm;

	// If true, a preemption was triggered in an unsafe region; the processor must preempt as soon as possible
	bool pending_preemption;

	// Idle lock
	__bin_sem_t idleLock;

	// Termination
	// Set to true to notify the processor that it should terminate
	volatile bool do_terminate;

	// Termination synchronisation
	semaphore terminated;

	// pthread stack
	void * stack;

	// Linked-list fields
	struct __dbg_node_proc {
		struct processor * next;
		struct processor * prev;
	} node;

#ifdef __CFA_DEBUG__
	// Last function to enable preemption on this processor
	const char * last_enable;
#endif
};

void  ?{}(processor & this, const char name[], struct cluster & cltr);
void ^?{}(processor & this);

static inline void  ?{}(processor & this)                    { this{ "Anonymous Processor", *mainCluster}; }
static inline void  ?{}(processor & this, struct cluster & cltr)    { this{ "Anonymous Processor", cltr}; }
static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }

static inline [processor *&, processor *& ] __get( processor & this ) {
	return this.node.[next, prev];
}
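
// Example: starting an extra kernel thread (illustrative sketch; the
// variable name is hypothetical):
//   processor worker = { "worker-1", *mainCluster };  // schedules ready threads
//   // the destructor ^?{} requests termination (do_terminate) and blocks
//   // on the terminated semaphore until the underlying pthread exits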

//-----------------------------------------------------------------------------
// Cluster Tools

// Cells used by the reader-writer lock;
// while not generic, it only relies on an opaque pointer
struct __processor_id;

// Reader-writer lock protecting the ready-queue;
// while this lock is mostly generic, some aspects
// have been hard-coded for the ready-queue for
// simplicity and performance
struct __clusterRWLock_t {
	// total cachelines allocated
	unsigned int max;

	// cachelines currently in use
	volatile unsigned int alloc;

	// cachelines ready to iterate over
	// (!= alloc when a thread is in the second half of doregister)
	volatile unsigned int ready;

	// writer lock
	volatile bool lock;

	// data pointer
	__processor_id * data;
};

void  ?{}(__clusterRWLock_t & this);
void ^?{}(__clusterRWLock_t & this);

// Intrusive lanes used by the relaxed ready queue
struct __attribute__((aligned(128))) __intrusive_lane_t {
	// spin lock protecting the queue
	volatile bool lock;

	// anchor for the head and the tail of the queue
	struct __sentinel_t {
		// Linked-list fields:
		// intrusive link field for threads;
		// must be laid out exactly as in thread_desc
		__thread_desc_link link;
	} before, after;

#if defined(__CFA_WITH_VERIFY__)
	// id of the last processor to acquire the lock
	// needed only to check for mutual-exclusion violations
	unsigned int last_id;

	// number of items on this list
	// needed only to check for deadlocks
	unsigned int count;
#endif

	// Optional statistic counters
	#if !defined(__CFA_NO_SCHED_STATS__)
		struct __attribute__((aligned(64))) {
			// difference between the number of pushes and pops
			ssize_t diff;

			// total number of pushes and pops
			size_t  push;
			size_t  pop ;
		} stat;
	#endif
};

void  ?{}(__intrusive_lane_t & this);
void ^?{}(__intrusive_lane_t & this);

typedef unsigned long long __cfa_readyQ_mask_t;

// enum {
//      __cfa_ready_queue_mask_size = (64 - sizeof(size_t)) / sizeof(size_t),
//      __cfa_max_ready_queues = __cfa_ready_queue_mask_size * 8 * sizeof(size_t)
// };

#define __cfa_lane_mask_size ((64 - sizeof(size_t)) / sizeof(__cfa_readyQ_mask_t))
#define __cfa_max_lanes (__cfa_lane_mask_size * 8 * sizeof(__cfa_readyQ_mask_t))
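
// Worked example, assuming a 64-bit platform where sizeof(size_t) ==
// sizeof(__cfa_readyQ_mask_t) == 8:
//   __cfa_lane_mask_size = (64 - 8) / 8 = 7 mask words
//   __cfa_max_lanes      = 7 * 8 * 8    = 448 lanes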

//TODO: adjust cache size to ARCHITECTURE
// Structure holding the relaxed ready queue
struct __attribute__((aligned(128))) __ready_queue_t {
	// Data tracking how many/which lanes are used
	// Aligned to 128 for cache locality
	struct {
		// number of non-empty lanes
		volatile size_t count;

		// bit mask; set bits identify which lanes are non-empty
		volatile __cfa_readyQ_mask_t mask[ __cfa_lane_mask_size ];
	} used;
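
	// Illustrative example (hypothetical value): mask[0] == 0b0101 would mean
	// lanes 0 and 2 are non-empty and are candidates for the next pop.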

	// Data tracking the actual lanes
	// On a separate cacheline from the used struct, since
	// used can change on each push/pop while this data
	// only changes on shrink/grow
	struct __attribute__((aligned(64))) {
		// Array of lanes
		__intrusive_lane_t * volatile data;

		// Number of lanes (empty or not)
		volatile size_t count;
	} lanes;

	// Statistics
	#if !defined(__CFA_NO_STATISTICS__)
		__attribute__((aligned(64))) struct {
			struct {
				// Push statistics
				struct {
					// number of attempts at pushing something
					volatile size_t attempt;

					// number of successes at pushing
					volatile size_t success;
				} push;

				// Pop statistics
				struct {
					// number of reads of the mask;
					// picking an empty __cfa_readyQ_mask_t counts here
					// but not as an attempt
					volatile size_t maskrds;

					// number of attempts at popping something
					volatile size_t attempt;

					// number of successes at popping
					volatile size_t success;
				} pop;
			} pick;

			// stats on the "used" struct of the queue;
			// tracks the average number of queues that are not empty
			// when pushing / popping
			struct {
				volatile size_t value;
				volatile size_t count;
			} used;
		} global_stats;

	#endif
};

void  ?{}(__ready_queue_t & this);
void ^?{}(__ready_queue_t & this);

//-----------------------------------------------------------------------------
// Cluster
struct cluster {
	// Ready queue locks
	__clusterRWLock_t ready_lock;

	// Ready queue for threads
	__ready_queue_t ready_queue;

	// Name of the cluster
	const char * name;

	// Preemption rate on this cluster
	Duration preemption_rate;

	// List of processors
	__spinlock_t proc_list_lock;
	__dllist_t(struct processor) idles;

	// List of threads
	__spinlock_t thread_list_lock;
	__dllist_t(struct thread_desc) threads;
	unsigned int nthreads;

	// Linked-list fields
	struct __dbg_node_cltr {
		cluster * next;
		cluster * prev;
	} node;
};
extern Duration default_preemption();

void ?{} (cluster & this, const char name[], Duration preemption_rate);
void ^?{}(cluster & this);

static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
static inline void ?{} (cluster & this, const char name[])        { this{name, default_preemption()}; }

static inline [cluster *&, cluster *& ] __get( cluster & this ) {
	return this.node.[next, prev];
}
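
// Example: a cluster with a dedicated processor (illustrative sketch; the
// names and the 10`ms duration literal are assumptions):
//   cluster io = { "io-cluster", 10`ms };  // preempt every 10 milliseconds
//   processor p = { "io-proc", io };       // kernel thread serving io
//   // threads created on io are scheduled onto p via io's ready-queue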

static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //