source: libcfa/src/concurrency/preemption.cfa @ 231b18f

ADTarm-ehast-experimentalenumforall-pointer-decayjacob/cs343-translationnew-ast-unique-exprpthread-emulationqualifiedEnum
Last change on this file since 231b18f was 231b18f, checked in by Peter A. Buhr <pabuhr@…>, 3 years ago

add documentation describing the race on the ARM processor accessing TLS storage

  • Property mode set to 100644
File size: 20.2 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// signal.c --
8//
9// Author           : Thierry Delisle
10// Created On       : Mon Jun 5 14:20:42 2017
11// Last Modified By : Peter A. Buhr
12// Last Modified On : Fri Nov  6 07:42:13 2020
13// Update Count     : 54
14//
15
16#define __cforall_thread__
17
18#include "preemption.hfa"
19#include <assert.h>
20
21#include <errno.h>
22#include <stdio.h>
23#include <string.h>
24#include <unistd.h>
25#include <limits.h>                                                                             // PTHREAD_STACK_MIN
26
27#include "bits/signal.hfa"
28#include "kernel_private.hfa"
29
30#if !defined(__CFA_DEFAULT_PREEMPTION__)
31#define __CFA_DEFAULT_PREEMPTION__ 10`ms
32#endif
33
// Preemption duration used when nothing else is specified.
// The definition is weak, so another definition of default_preemption() supplied at link time replaces this one;
// otherwise the build-time value __CFA_DEFAULT_PREEMPTION__ is returned.
Duration default_preemption() __attribute__((weak)) {
        Duration quantum = __CFA_DEFAULT_PREEMPTION__;
        return quantum;
}
37
38// FwdDeclarations : timeout handlers
39static void preempt( processor   * this );
40static void timeout( $thread * this );
41
42// FwdDeclarations : Signal handlers
43static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
44static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
45static void sigHandler_segv     ( __CFA_SIGPARMS__ );
46static void sigHandler_ill      ( __CFA_SIGPARMS__ );
47static void sigHandler_fpe      ( __CFA_SIGPARMS__ );
48static void sigHandler_abort    ( __CFA_SIGPARMS__ );
49
50// FwdDeclarations : alarm thread main
51static void * alarm_loop( __attribute__((unused)) void * args );
52
53// Machine specific register name
54#if   defined( __i386 )
55#define CFA_REG_IP gregs[REG_EIP]
56#elif defined( __x86_64 )
57#define CFA_REG_IP gregs[REG_RIP]
58#elif defined( __arm__ )
59#define CFA_REG_IP arm_pc
60#elif defined( __aarch64__ )
61#define CFA_REG_IP pc
62#else
63#error unsupported hardware architecture
64#endif
65
// Event kernel: raw storage plus the public handle that __kernel_alarm_startup points at that storage.
KERNEL_STORAGE(event_kernel_t, event_kernel);   // private storage for the event kernel
event_kernel_t * event_kernel;                  // kernel public handle to the event kernel

// Alarm thread bookkeeping, private to this file.
static pthread_t alarm_thread;                  // pthread handle of the alarm thread
static void * alarm_stack;                      // value returned by __create_pthread for the alarm thread; freed at shutdown
70
// Constructor for the event kernel: default-constructs the alarm list and the lock that protects it.
static void ?{}(event_kernel_t & this) {
        (this.alarms){};
        (this.lock){};
}
75
// Payload values carried in si_value.sival_int of the SIGUSR1 preemption signal.
// preempt() sends PREEMPT_NORMAL; sigHandler_ctxSwitch() aborts on any value not listed here.
enum {
        // Ordinary preemption request
        PREEMPT_NORMAL    = 0,
        // Preemption of a processor whose do_terminate flag is expected to be set
        PREEMPT_TERMINATE = 1,
};
80
81//=============================================================================================
82// Kernel Preemption logic
83//=============================================================================================
84
// Pop and return the head of the alarm list when its deadline lies strictly before currtime.
// Returns 0p when the list is empty or when the head alarm is not yet due.
// NOTE(review): only the head is inspected, so the list is presumably kept sorted by deadline -- confirm in insert().
static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
        // Short-circuit keeps the deadline read from happening on an empty list
        bool none_due = ! & (*alarms)`first || (*alarms)`first.alarm >= currtime;
        if( none_due ) return 0p;

        return pop(alarms);
}
91
// Tick one frame of the Discrete Event Simulation for alarms.
// Every alarm whose deadline precedes a single cached timestamp is fired, periodic alarms are re-queued,
// and the kernel timer is re-armed for the new head of the list (if any).
// The caller in this file (alarm_loop) holds event_kernel->lock around the call.
static void tick_preemption(void) {
        alarm_node_t * node = 0p;                               // Used in the while loop but cannot be declared in the while condition
        alarm_list_t * alarms = &event_kernel->alarms;          // Local alias for ease of reading
        Time currtime = __kernel_get_time();                    // Check current time once so everything "happens at once"

        // Loop through everything that has expired
        while( (node = get_expired( alarms, currtime )) ) {
                // Cache the period: the handlers below are handed the node and run before it is consulted again
                Duration period = node->period;
                if( period == 0 ) {
                        node->set = false;                      // Node is one-shot, just mark it as not pending
                }

                // Dispatch on the kind of alarm
                if( node->type == Kernel ) {
                        preempt( node->proc );                  // Kernel alarm: signal the processor
                }
                else if( node->type == User ) {
                        timeout( node->thrd );                  // User alarm: wake the waiting thread
                }
                else {
                        node->callback(*node);                  // Anything else: run the registered callback
                }

                // Check if this is a periodic alarm
                if( period > 0 ) {
                        node->alarm = currtime + period;        // Next deadline is based on the cached current time
                        insert( alarms, node );                 // Reinsert the node for the next time it triggers
                }
        }

        // If there are still alarms pending, reset the timer
        if( & (*alarms)`first ) {
                // Compute the delay once, through the same `first accessor used everywhere else, and reuse it
                // for both the debug trace and the timer.
                Duration delta = (*alarms)`first.alarm - currtime;
                Duration capped = max(delta, 50`us);            // Never program the timer for less than 50us
                __cfadbg_print_buffer_decl(preemption, " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, delta.tv);

                __kernel_set_timer( capped );
        }
}
136
// Bring the preemption alarm of a processor in line with the requested duration.
//   duration > 0  : make sure the alarm is registered with period `duration`
//                   (registers it if unset, re-registers it if the period differs, otherwise leaves it alone)
//   duration == 0 : unregister the alarm if it is currently set and clear its deadline and period
// Any other combination leaves the alarm untouched.
void update_preemption( processor * this, Duration duration ) {
        alarm_node_t * alarm = this->preemption_alarm;

        if ( duration > 0 ) {
                if ( alarm->set ) {
                        // Already armed with the requested period: nothing to do
                        if ( alarm->period == duration ) return;

                        // Period changed: take the alarm out before rescheduling it
                        unregister_self( alarm );
                }

                alarm->alarm = __kernel_get_time() + duration;
                alarm->period = duration;
                register_self( alarm );
                return;
        }

        // Zero duration but alarm is set
        if ( duration == 0 && alarm->set ) {
                unregister_self( alarm );
                alarm->alarm = 0;
                alarm->period = 0;
        }
}
161
162//=============================================================================================
163// Kernel Signal Tools
164//=============================================================================================
165
// In a user-level threading system, there are a handful of thread-local variables where the following problem occurs on the ARM.
167//
168// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
169// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
170//
171// For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
172//
// _Thread_local volatile int interrupts;
// int main() {
//     interrupts = 0; // disable interrupts
// }
176//
177// which generates the following code on the ARM
178//
179// (gdb) disassemble main
180// Dump of assembler code for function main:
181//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
182//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
183//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
184//    0x000000000000061c <+12>: add     x1, x1, #0x10
185//    0x0000000000000620 <+16>: str     wzr, [x1]
186//    0x0000000000000624 <+20>: ret
187//
// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
// increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
// TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at a
// user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
// turning off interrupts on the wrong kernel thread.
195//
196// On the x86, the following code is generated for the same code fragment.
197//
198// (gdb) disassemble main
199// Dump of assembler code for function main:
200//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
201//    0x000000000040042c <+12>: xor    %eax,%eax
202//    0x000000000040042e <+14>: retq   
203//
// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
// register "fs".
206//
207// Hence, the ARM has base-displacement address for the general purpose registers, BUT not to the coprocessor
208// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
209//
210// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
211// thread on the same kernel thread.
212//
// The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
// available.
217
218__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
219
extern "C" {
        // Disable interrupts by incrementing the counter.
        // The `enabled` flag is cleared before the counter is touched, so a signal handler that runs in between
        // already sees preemption as disabled.
        void disable_interrupts() {
                with( kernelTLS.preemption_state ) {
                        #if GCC_VERSION > 50000
                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
                        #endif

                        // Set enabled flag to false
                        // should be atomic to avoid preemption in the middle of the operation.
                        // use memory order RELAXED since there is no inter-thread on this variable requirements
                        __atomic_store_n(&enabled, false, __ATOMIC_RELAXED);

                        // Signal the compiler that a fence is needed but only for signal handlers
                        __atomic_signal_fence(__ATOMIC_ACQUIRE);

                        __attribute__((unused)) unsigned short new_val = disable_count + 1;
                        disable_count = new_val;
                        verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
                }
        }

        // Enable interrupts by decrementing the counter.
        // If the counter reaches 0, the `enabled` flag is set again and any preemption that was recorded as pending
        // on the processor is executed immediately through force_yield.
        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
                processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
                /* paranoid */ verify( proc );

                with( kernelTLS.preemption_state ){
                        unsigned short prev = disable_count;
                        disable_count -= 1;
                        verify( prev != 0u );                     // If this triggers someone enabled interrupts that were already enabled

                        // Check if we need to preempt the thread because an interrupt was missed
                        if( prev == 1 ) {
                                #if GCC_VERSION > 50000
                                static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
                                #endif

                                // Set enabled flag to true
                                // should be atomic to avoid preemption in the middle of the operation.
                                // use memory order RELAXED since there is no inter-thread on this variable requirements
                                __atomic_store_n(&enabled, true, __ATOMIC_RELAXED);

                                // Signal the compiler that a fence is needed but only for signal handlers
                                __atomic_signal_fence(__ATOMIC_RELEASE);
                                if( proc->pending_preemption ) {
                                        proc->pending_preemption = false;
                                        force_yield( __POLL_PREEMPTION );
                                }
                        }
                }

                // For debugging purposes : keep track of the last person to enable the interrupts
                __cfaabi_dbg_debug_do( proc->last_enable = caller; )
        }

        // Enable interrupts by decrementing the counter.
        // Unlike enable_interrupts, do not execute any pending __cfactx_switch even if the counter reaches 0.
        void enable_interrupts_noPoll() {
                unsigned short prev = kernelTLS.preemption_state.disable_count;
                kernelTLS.preemption_state.disable_count -= 1;
                verifyf( prev != 0u, "Decremented from %u\n", prev );                     // If this triggers someone enabled interrupts that were already enabled
                if( prev == 1 ) {
                        #if GCC_VERSION > 50000
                        static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
                        #endif
                        // Set enabled flag to true
                        // should be atomic to avoid preemption in the middle of the operation.
                        // use memory order RELAXED since there is no inter-thread on this variable requirements
                        __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);

                        // Signal the compiler that a fence is needed but only for signal handlers
                        __atomic_signal_fence(__ATOMIC_RELEASE);
                }
        }
}
297
// pthread_sigmask wrapper : unblock a single signal for the calling thread.
// Aborts if the mask cannot be changed.
static inline void signal_unblock( int sig ) {
        sigset_t mask;
        sigemptyset( &mask );
        sigaddset( &mask, sig );

        // pthread_sigmask returns 0 on success and an error number on failure; it never returns -1
        if ( pthread_sigmask( SIG_UNBLOCK, &mask, 0p ) != 0 ) {
                abort( "internal error, pthread_sigmask" );
        }
}
308
// pthread_sigmask wrapper : block a single signal for the calling thread.
// Aborts if the mask cannot be changed.
static inline void signal_block( int sig ) {
        sigset_t mask;
        sigemptyset( &mask );
        sigaddset( &mask, sig );

        // pthread_sigmask returns 0 on success and an error number on failure; it never returns -1
        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) != 0 ) {
                abort( "internal error, pthread_sigmask" );
        }
}
319
// pthread_sigqueue wrapper : ask a processor to preempt.
// Queues SIGUSR1 with payload PREEMPT_NORMAL to the processor's kernel thread; the result of the call is not checked.
static void preempt( processor * this ) {
        sigval_t payload = { PREEMPT_NORMAL };
        pthread_sigqueue( this->kernel_thread, SIGUSR1, payload );
}
325
// Handler for user-level alarms (called from tick_preemption) : wake the thread waiting on the alarm.
// When statistics are compiled in, kernelTLS.this_stats is first pointed at the stats of the thread's current cluster.
static void timeout( $thread * this ) {
        #if !defined( __CFA_NO_STATISTICS__ )
                kernelTLS.this_stats = this->curr_cluster->stats;
        #endif

        unpark( this );
}
333
// KERNEL ONLY
// Decide whether the __cfactx_switch signal handler may preempt right now.
// Returns true  : preemption is safe
// Returns false : preemption is unsafe; it has been recorded as pending on the current processor
static inline bool preemption_ready() {
        // Preemption must be held back while it is disabled or while a preemption is already in progress
        bool must_defer = ! kernelTLS.preemption_state.enabled || kernelTLS.preemption_state.in_progress;

        // The pending flag is rewritten on every call, so a safe preemption also clears it
        kernelTLS.this_processor->pending_preemption = must_defer;
        return ! must_defer;
}
346
347//=============================================================================================
348// Kernel Signal Startup/Shutdown logic
349//=============================================================================================
350
// Startup routine to activate preemption
// Called from kernel_startup
// Initialises the event kernel, installs the SIGUSR1/SIGALRM handlers and starts the alarm thread.
void __kernel_alarm_startup() {
        __cfaabi_dbg_print_safe( "Kernel : Starting preemption\n" );

        // Start with preemption disabled until ready
        kernelTLS.preemption_state.enabled = false;
        kernelTLS.preemption_state.disable_count = 1;

        // Point the public handle at the static storage, then construct the event kernel in place
        event_kernel = (event_kernel_t *)&storage_event_kernel;
        (*event_kernel){};

        // Both handlers are installed with the same flags
        const int handler_flags = SA_SIGINFO | SA_RESTART;
        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, handler_flags ); // __cfactx_switch handler
        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , handler_flags ); // aborts: SIGALRM is meant to be consumed by the alarm thread

        // Block SIGALRM on this thread before the alarm thread is created
        signal_block( SIGALRM );

        alarm_stack = __create_pthread( &alarm_thread, alarm_loop, 0p );
}
372
// Shutdown routine to deactivate preemption
// Called from kernel_shutdown
// Blocks all signals on the calling thread, tells the alarm thread to exit, waits for it and releases its stack.
void __kernel_alarm_shutdown() {
        __cfaabi_dbg_print_safe( "Kernel : Preemption stopping\n" );

        // Block all signals since we are already shutting down.
        // pthread_sigmask is used because the behaviour of sigprocmask is unspecified in a multi-threaded process.
        sigset_t mask;
        sigfillset( &mask );
        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) != 0 ) {
                abort( "internal error, pthread_sigmask" );
        }

        // Notify the alarm thread of the shutdown; alarm_loop exits on any queued (SI_QUEUE) SIGALRM
        sigval val = { 1 };
        int ret = pthread_sigqueue( alarm_thread, SIGALRM, val );
        if ( ret != 0 ) {
                // Without the notification the join below would never return
                abort( "internal error, pthread_sigqueue returned %d", ret );
        }

        // Wait for the preemption thread to finish
        pthread_join( alarm_thread, 0p );
        free( alarm_stack );

        // Preemption is now fully stopped

        __cfaabi_dbg_print_safe( "Kernel : Preemption stopped\n" );
}
396
// RAII constructor/destructor pair for preemption_scope
// Used by threads to control when they want to receive preemption signals
//
// Constructor: builds the scope's alarm for `proc`, publishes it as the processor's preemption alarm and
// arms it with the preemption rate of the processor's cluster.
void ?{}( preemption_scope & this, processor * proc ) {
        (this.alarm){ proc, (Time){ 0 }, 0`s };
        this.proc = proc;
        proc->preemption_alarm = &this.alarm;

        Duration rate = proc->cltr->preemption_rate;
        update_preemption( proc, rate );
}
406
// Destructor for preemption_scope: disables interrupts, then disarms the processor's preemption alarm.
// No matching enable is performed here, so interrupts are still disabled when the destructor returns.
void ^?{}( preemption_scope & this ) {
        disable_interrupts();

        Duration off = 0`s;
        update_preemption( this.proc, off );
}
412
413//=============================================================================================
414// Kernel Signal Handlers
415//=============================================================================================
416
// Context switch signal handler
// Receives SIGUSR1 and, when preemption is currently permitted, causes the interrupted thread to yield.
// When preemption is not permitted, preemption_ready() records it as pending on the processor and the handler
// simply returns.
static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
        __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )

        // SKULLDUGGERY: if a thread creates a processor and then immediately deletes it,
        // the interrupt that is supposed to force the kernel thread to preempt might arrive
        // before the kernel thread has even started running. When that happens, an interrupt
        // with a null 'this_processor' will be caught, just ignore it.
        if(! kernelTLS.this_processor ) return;

        // Validate the signal payload. Cforall's choose statement does not fall through between cases,
        // so only an unknown payload reaches the abort.
        choose(sfp->si_value.sival_int) {
                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
                case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
                default:
                        abort( "internal error, signal value is %d", sfp->si_value.sival_int );
        }

        // Check if it is safe to preempt here
        if( !preemption_ready() ) { return; }

        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );

        // Sync flag : prevent recursive calls to the signal handler
        kernelTLS.preemption_state.in_progress = true;

        // Reinstall the signal mask saved in the signal context before context switching.
        #if GCC_VERSION > 50000
        static_assert( sizeof( sigset_t ) == sizeof( cxt->uc_sigmask ), "Expected cxt->uc_sigmask to be of sigset_t" );
        #endif
        // pthread_sigmask returns 0 on success and an error number on failure; it never returns -1
        if ( pthread_sigmask( SIG_SETMASK, (sigset_t *)&(cxt->uc_sigmask), 0p ) != 0 ) {
                abort( "internal error, sigprocmask" );
        }

        // TODO: this should go in finish action
        // Clear the in progress flag
        kernelTLS.preemption_state.in_progress = false;

        // Preemption can occur here

        force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
}
459
// SIGALRM signal handler
// SIGALRM is meant to be collected synchronously by the alarm thread (sigwaitinfo in alarm_loop),
// so an asynchronous delivery to this handler is treated as a fatal error.
static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
        abort("SIGALRM should never reach the signal handler");
}
463
// Main of the alarm thread
// Waits synchronously on SIGALRM. A timer/kernel generated SIGALRM ticks the alarm list (which sends SIGUSR1 to
// whoever needs it); a SIGALRM queued by another thread terminates the loop.
// The argument is unused and the routine always returns 0p.
static void * alarm_loop( __attribute__((unused)) void * args ) {
        // Register this kernel thread with a processor id that is flagged as not being a full processor
        __processor_id_t id;
        id.full_proc = false;
        id.id = doregister(&id);
        kernelTLS.this_proc_id = &id;

        // Block every signal on this thread so that SIGALRM only ever arrives through sigwaitinfo.
        // pthread_sigmask returns 0 on success and an error number on failure; it never returns -1
        sigset_t mask;
        sigfillset(&mask);
        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) != 0 ) {
            abort( "internal error, pthread_sigmask" );
        }

        // From here on the mask is reused as the set of signals to wait for: SIGALRM only
        sigemptyset( &mask );
        sigaddset( &mask, SIGALRM );

        // Main loop
        while( true ) {
                // Wait for a sigalrm
                siginfo_t info;
                int sig = sigwaitinfo( &mask, &info );

                if( sig < 0 ) {
                        //Error!
                        int err = errno;                        // Save errno before anything else can overwrite it
                        switch( err ) {
                                case EAGAIN :
                                case EINTR :
                                        {__cfaabi_dbg_print_buffer_decl( " KERNEL: Spurious wakeup %d.\n", err );}
                                        continue;
                                case EINVAL :
                                        abort( "Timeout was invalid." );
                                default:
                                        abort( "Unhandled error %d", err);
                        }
                }

                // If another signal arrived something went wrong
                assertf(sig == SIGALRM, "Kernel Internal Error, sigwait: Unexpected signal %d (%d : %d)\n", sig, info.si_code, info.si_value.sival_int);

                // Switch on the code (a.k.a. the sender) to decide what to do
                switch( info.si_code )
                {
                // Timers can apparently be marked as sent for the kernel
                // In either case, tick preemption
                case SI_TIMER:
                case SI_KERNEL:
                        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
                        tick_preemption();
                        unlock( event_kernel->lock );
                        break;
                // Signal was not sent by the kernel but by an other thread
                case SI_QUEUE:
                        // For now, other thread only signal the alarm thread to shut it down
                        // If this needs to change use info.si_value and handle the case here
                        goto EXIT;
                // Any other sender is ignored and the loop waits for the next signal
                default:
                        break;
                }
        }

EXIT:
        __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
        unregister(&id);
        return 0p;
}
532
533//=============================================================================================
534// Kernel Signal Debug
535//=============================================================================================
536
// Debug check of the preemption state of the calling thread; aborts on the first violated expectation:
//   - preemption must be enabled
//   - SIGUSR1 must not be blocked
//   - SIGALRM must be blocked
//   - SIGTERM must not be blocked
void __cfaabi_check_preemption() {
        if( ! kernelTLS.preemption_state.enabled ) { abort("Preemption should be ready"); }

        // Query the current signal mask without changing it (null new-set)
        sigset_t current;
        int rc = pthread_sigmask(0, ( const sigset_t * ) 0p, &current);  // workaround trac#208: cast should be unnecessary
        if(rc != 0) { abort("ERROR sigprocmask returned %d", rc); }

        // SIGUSR1 carries preemption requests and has to be deliverable
        rc = sigismember(&current, SIGUSR1);
        if(rc <  0) { abort("ERROR sigismember returned %d", rc); }
        if(rc == 1) { abort("ERROR SIGUSR1 is disabled"); }

        // SIGALRM has to stay blocked on this thread
        rc = sigismember(&current, SIGALRM);
        if(rc <  0) { abort("ERROR sigismember returned %d", rc); }
        if(rc == 0) { abort("ERROR SIGALRM is enabled"); }

        // SIGTERM has to be deliverable
        rc = sigismember(&current, SIGTERM);
        if(rc <  0) { abort("ERROR sigismember returned %d", rc); }
        if(rc == 1) { abort("ERROR SIGTERM is disabled"); }
}
558
559#ifdef __CFA_WITH_VERIFY__
// Verification helper: reports true exactly when preemption is currently disabled for the calling kernel thread.
bool __cfaabi_dbg_in_kernel() {
        bool preemptable = kernelTLS.preemption_state.enabled;
        return ! preemptable;
}
563#endif
564
565// Local Variables: //
566// mode: c //
567// tab-width: 4 //
568// End: //
Note: See TracBrowser for help on using the repository browser.