Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/preemption.cfa

    r8fc652e0 r231b18f  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Aug 26 16:46:03 2020
    13 // Update Count     : 53
     12// Last Modified On : Fri Nov  6 07:42:13 2020
     13// Update Count     : 54
    1414//
    1515
     
    164164//=============================================================================================
    165165
    166 //----------
    167 // special case for preemption since used often
    168 bool __preemption_enabled() {
    169         // create a assembler label before
    170         // marked as clobber all to avoid movement
    171         asm volatile("__cfaasm_check_before:":::"memory");
    172 
    173         // access tls as normal
    174         bool enabled = __cfaabi_tls.preemption_state.enabled;
    175 
    176         // create a assembler label after
    177         // marked as clobber all to avoid movement
    178         asm volatile("__cfaasm_check_after:":::"memory");
    179         return enabled;
    180 }
    181 
    182 //----------
    183 // Get data from the TLS block
    184 uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
    185 uintptr_t __cfatls_get( unsigned long int offset ) {
    186         // create a assembler label before
    187         // marked as clobber all to avoid movement
    188         asm volatile("__cfaasm_get_before:":::"memory");
    189 
    190         // access tls as normal (except for pointer arithmetic)
    191         uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
    192 
    193         // create a assembler label after
    194         // marked as clobber all to avoid movement
    195         asm volatile("__cfaasm_get_after:":::"memory");
    196         return val;
    197 }
    198 
    199 // //----------
    200 // // Write data to the TLS block
    201 // // sadly it loses the type information and can only write 1 word at a time
    202 // // use with __builtin_offsetof
    203 // void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));
    204 // void __cfatls_set(uintptr_t offset, void * value) {
    205 //     // create a assembler label before
    206 //     // marked as clobber all to avoid movement
    207 //     asm volatile("__cfaasm_set_before:":::"memory");
    208 
    209 //     // access tls as normal (except for type information)
    210 //     *(void**)(offset + (uintptr_t)&my_tls) = value;
    211 
    212 //     // create a assembler label after
    213 //     // marked as clobber all to avoid movement
    214 //     asm volatile("__cfaasm_set_after:":::"memory");
    215 // }
    216 
    217 // //----------
    218 // #include <stdio.h>
     166// In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
     167//
     168// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
     169// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
     170//
     171// For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
     172//
     173// _Thread_local volatile int interrupts;
    219174// int main() {
    220 //     // Get the information
    221 //     // Must use inline assembly to get access to label
    222 //     // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"
    223 //     // The big advantage of this approach is that there is 0 overhead for the read and writes function
    224 //     void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
    225 //     void * __cfaasm_addr_get_after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
    226 //     void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });
    227 //     void * __cfaasm_addr_set_after  = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });
    228 
    229 //     printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);
    230 //     printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);
    231 //     return 0;
    232 // }
     175//     interrupts = 0; // disable interrupts }
     176//
     177// which generates the following code on the ARM
     178//
     179// (gdb) disassemble main
     180// Dump of assembler code for function main:
     181//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
     182//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
     183//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
     184//    0x000000000000061c <+12>: add     x1, x1, #0x10
     185//    0x0000000000000620 <+16>: str     wzr, [x1]
     186//    0x0000000000000624 <+20>: ret
     187//
     188// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
     189// increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
     190// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
     191// TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at the
     192// user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
     193// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
     194// turning off interrupts on the wrong kernel thread.
     195//
     196// On the x86, the following code is generated for the same code fragment.
     197//
     198// (gdb) disassemble main
     199// Dump of assembler code for function main:
     200//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
     201//    0x000000000040042c <+12>: xor    %eax,%eax
     202//    0x000000000040042e <+14>: retq   
     203//
     204// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
     205// register "fs".
     206//
     207// Hence, the ARM has base-displacement addressing for the general-purpose registers, BUT not for the coprocessor
     208// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
     209//
     210// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
     211// thread on the same kernel thread.
     212//
     213// The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
     214// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
     215// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
     216// available.
    233217
    234218__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
     
    237221        // Disable interrupts by incrementing the counter
    238222        void disable_interrupts() {
    239                 // create a assembler label before
    240                 // marked as clobber all to avoid movement
    241                 asm volatile("__cfaasm_disable_before:":::"memory");
    242 
    243                 with( __cfaabi_tls.preemption_state ) {
     223                with( kernelTLS.preemption_state ) {
    244224                        #if GCC_VERSION > 50000
    245225                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     
    258238                        verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
    259239                }
    260 
    261                 // create a assembler label after
    262                 // marked as clobber all to avoid movement
    263                 asm volatile("__cfaasm_disable_after:":::"memory");
    264240        }
    265241
     
    267243        // If counter reaches 0, execute any pending __cfactx_switch
    268244        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
    269                 // create a assembler label before
    270                 // marked as clobber all to avoid movement
    271                 asm volatile("__cfaasm_enable_before:":::"memory");
    272 
    273                 processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
     245                processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
    274246                /* paranoid */ verify( proc );
    275247
    276                 with( __cfaabi_tls.preemption_state ){
     248                with( kernelTLS.preemption_state ){
    277249                        unsigned short prev = disable_count;
    278250                        disable_count -= 1;
     
    301273                // For debugging purposes : keep track of the last person to enable the interrupts
    302274                __cfaabi_dbg_debug_do( proc->last_enable = caller; )
    303 
    304                 // create a assembler label after
    305                 // marked as clobber all to avoid movement
    306                 asm volatile("__cfaasm_enable_after:":::"memory");
    307275        }
    308276
     
    310278        // Don't execute any pending __cfactx_switch even if counter reaches 0
    311279        void enable_interrupts_noPoll() {
    312                 // create a assembler label before
    313                 // marked as clobber all to avoid movement
    314                 asm volatile("__cfaasm_nopoll_before:":::"memory");
    315 
    316                 unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
    317                 __cfaabi_tls.preemption_state.disable_count -= 1;
     280                unsigned short prev = kernelTLS.preemption_state.disable_count;
     281                kernelTLS.preemption_state.disable_count -= 1;
    318282                verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
    319283                if( prev == 1 ) {
    320284                        #if GCC_VERSION > 50000
    321                         static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
     285                        static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
    322286                        #endif
    323287                        // Set enabled flag to true
    324288                        // should be atomic to avoid preemption in the middle of the operation.
    325289                        // use memory order RELAXED since there is no inter-thread on this variable requirements
    326                         __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
     290                        __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
    327291
    328292                        // Signal the compiler that a fence is needed but only for signal handlers
    329293                        __atomic_signal_fence(__ATOMIC_RELEASE);
    330294                }
    331 
    332                 // create a assembler label after
    333                 // marked as clobber all to avoid movement
    334                 asm volatile("__cfaasm_nopoll_after:":::"memory");
    335295        }
    336296}
     
    367327static void timeout( $thread * this ) {
    368328        #if !defined( __CFA_NO_STATISTICS__ )
    369                 kernelTLS().this_stats = this->curr_cluster->stats;
     329                kernelTLS.this_stats = this->curr_cluster->stats;
    370330        #endif
    371331        unpark( this );
     
    378338static inline bool preemption_ready() {
    379339        // Check if preemption is safe
    380         bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
     340        bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
    381341
    382342        // Adjust the pending flag accordingly
    383         __cfaabi_tls.this_processor->pending_preemption = !ready;
     343        kernelTLS.this_processor->pending_preemption = !ready;
    384344        return ready;
    385345}
     
    395355
    396356        // Start with preemption disabled until ready
    397         __cfaabi_tls.preemption_state.enabled = false;
    398         __cfaabi_tls.preemption_state.disable_count = 1;
     357        kernelTLS.preemption_state.enabled = false;
     358        kernelTLS.preemption_state.disable_count = 1;
    399359
    400360        // Initialize the event kernel
     
    454414// Kernel Signal Handlers
    455415//=============================================================================================
    456 struct asm_region {
    457         void * before;
    458         void * after;
    459 };
    460 
    461 //-----------------------------------------------------------------------------
    462 // Some assembly required
    463 #if defined( __i386 )
    464         #define __cfaasm_label( label ) \
    465                 ({ \
    466                         struct asm_region region; \
    467                         asm( \
    468                                 "movl $__cfaasm_" #label "_before, %[vb]\n\t" \
    469                                 "movl $__cfaasm_" #label "_after , %[va]\n\t" \
    470                                  : [vb]"=r"(region.before), [vb]"=r"(region.before) \
    471                         ); \
    472                         region; \
    473                 });
    474 #elif defined( __x86_64 )
    475         #ifdef __PIC__
    476                 #define PLT "@PLT"
    477         #else
    478                 #define PLT ""
    479         #endif
    480         #define __cfaasm_label( label ) \
    481                 ({ \
    482                         struct asm_region region; \
    483                         asm( \
    484                                 "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
    485                                 "movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
    486                                  : [vb]"=r"(region.before), [va]"=r"(region.after) \
    487                         ); \
    488                         region; \
    489                 });
    490 #elif defined( __aarch64__ )
    491         #error __cfaasm_label undefined for arm
    492 #else
    493         #error unknown hardware architecture
    494 #endif
    495416
    496417// Context switch signal handler
    497418// Receives SIGUSR1 signal and causes the current thread to yield
    498419static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
    499         void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
    500         __cfaabi_dbg_debug_do( last_interrupt = ip; )
     420        __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
    501421
    502422        // SKULLDUGGERY: if a thread creates a processor and the immediately deletes it,
     
    504424        // before the kernel thread has even started running. When that happens, an interrupt
    505425        // with a null 'this_processor' will be caught, just ignore it.
    506         if(! __cfaabi_tls.this_processor ) return;
     426        if(! kernelTLS.this_processor ) return;
    507427
    508428        choose(sfp->si_value.sival_int) {
    509429                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
    510                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
     430                case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
    511431                default:
    512432                        abort( "internal error, signal value is %d", sfp->si_value.sival_int );
     
    516436        if( !preemption_ready() ) { return; }
    517437
    518         struct asm_region region;
    519         region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
    520         region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
    521         region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
    522         region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
    523         region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
    524 
    525         __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
     438        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
    526439
    527440        // Sync flag : prevent recursive calls to the signal handler
    528         __cfaabi_tls.preemption_state.in_progress = true;
     441        kernelTLS.preemption_state.in_progress = true;
    529442
    530443        // Clear sighandler mask before context switching.
     
    536449        }
    537450
     451        // TODO: this should go in finish action
    538452        // Clear the in progress flag
    539         __cfaabi_tls.preemption_state.in_progress = false;
     453        kernelTLS.preemption_state.in_progress = false;
    540454
    541455        // Preemption can occur here
     
    554468        id.full_proc = false;
    555469        id.id = doregister(&id);
    556         __cfaabi_tls.this_proc_id = &id;
     470        kernelTLS.this_proc_id = &id;
    557471
    558472        // Block sigalrms to control when they arrive
     
    622536
    623537void __cfaabi_check_preemption() {
    624         bool ready = __preemption_enabled();
     538        bool ready = kernelTLS.preemption_state.enabled;
    625539        if(!ready) { abort("Preemption should be ready"); }
    626540
     
    645559#ifdef __CFA_WITH_VERIFY__
    646560bool __cfaabi_dbg_in_kernel() {
    647         return !__preemption_enabled();
     561        return !kernelTLS.preemption_state.enabled;
    648562}
    649563#endif
Note: See TracChangeset for help on using the changeset viewer.