Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/preemption.cfa

    r231b18f r8fc652e0  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Nov  6 07:42:13 2020
    13 // Update Count     : 54
     12// Last Modified On : Wed Aug 26 16:46:03 2020
     13// Update Count     : 53
    1414//
    1515
     
    164164//=============================================================================================
    165165
    166 // In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
    167 //
    168 // For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
    169 // enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
    170 //
    171 // For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
    172 //
    173 // _Thread_local volatile int interrupts;
     166//----------
     167// special case for preemption since used often
     168bool __preemption_enabled() {
      169        // create an assembler label before
     170        // marked as clobber all to avoid movement
     171        asm volatile("__cfaasm_check_before:":::"memory");
     172
     173        // access tls as normal
     174        bool enabled = __cfaabi_tls.preemption_state.enabled;
     175
      176        // create an assembler label after
     177        // marked as clobber all to avoid movement
     178        asm volatile("__cfaasm_check_after:":::"memory");
     179        return enabled;
     180}
     181
     182//----------
     183// Get data from the TLS block
     184uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
     185uintptr_t __cfatls_get( unsigned long int offset ) {
      186        // create an assembler label before
     187        // marked as clobber all to avoid movement
     188        asm volatile("__cfaasm_get_before:":::"memory");
     189
     190        // access tls as normal (except for pointer arithmetic)
     191        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
     192
      193        // create an assembler label after
     194        // marked as clobber all to avoid movement
     195        asm volatile("__cfaasm_get_after:":::"memory");
     196        return val;
     197}
     198
     199// //----------
     200// // Write data to the TLS block
      201// // sadly it loses the type information and can only write 1 word at a time
     202// // use with __builtin_offsetof
     203// void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));
     204// void __cfatls_set(uintptr_t offset, void * value) {
      205//     // create an assembler label before
     206//     // marked as clobber all to avoid movement
     207//     asm volatile("__cfaasm_set_before:":::"memory");
     208
     209//     // access tls as normal (except for type information)
     210//     *(void**)(offset + (uintptr_t)&my_tls) = value;
     211
      212//     // create an assembler label after
     213//     // marked as clobber all to avoid movement
     214//     asm volatile("__cfaasm_set_after:":::"memory");
     215// }
     216
     217// //----------
     218// #include <stdio.h>
    174219// int main() {
    175 //     interrupts = 0; // disable interrupts }
    176 //
    177 // which generates the following code on the ARM
    178 //
    179 // (gdb) disassemble main
    180 // Dump of assembler code for function main:
    181 //    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
    182 //    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
    183 //    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
    184 //    0x000000000000061c <+12>: add     x1, x1, #0x10
    185 //    0x0000000000000620 <+16>: str     wzr, [x1]
    186 //    0x0000000000000624 <+20>: ret
    187 //
    188 // The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
    189 // increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
    190 // "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
    191 // TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at a
    192 // user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
    193 // the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
    194 // turning off interrupts on the wrong kernel thread.
    195 //
    196 // On the x86, the following code is generated for the same code fragment.
    197 //
    198 // (gdb) disassemble main
    199 // Dump of assembler code for function main:
    200 //    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
    201 //    0x000000000040042c <+12>: xor    %eax,%eax
    202 //    0x000000000040042e <+14>: retq   
    203 //
    204 // and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
    205 // register "fs".
    206 //
    207 // Hence, the ARM has base-displacement address for the general purpose registers, BUT not to the coprocessor
    208 // registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
    209 //
    210 // Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
    211 // thread on the same kernel thread.
    212 //
    213 // The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
    214 // registers are second-class registers with respect to the instruction set. One possible answer is that they did not
    215 // want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
    216 // available.
     220//     // Get the information
     221//     // Must use inline assembly to get access to label
     222//     // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"
     223//     // The big advantage of this approach is that there is 0 overhead for the read and writes function
     224//     void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
     225//     void * __cfaasm_addr_get_after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
     226//     void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });
     227//     void * __cfaasm_addr_set_after  = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });
     228
     229//     printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);
     230//     printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);
     231//     return 0;
     232// }
    217233
    218234__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
     
    221237        // Disable interrupts by incrementing the counter
    222238        void disable_interrupts() {
    223                 with( kernelTLS.preemption_state ) {
      239                // create an assembler label before
     240                // marked as clobber all to avoid movement
     241                asm volatile("__cfaasm_disable_before:":::"memory");
     242
     243                with( __cfaabi_tls.preemption_state ) {
    224244                        #if GCC_VERSION > 50000
    225245                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     
    238258                        verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
    239259                }
     260
      261                // create an assembler label after
     262                // marked as clobber all to avoid movement
     263                asm volatile("__cfaasm_disable_after:":::"memory");
    240264        }
    241265
     
    243267        // If counter reaches 0, execute any pending __cfactx_switch
    244268        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
    245                 processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
      269                // create an assembler label before
     270                // marked as clobber all to avoid movement
     271                asm volatile("__cfaasm_enable_before:":::"memory");
     272
     273                processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
    246274                /* paranoid */ verify( proc );
    247275
    248                 with( kernelTLS.preemption_state ){
     276                with( __cfaabi_tls.preemption_state ){
    249277                        unsigned short prev = disable_count;
    250278                        disable_count -= 1;
     
    273301                // For debugging purposes : keep track of the last person to enable the interrupts
    274302                __cfaabi_dbg_debug_do( proc->last_enable = caller; )
     303
      304                // create an assembler label after
     305                // marked as clobber all to avoid movement
     306                asm volatile("__cfaasm_enable_after:":::"memory");
    275307        }
    276308
     
    278310        // Don't execute any pending __cfactx_switch even if counter reaches 0
    279311        void enable_interrupts_noPoll() {
    280                 unsigned short prev = kernelTLS.preemption_state.disable_count;
    281                 kernelTLS.preemption_state.disable_count -= 1;
      312                // create an assembler label before
     313                // marked as clobber all to avoid movement
     314                asm volatile("__cfaasm_nopoll_before:":::"memory");
     315
     316                unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
     317                __cfaabi_tls.preemption_state.disable_count -= 1;
    282318        verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabling already-enabled interrupts
    283319                if( prev == 1 ) {
    284320                        #if GCC_VERSION > 50000
    285                         static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
     321                        static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
    286322                        #endif
    287323                        // Set enabled flag to true
    288324                        // should be atomic to avoid preemption in the middle of the operation.
    289325                        // use memory order RELAXED since there is no inter-thread on this variable requirements
    290                         __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
     326                        __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
    291327
    292328                        // Signal the compiler that a fence is needed but only for signal handlers
    293329                        __atomic_signal_fence(__ATOMIC_RELEASE);
    294330                }
     331
      332                // create an assembler label after
     333                // marked as clobber all to avoid movement
     334                asm volatile("__cfaasm_nopoll_after:":::"memory");
    295335        }
    296336}
     
    327367static void timeout( $thread * this ) {
    328368        #if !defined( __CFA_NO_STATISTICS__ )
    329                 kernelTLS.this_stats = this->curr_cluster->stats;
     369                kernelTLS().this_stats = this->curr_cluster->stats;
    330370        #endif
    331371        unpark( this );
     
    338378static inline bool preemption_ready() {
    339379        // Check if preemption is safe
    340         bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
     380        bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
    341381
    342382        // Adjust the pending flag accordingly
    343         kernelTLS.this_processor->pending_preemption = !ready;
     383        __cfaabi_tls.this_processor->pending_preemption = !ready;
    344384        return ready;
    345385}
     
    355395
    356396        // Start with preemption disabled until ready
    357         kernelTLS.preemption_state.enabled = false;
    358         kernelTLS.preemption_state.disable_count = 1;
     397        __cfaabi_tls.preemption_state.enabled = false;
     398        __cfaabi_tls.preemption_state.disable_count = 1;
    359399
    360400        // Initialize the event kernel
     
    414454// Kernel Signal Handlers
    415455//=============================================================================================
     456struct asm_region {
     457        void * before;
     458        void * after;
     459};
     460
     461//-----------------------------------------------------------------------------
     462// Some assembly required
     463#if defined( __i386 )
     464        #define __cfaasm_label( label ) \
     465                ({ \
     466                        struct asm_region region; \
     467                        asm( \
     468                                "movl $__cfaasm_" #label "_before, %[vb]\n\t" \
     469                                "movl $__cfaasm_" #label "_after , %[va]\n\t" \
      470                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     471                        ); \
     472                        region; \
     473                });
     474#elif defined( __x86_64 )
     475        #ifdef __PIC__
     476                #define PLT "@PLT"
     477        #else
     478                #define PLT ""
     479        #endif
     480        #define __cfaasm_label( label ) \
     481                ({ \
     482                        struct asm_region region; \
     483                        asm( \
     484                                "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
     485                                "movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
     486                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     487                        ); \
     488                        region; \
     489                });
     490#elif defined( __aarch64__ )
     491        #error __cfaasm_label undefined for arm
     492#else
     493        #error unknown hardware architecture
     494#endif
    416495
    417496// Context switch signal handler
    418497// Receives SIGUSR1 signal and causes the current thread to yield
    419498static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
    420         __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
     499        void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
     500        __cfaabi_dbg_debug_do( last_interrupt = ip; )
    421501
    422502        // SKULLDUGGERY: if a thread creates a processor and the immediately deletes it,
     
    424504        // before the kernel thread has even started running. When that happens, an interrupt
    425505        // with a null 'this_processor' will be caught, just ignore it.
    426         if(! kernelTLS.this_processor ) return;
     506        if(! __cfaabi_tls.this_processor ) return;
    427507
    428508        choose(sfp->si_value.sival_int) {
    429509                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
    430                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
     510                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
    431511                default:
    432512                        abort( "internal error, signal value is %d", sfp->si_value.sival_int );
     
    436516        if( !preemption_ready() ) { return; }
    437517
    438         __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
     518        struct asm_region region;
     519        region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
     520        region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
     521        region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
     522        region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
     523        region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
     524
     525        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
    439526
    440527        // Sync flag : prevent recursive calls to the signal handler
    441         kernelTLS.preemption_state.in_progress = true;
     528        __cfaabi_tls.preemption_state.in_progress = true;
    442529
    443530        // Clear sighandler mask before context switching.
     
    449536        }
    450537
    451         // TODO: this should go in finish action
    452538        // Clear the in progress flag
    453         kernelTLS.preemption_state.in_progress = false;
     539        __cfaabi_tls.preemption_state.in_progress = false;
    454540
    455541        // Preemption can occur here
     
    468554        id.full_proc = false;
    469555        id.id = doregister(&id);
    470         kernelTLS.this_proc_id = &id;
     556        __cfaabi_tls.this_proc_id = &id;
    471557
    472558        // Block sigalrms to control when they arrive
     
    536622
    537623void __cfaabi_check_preemption() {
    538         bool ready = kernelTLS.preemption_state.enabled;
     624        bool ready = __preemption_enabled();
    539625        if(!ready) { abort("Preemption should be ready"); }
    540626
     
    559645#ifdef __CFA_WITH_VERIFY__
    560646bool __cfaabi_dbg_in_kernel() {
    561         return !kernelTLS.preemption_state.enabled;
     647        return !__preemption_enabled();
    562648}
    563649#endif
Note: See TracChangeset for help on using the changeset viewer.