Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/preemption.cfa

    r8fc652e0 r82a2fed  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Aug 26 16:46:03 2020
    13 // Update Count     : 53
     12// Last Modified On : Fri Nov  6 07:42:13 2020
     13// Update Count     : 54
    1414//
    1515
     
    163163// Kernel Signal Tools
    164164//=============================================================================================
     165// In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
     166//
     167// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
     168// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
     169//
     170// For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
     171//
     172// _Thread_local volatile int interrupts;
     173// int main() {
     174//     interrupts = 0; // disable interrupts }
     175//
     176// which generates the following code on the ARM
     177//
     178// (gdb) disassemble main
     179// Dump of assembler code for function main:
     180//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
     181//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
     182//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
     183//    0x000000000000061c <+12>: add     x1, x1, #0x10
     184//    0x0000000000000620 <+16>: str     wzr, [x1]
     185//    0x0000000000000624 <+20>: ret
     186//
     187// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
     188// increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
     189// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
     190// TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at a
     191// user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
     192// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
     193// turning off interrupts on the wrong kernel thread.
     194//
     195// On the x86, the following code is generated for the same code fragment.
     196//
     197// (gdb) disassemble main
     198// Dump of assembler code for function main:
     199//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
     200//    0x000000000040042c <+12>: xor    %eax,%eax
     201//    0x000000000040042e <+14>: retq
     202//
     203// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
     204// register "fs".
     205//
     206// Hence, the ARM has base-displacement addressing for the general purpose registers, BUT not to the coprocessor
     207// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
     208//
     209// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
     210// thread on the same kernel thread.
     211//
     212// The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
     213// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
     214// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
     215// available.
    165216
    166217//----------
     
    180231}
    181232
     233struct asm_region {
     234        void * before;
     235        void * after;
     236};
     237
     238static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
     239        return ip >= region.before && ip <= region.after;
     240}
     241
     242
    182243//----------
    183244// Get data from the TLS block
     245// struct asm_region __cfaasm_get;
    184246uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
    185247uintptr_t __cfatls_get( unsigned long int offset ) {
     248        // __cfaasm_get.before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
     249        // __cfaasm_get.after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
    186250        // create a assembler label before
    187251        // marked as clobber all to avoid movement
     
    196260        return val;
    197261}
    198 
    199 // //----------
    200 // // Write data to the TLS block
    201 // // sadly it looses the type information and can only write 1 word at a time
    202 // // use with __builtin_offsetof
    203 // void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));
    204 // void __cfatls_set(uintptr_t offset, void * value) {
    205 //     // create a assembler label before
    206 //     // marked as clobber all to avoid movement
    207 //     asm volatile("__cfaasm_set_before:":::"memory");
    208 
    209 //     // access tls as normal (except for type information)
    210 //     *(void**)(offset + (uintptr_t)&my_tls) = value;
    211 
    212 //     // create a assembler label after
    213 //     // marked as clobber all to avoid movement
    214 //     asm volatile("__cfaasm_set_after:":::"memory");
    215 // }
    216 
    217 // //----------
    218 // #include <stdio.h>
    219 // int main() {
    220 //     // Get the information
    221 //     // Must use inline assembly to get access to label
    222 //     // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"
    223 //     // The big advantage of this approach is that there is 0 overhead for the read and writes function
    224 //     void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
    225 //     void * __cfaasm_addr_get_after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
    226 //     void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });
    227 //     void * __cfaasm_addr_set_after  = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });
    228 
    229 //     printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);
    230 //     printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);
    231 //     return 0;
    232 // }
    233 
    234 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
    235262
    236263extern "C" {
     
    239266                // create a assembler label before
    240267                // marked as clobber all to avoid movement
    241                 asm volatile("__cfaasm_disable_before:":::"memory");
     268                asm volatile("__cfaasm_dsable_before:":::"memory");
    242269
    243270                with( __cfaabi_tls.preemption_state ) {
     
    261288                // create a assembler label after
    262289                // marked as clobber all to avoid movement
    263                 asm volatile("__cfaasm_disable_after:":::"memory");
     290                asm volatile("__cfaasm_dsable_after:":::"memory");
    264291        }
    265292
     
    269296                // create a assembler label before
    270297                // marked as clobber all to avoid movement
    271                 asm volatile("__cfaasm_enable_before:":::"memory");
     298                asm volatile("__cfaasm_enble_before:":::"memory");
    272299
    273300                processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
     
    304331                // create a assembler label after
    305332                // marked as clobber all to avoid movement
    306                 asm volatile("__cfaasm_enable_after:":::"memory");
     333                asm volatile("__cfaasm_enble_after:":::"memory");
    307334        }
    308335
     
    372399}
    373400
     401//-----------------------------------------------------------------------------
     402// Some assembly required
     403#if defined( __i386 )
     404        #ifdef __PIC__
     405                #define RELOC_PRELUDE( label ) \
     406                        "calll   .Lcfaasm_prelude_" #label "$pb\n\t" \
     407                        ".Lcfaasm_prelude_" #label "$pb:\n\t" \
     408                        "popl    %%eax\n\t" \
     409                        ".Lcfaasm_prelude_" #label "_end:\n\t" \
     410                        "addl    $_GLOBAL_OFFSET_TABLE_+(.Lcfaasm_prelude_" #label "_end-.Lcfaasm_prelude_" #label "$pb), %%eax\n\t"
     411                #define RELOC_PREFIX ""
     412                #define RELOC_SUFFIX "@GOT(%%eax)"
     413        #else
     414                #define RELOC_PREFIX "$"
     415                #define RELOC_SUFFIX ""
     416        #endif
     417        #define __cfaasm_label( label ) static struct asm_region label = \
     418                ({ \
     419                        struct asm_region region; \
     420                        asm( \
     421                                RELOC_PRELUDE( label ) \
     422                                "movl " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     423                                "movl " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     424                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     425                        ); \
     426                        region; \
     427                });
     428#elif defined( __x86_64 )
     429        #ifdef __PIC__
     430                #define RELOC_PREFIX ""
     431                #define RELOC_SUFFIX "@GOTPCREL(%%rip)"
     432        #else
     433                #define RELOC_PREFIX "$"
     434                #define RELOC_SUFFIX ""
     435        #endif
     436        #define __cfaasm_label( label ) static struct asm_region label = \
     437                ({ \
     438                        struct asm_region region; \
     439                        asm( \
     440                                "movq " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     441                                "movq " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     442                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     443                        ); \
     444                        region; \
     445                });
     446#elif defined( __aarch64__ )
     447        #ifdef __PIC__
     448                #define RELOC_TAG "@PLT"
     449        #else
     450                #define RELOC_TAG ""
     451        #endif
     452        #define __cfaasm_label( label ) \
     453                ({ \
     454                        struct asm_region region; \
     455                        asm( \
     456                                "mov %[vb], __cfaasm_" #label "_before@GOTPCREL(%%rip)"  "\n\t" \
     457                                "mov %[va], __cfaasm_" #label "_after@GOTPCREL(%%rip)"   "\n\t" \
     458                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     459                        ); \
     460                        region; \
     461                });
     462#else
     463        #error unknown hardware architecture
     464#endif
     465
    374466// KERNEL ONLY
    375467// Check if a __cfactx_switch signal handler should defer
    376468// If true  : preemption is safe
    377469// If false : preemption is unsafe and marked as pending
    378 static inline bool preemption_ready() {
     470static inline bool preemption_ready( void * ip ) {
     471        // Get all the region for which it is not safe to preempt
     472        __cfaasm_label( get    );
     473        __cfaasm_label( check  );
     474        __cfaasm_label( dsable );
     475        __cfaasm_label( enble );
     476        __cfaasm_label( nopoll );
     477
    379478        // Check if preemption is safe
    380         bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
    381 
     479        bool ready = true;
     480        if( __cfaasm_in( ip, get    ) ) { ready = false; goto EXIT; };
     481        if( __cfaasm_in( ip, check  ) ) { ready = false; goto EXIT; };
     482        if( __cfaasm_in( ip, dsable ) ) { ready = false; goto EXIT; };
     483        if( __cfaasm_in( ip, enble  ) ) { ready = false; goto EXIT; };
     484        if( __cfaasm_in( ip, nopoll ) ) { ready = false; goto EXIT; };
     485        if( !__cfaabi_tls.preemption_state.enabled) { ready = false; goto EXIT; };
     486        if( __cfaabi_tls.preemption_state.in_progress ) { ready = false; goto EXIT; };
     487
     488EXIT:
    382489        // Adjust the pending flag accordingly
    383490        __cfaabi_tls.this_processor->pending_preemption = !ready;
     
    454561// Kernel Signal Handlers
    455562//=============================================================================================
    456 struct asm_region {
    457         void * before;
    458         void * after;
    459 };
    460 
    461 //-----------------------------------------------------------------------------
    462 // Some assembly required
    463 #if defined( __i386 )
    464         #define __cfaasm_label( label ) \
    465                 ({ \
    466                         struct asm_region region; \
    467                         asm( \
    468                                 "movl $__cfaasm_" #label "_before, %[vb]\n\t" \
    469                                 "movl $__cfaasm_" #label "_after , %[va]\n\t" \
    470                                  : [vb]"=r"(region.before), [vb]"=r"(region.before) \
    471                         ); \
    472                         region; \
    473                 });
    474 #elif defined( __x86_64 )
    475         #ifdef __PIC__
    476                 #define PLT "@PLT"
    477         #else
    478                 #define PLT ""
    479         #endif
    480         #define __cfaasm_label( label ) \
    481                 ({ \
    482                         struct asm_region region; \
    483                         asm( \
    484                                 "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
    485                                 "movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
    486                                  : [vb]"=r"(region.before), [va]"=r"(region.after) \
    487                         ); \
    488                         region; \
    489                 });
    490 #elif defined( __aarch64__ )
    491         #error __cfaasm_label undefined for arm
    492 #else
    493         #error unknown hardware architecture
    494 #endif
     563__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
    495564
    496565// Context switch signal handler
     
    514583
    515584        // Check if it is safe to preempt here
    516         if( !preemption_ready() ) { return; }
    517 
    518         struct asm_region region;
    519         region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
    520         region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
    521         region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
    522         region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
    523         region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
     585        if( !preemption_ready( ip ) ) { return; }
    524586
    525587        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
Note: See TracChangeset for help on using the changeset viewer.