Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/preemption.cfa

    r82a2fed r8fc652e0  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Nov  6 07:42:13 2020
    13 // Update Count     : 54
     12// Last Modified On : Wed Aug 26 16:46:03 2020
     13// Update Count     : 53
    1414//
    1515
     
    163163// Kernel Signal Tools
    164164//=============================================================================================
    165 // In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
    166 //
    167 // For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
    168 // enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
    169 //
    170 // For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
    171 //
    172 // _Thread_local volatile int interrupts;
    173 // int main() {
    174 //     interrupts = 0; // disable interrupts }
    175 //
    176 // which generates the following code on the ARM
    177 //
    178 // (gdb) disassemble main
    179 // Dump of assembler code for function main:
    180 //    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
    181 //    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
    182 //    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
    183 //    0x000000000000061c <+12>: add     x1, x1, #0x10
    184 //    0x0000000000000620 <+16>: str     wzr, [x1]
    185 //    0x0000000000000624 <+20>: ret
    186 //
    187 // The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
    188 // increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
    189 // "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
    190 // TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at a
    191 // user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
    192 // the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
    193 // turning off interrupts on the wrong kernel thread.
    194 //
    195 // On the x86, the following code is generated for the same code fragment.
    196 //
    197 // (gdb) disassemble main
    198 // Dump of assembler code for function main:
    199 //    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
    200 //    0x000000000040042c <+12>: xor    %eax,%eax
    201 //    0x000000000040042e <+14>: retq
    202 //
    203 // and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
    204 // register "fs".
    205 //
    206 // Hence, the ARM has base-displacement address for the general purpose registers, BUT not to the coprocessor
    207 // registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
    208 //
    209 // Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
    210 // thread on the same kernel thread.
    211 //
    212 // The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
    213 // registers are second-class registers with respect to the instruction set. One possible answer is that they did not
    214 // want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
    215 // available.
    216165
    217166//----------
     
    231180}
    232181
    233 struct asm_region {
    234         void * before;
    235         void * after;
    236 };
    237 
    238 static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
    239         return ip >= region.before && ip <= region.after;
    240 }
    241 
    242 
    243182//----------
    244183// Get data from the TLS block
    245 // struct asm_region __cfaasm_get;
    246184uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
    247185uintptr_t __cfatls_get( unsigned long int offset ) {
    248         // __cfaasm_get.before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
    249         // __cfaasm_get.after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
    250186        // create an assembler label before
    251187        // marked as clobber all to avoid movement
     
    260196        return val;
    261197}
     198
     199// //----------
     200// // Write data to the TLS block
     201// // sadly it loses the type information and can only write 1 word at a time
     202// // use with __builtin_offsetof
     203// void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));
     204// void __cfatls_set(uintptr_t offset, void * value) {
     205//     // create an assembler label before
     206//     // marked as clobber all to avoid movement
     207//     asm volatile("__cfaasm_set_before:":::"memory");
     208
     209//     // access tls as normal (except for type information)
     210//     *(void**)(offset + (uintptr_t)&my_tls) = value;
     211
     212//     // create an assembler label after
     213//     // marked as clobber all to avoid movement
     214//     asm volatile("__cfaasm_set_after:":::"memory");
     215// }
     216
     217// //----------
     218// #include <stdio.h>
     219// int main() {
     220//     // Get the information
     221//     // Must use inline assembly to get access to label
     222//     // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"
     223//     // The big advantage of this approach is that there is 0 overhead for the read and writes function
     224//     void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
     225//     void * __cfaasm_addr_get_after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
     226//     void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });
     227//     void * __cfaasm_addr_set_after  = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });
     228
     229//     printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);
     230//     printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);
     231//     return 0;
     232// }
     233
     234__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
    262235
    263236extern "C" {
     
    266239                // create an assembler label before
    267240                // marked as clobber all to avoid movement
    268                 asm volatile("__cfaasm_dsable_before:":::"memory");
     241                asm volatile("__cfaasm_disable_before:":::"memory");
    269242
    270243                with( __cfaabi_tls.preemption_state ) {
     
    288261                // create an assembler label after
    289262                // marked as clobber all to avoid movement
    290                 asm volatile("__cfaasm_dsable_after:":::"memory");
     263                asm volatile("__cfaasm_disable_after:":::"memory");
    291264        }
    292265
     
    296269                // create an assembler label before
    297270                // marked as clobber all to avoid movement
    298                 asm volatile("__cfaasm_enble_before:":::"memory");
     271                asm volatile("__cfaasm_enable_before:":::"memory");
    299272
    300273                processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
     
    331304                // create an assembler label after
    332305                // marked as clobber all to avoid movement
    333                 asm volatile("__cfaasm_enble_after:":::"memory");
     306                asm volatile("__cfaasm_enable_after:":::"memory");
    334307        }
    335308
     
    399372}
    400373
    401 //-----------------------------------------------------------------------------
    402 // Some assembly required
    403 #if defined( __i386 )
    404         #ifdef __PIC__
    405                 #define RELOC_PRELUDE( label ) \
    406                         "calll   .Lcfaasm_prelude_" #label "$pb\n\t" \
    407                         ".Lcfaasm_prelude_" #label "$pb:\n\t" \
    408                         "popl    %%eax\n\t" \
    409                         ".Lcfaasm_prelude_" #label "_end:\n\t" \
    410                         "addl    $_GLOBAL_OFFSET_TABLE_+(.Lcfaasm_prelude_" #label "_end-.Lcfaasm_prelude_" #label "$pb), %%eax\n\t"
    411                 #define RELOC_PREFIX ""
    412                 #define RELOC_SUFFIX "@GOT(%%eax)"
    413         #else
    414                 #define RELOC_PREFIX "$"
    415                 #define RELOC_SUFFIX ""
    416         #endif
    417         #define __cfaasm_label( label ) static struct asm_region label = \
    418                 ({ \
    419                         struct asm_region region; \
    420                         asm( \
    421                                 RELOC_PRELUDE( label ) \
    422                                 "movl " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
    423                                 "movl " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
    424                                  : [vb]"=r"(region.before), [va]"=r"(region.after) \
    425                         ); \
    426                         region; \
    427                 });
    428 #elif defined( __x86_64 )
    429         #ifdef __PIC__
    430                 #define RELOC_PREFIX ""
    431                 #define RELOC_SUFFIX "@GOTPCREL(%%rip)"
    432         #else
    433                 #define RELOC_PREFIX "$"
    434                 #define RELOC_SUFFIX ""
    435         #endif
    436         #define __cfaasm_label( label ) static struct asm_region label = \
    437                 ({ \
    438                         struct asm_region region; \
    439                         asm( \
    440                                 "movq " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
    441                                 "movq " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
    442                                  : [vb]"=r"(region.before), [va]"=r"(region.after) \
    443                         ); \
    444                         region; \
    445                 });
    446 #elif defined( __aarch64__ )
    447         #ifdef __PIC__
    448                 #define RELOC_TAG "@PLT"
    449         #else
    450                 #define RELOC_TAG ""
    451         #endif
    452         #define __cfaasm_label( label ) \
    453                 ({ \
    454                         struct asm_region region; \
    455                         asm( \
    456                                 "mov %[vb], __cfaasm_" #label "_before@GOTPCREL(%%rip)"  "\n\t" \
    457                                 "mov %[va], __cfaasm_" #label "_after@GOTPCREL(%%rip)"   "\n\t" \
    458                                  : [vb]"=r"(region.before), [va]"=r"(region.after) \
    459                         ); \
    460                         region; \
    461                 });
    462 #else
    463         #error unknown hardware architecture
    464 #endif
    465 
    466374// KERNEL ONLY
    467375// Check if a __cfactx_switch signal handler should defer
    468376// If true  : preemption is safe
    469377// If false : preemption is unsafe and marked as pending
    470 static inline bool preemption_ready( void * ip ) {
    471         // Get all the region for which it is not safe to preempt
    472         __cfaasm_label( get    );
    473         __cfaasm_label( check  );
    474         __cfaasm_label( dsable );
    475         __cfaasm_label( enble );
    476         __cfaasm_label( nopoll );
    477 
     378static inline bool preemption_ready() {
    478379        // Check if preemption is safe
    479         bool ready = true;
    480         if( __cfaasm_in( ip, get    ) ) { ready = false; goto EXIT; };
    481         if( __cfaasm_in( ip, check  ) ) { ready = false; goto EXIT; };
    482         if( __cfaasm_in( ip, dsable ) ) { ready = false; goto EXIT; };
    483         if( __cfaasm_in( ip, enble  ) ) { ready = false; goto EXIT; };
    484         if( __cfaasm_in( ip, nopoll ) ) { ready = false; goto EXIT; };
    485         if( !__cfaabi_tls.preemption_state.enabled) { ready = false; goto EXIT; };
    486         if( __cfaabi_tls.preemption_state.in_progress ) { ready = false; goto EXIT; };
    487 
    488 EXIT:
     380        bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
     381
    489382        // Adjust the pending flag accordingly
    490383        __cfaabi_tls.this_processor->pending_preemption = !ready;
     
    561454// Kernel Signal Handlers
    562455//=============================================================================================
    563 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
     456struct asm_region {
     457        void * before;
     458        void * after;
     459};
     460
     461//-----------------------------------------------------------------------------
     462// Some assembly required
     463#if defined( __i386 )
     464        #define __cfaasm_label( label ) \
     465                ({ \
     466                        struct asm_region region; \
     467                        asm( \
     468                                "movl $__cfaasm_" #label "_before, %[vb]\n\t" \
     469                                "movl $__cfaasm_" #label "_after , %[va]\n\t" \
     470                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     471                        ); \
     472                        region; \
     473                });
     474#elif defined( __x86_64 )
     475        #ifdef __PIC__
     476                #define PLT "@PLT"
     477        #else
     478                #define PLT ""
     479        #endif
     480        #define __cfaasm_label( label ) \
     481                ({ \
     482                        struct asm_region region; \
     483                        asm( \
     484                                "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
     485                                "movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
     486                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     487                        ); \
     488                        region; \
     489                });
     490#elif defined( __aarch64__ )
     491        #error __cfaasm_label undefined for arm
     492#else
     493        #error unknown hardware architecture
     494#endif
    564495
    565496// Context switch signal handler
     
    583514
    584515        // Check if it is safe to preempt here
    585         if( !preemption_ready( ip ) ) { return; }
     516        if( !preemption_ready() ) { return; }
     517
     518        struct asm_region region;
     519        region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
     520        region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
     521        region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
     522        region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
     523        region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
    586524
    587525        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
Note: See TracChangeset for help on using the changeset viewer.