Context Navigation

Reverse Diff

preemption.cfa [8fc652e0:231b18f]

File:

: 1 edited

libcfa/src/concurrency/preemption.cfa (modified) (17 diffs)

Legend:

: Unmodified
: Added
: Removed

libcfa/src/concurrency/preemption.cfa

-              r8fc652e0
+              r231b18f
 // Created On       : Mon Jun 5 14:20:42 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Wed Aug 26 16:46:03 2020
 // Update Count     : 53
+// Last Modified On : Fri Nov  6 07:42:13 2020
+// Update Count     : 54
 //
 …
 //=============================================================================================
+//----------
+// Return the preemption-enabled flag read from __cfaabi_tls.preemption_state; special case for preemption since used often
+bool __preemption_enabled() {
+        // create an assembler label before the TLS access; the signal handler compares the interrupted ip against the "check" region
+        // marked with a "memory" clobber so the compiler does not move memory accesses across the label
+        asm volatile("__cfaasm_check_before:":::"memory");
+        // access tls as normal
+        bool enabled = __cfaabi_tls.preemption_state.enabled;
+        // create an assembler label after the TLS access, closing the "check" region
+        // marked with a "memory" clobber so the compiler does not move memory accesses across the label
+        asm volatile("__cfaasm_check_after:":::"memory");
+        return enabled;
+}
+//----------
+// Get data from the TLS block: returns the uintptr_t-sized word located `offset` bytes past the address of __cfaabi_tls
+uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems (presumably duplicated assembler labels if inlined -- TODO confirm)
+uintptr_t __cfatls_get( unsigned long int offset ) {
+        // create an assembler label before the TLS access; the signal handler compares the interrupted ip against the "get" region
+        // marked with a "memory" clobber so the compiler does not move memory accesses across the label
+        asm volatile("__cfaasm_get_before:":::"memory");
+        // access tls as normal (except for pointer arithmetic)
+        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
+        // create an assembler label after the TLS access, closing the "get" region
+        // marked with a "memory" clobber so the compiler does not move memory accesses across the label
+        asm volatile("__cfaasm_get_after:":::"memory");
+        return val;
+}
+// //----------
+// // Write data to the TLS block
+// // sadly it loses the type information and can only write 1 word at a time
+// // use with __builtin_offsetof
+// void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));
+// void __cfatls_set(uintptr_t offset, void * value) {
+//     // create a assembler label before
+//     // marked as clobber all to avoid movement
+//     asm volatile("__cfaasm_set_before:":::"memory");
+//     // access tls as normal (except for type information)
+//     *(void**)(offset + (uintptr_t)&my_tls) = value;
+//     // create a assembler label after
+//     // marked as clobber all to avoid movement
+//     asm volatile("__cfaasm_set_after:":::"memory");
+// }
+// //----------
+// #include <stdio.h>
+// In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
+//
+// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
+// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
+//
+// For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
+//
+// _Thread_local volatile int interrupts;
 // int main() {
+//     // Get the information
+//     // Must use inline assembly to get access to label
+//     // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"
+//     // The big advantage of this approach is that there is 0 overhead for the read and writes function
+//     void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
+//     void * __cfaasm_addr_get_after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
+//     void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });
+//     void * __cfaasm_addr_set_after  = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });
+//     printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);
+//     printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);
+//     return 0;
+// }
+//     interrupts = 0; // disable interrupts }
+//
+// which generates the following code on the ARM
+//
+// (gdb) disassemble main
+// Dump of assembler code for function main:
+//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
+//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
+//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
+//    0x000000000000061c <+12>: add     x1, x1, #0x10
+//    0x0000000000000620 <+16>: str     wzr, [x1]
+//    0x0000000000000624 <+20>: ret
+//
+// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
+// increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
+// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
+// TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, the thread can be preempted at the
+// user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
+// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
+// turning off interrupts on the wrong kernel thread.
+//
+// On the x86, the following code is generated for the same code fragment.
+//
+// (gdb) disassemble main
+// Dump of assembler code for function main:
+//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
+//    0x000000000040042c <+12>: xor    %eax,%eax
+//    0x000000000040042e <+14>: retq
+//
+// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
+// register "fs".
+//
+// Hence, the ARM has base-displacement addressing for the general purpose registers, BUT not for the coprocessor
+// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
+//
+// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
+// thread on the same kernel thread.
+//
+// The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
+// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
+// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
+// available.
 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
 …
         // Disable interrupts by incrementing the counter
         void disable_interrupts() {
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_disable_before:":::"memory");
+                with( __cfaabi_tls.preemption_state ) {
+                with( kernelTLS.preemption_state ) {
                         #if GCC_VERSION > 50000
                         static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
 …
                         verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
+                }
-                // create a assembler label after
-                // marked as clobber all to avoid movement
-                asm volatile("__cfaasm_disable_after:":::"memory");
+        }
 …
         // If counter reaches 0, execute any pending __cfactx_switch
         void enable_interrupts( __cfaabi_dbg_ctx_param ) {
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_enable_before:":::"memory");
+                processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
+                processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
                 /* paranoid */ verify( proc );
                 with( __cfaabi_tls.preemption_state ){
+                with( kernelTLS.preemption_state ){
                         unsigned short prev = disable_count;
                         disable_count -= 1;
 …
                 // For debugging purposes : keep track of the last person to enable the interrupts
                 __cfaabi_dbg_debug_do( proc->last_enable = caller; )
-                // create a assembler label after
-                // marked as clobber all to avoid movement
-                asm volatile("__cfaasm_enable_after:":::"memory");
+        }
 …
         // Don't execute any pending __cfactx_switch even if counter reaches 0
         void enable_interrupts_noPoll() {
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_nopoll_before:":::"memory");
+                unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
+                __cfaabi_tls.preemption_state.disable_count -= 1;
+                unsigned short prev = kernelTLS.preemption_state.disable_count;
+                kernelTLS.preemption_state.disable_count -= 1;
                 verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
                 if( prev == 1 ) {
                         #if GCC_VERSION > 50000
                         static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
+                        static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
                         #endif
                         // Set enabled flag to true
                         // should be atomic to avoid preemption in the middle of the operation.
                         // use memory order RELAXED since there are no inter-thread requirements on this variable
                         __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
+                        __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
                         // Signal the compiler that a fence is needed but only for signal handlers
                         __atomic_signal_fence(__ATOMIC_RELEASE);
+                }
-                // create a assembler label after
-                // marked as clobber all to avoid movement
-                asm volatile("__cfaasm_nopoll_after:":::"memory");
+        }
+}
 …
 static void timeout( $thread * this ) {
         #if !defined( __CFA_NO_STATISTICS__ )
                 kernelTLS().this_stats = this->curr_cluster->stats;
+                kernelTLS.this_stats = this->curr_cluster->stats;
         #endif
         unpark( this );
 …
 static inline bool preemption_ready() {
         // Check if preemption is safe
         bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
+        bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
         // Adjust the pending flag accordingly
         __cfaabi_tls.this_processor->pending_preemption = !ready;
+        kernelTLS.this_processor->pending_preemption = !ready;
         return ready;
+}
 …
         // Start with preemption disabled until ready
         __cfaabi_tls.preemption_state.enabled = false;
         __cfaabi_tls.preemption_state.disable_count = 1;
+        kernelTLS.preemption_state.enabled = false;
+        kernelTLS.preemption_state.disable_count = 1;
         // Initialize the event kernel
 …
 // Kernel Signal Handlers
 //=============================================================================================
-struct asm_region {
-        void * before;
-        void * after;
-};
-//-----------------------------------------------------------------------------
-// Some assembly required
-#if defined( __i386 )
-        #define __cfaasm_label( label ) \
-                ({ \
-                        struct asm_region region; \
-                        asm( \
-                                "movl $__cfaasm_" #label "_before, %[vb]\n\t" \
-                                "movl $__cfaasm_" #label "_after , %[va]\n\t" \
-                                 : [vb]"=r"(region.before), [vb]"=r"(region.before) \
-                        ); \
-                        region; \
-                });
-#elif defined( __x86_64 )
-        #ifdef __PIC__
-                #define PLT "@PLT"
-        #else
-                #define PLT ""
-        #endif
-        #define __cfaasm_label( label ) \
-                ({ \
-                        struct asm_region region; \
-                        asm( \
-                                "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
-                                "movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
-                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
-                        ); \
-                        region; \
-                });
-#elif defined( __aarch64__ )
-        #error __cfaasm_label undefined for arm
-#else
-        #error unknown hardware architecture
-#endif
 // Context switch signal handler
 // Receives SIGUSR1 signal and causes the current thread to yield
 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
+        void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
+        __cfaabi_dbg_debug_do( last_interrupt = ip; )
+        __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
         // SKULLDUGGERY: if a thread creates a processor and then immediately deletes it,
 …
         // before the kernel thread has even started running. When that happens, an interrupt
         // with a null 'this_processor' will be caught, just ignore it.
         if(! __cfaabi_tls.this_processor ) return;
+        if(! kernelTLS.this_processor ) return;
         choose(sfp->si_value.sival_int) {
                 case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
+                case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
                 default:
                         abort( "internal error, signal value is %d", sfp->si_value.sival_int );
 …
         if( !preemption_ready() ) { return; }
+        struct asm_region region;
+        region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
+        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
+        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
         // Sync flag : prevent recursive calls to the signal handler
         __cfaabi_tls.preemption_state.in_progress = true;
+        kernelTLS.preemption_state.in_progress = true;
         // Clear sighandler mask before context switching.
 …
+        }
+        // TODO: this should go in finish action
         // Clear the in progress flag
         __cfaabi_tls.preemption_state.in_progress = false;
+        kernelTLS.preemption_state.in_progress = false;
         // Preemption can occur here
 …
         id.full_proc = false;
         id.id = doregister(&id);
         __cfaabi_tls.this_proc_id = &id;
+        kernelTLS.this_proc_id = &id;
         // Block sigalrms to control when they arrive
 …
 void __cfaabi_check_preemption() {
         bool ready = __preemption_enabled();
+        bool ready = kernelTLS.preemption_state.enabled;
         if(!ready) { abort("Preemption should be ready"); }
 …
 #ifdef __CFA_WITH_VERIFY__
 bool __cfaabi_dbg_in_kernel() {
         return !__preemption_enabled();
+        return !kernelTLS.preemption_state.enabled;
+}
 #endif

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changes in libcfa/src/concurrency/preemption.cfa [8fc652e0:231b18f]

Legend:

libcfa/src/concurrency/preemption.cfa

Download in other formats: