- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/preemption.cfa
r8fc652e0 r231b18f 10 10 // Created On : Mon Jun 5 14:20:42 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Aug 26 16:46:03 202013 // Update Count : 5 312 // Last Modified On : Fri Nov 6 07:42:13 2020 13 // Update Count : 54 14 14 // 15 15 … … 164 164 //============================================================================================= 165 165 166 //---------- 167 // special case for preemption since used often 168 bool __preemption_enabled() { 169 // create a assembler label before 170 // marked as clobber all to avoid movement 171 asm volatile("__cfaasm_check_before:":::"memory"); 172 173 // access tls as normal 174 bool enabled = __cfaabi_tls.preemption_state.enabled; 175 176 // create a assembler label after 177 // marked as clobber all to avoid movement 178 asm volatile("__cfaasm_check_after:":::"memory"); 179 return enabled; 180 } 181 182 //---------- 183 // Get data from the TLS block 184 uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems 185 uintptr_t __cfatls_get( unsigned long int offset ) { 186 // create a assembler label before 187 // marked as clobber all to avoid movement 188 asm volatile("__cfaasm_get_before:":::"memory"); 189 190 // access tls as normal (except for pointer arithmetic) 191 uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset); 192 193 // create a assembler label after 194 // marked as clobber all to avoid movement 195 asm volatile("__cfaasm_get_after:":::"memory"); 196 return val; 197 } 198 199 // //---------- 200 // // Write data to the TLS block 201 // // sadly it looses the type information and can only write 1 word at a time 202 // // use with __builtin_offsetof 203 // void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__)); 204 // void __cfatls_set(uintptr_t offset, void * value) { 205 // // create a assembler label before 206 // // marked as clobber all to avoid movement 207 // asm 
volatile("__cfaasm_set_before:":::"memory"); 208 209 // // access tls as normal (except for type information) 210 // *(void**)(offset + (uintptr_t)&my_tls) = value; 211 212 // // create a assembler label after 213 // // marked as clobber all to avoid movement 214 // asm volatile("__cfaasm_set_after:":::"memory"); 215 // } 216 217 // //---------- 218 // #include <stdio.h> 166 // In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM. 167 // 168 // For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are 169 // enabled/disabled for that kernel thread. Therefore, this variable is made thread local. 170 // 171 // For example, this code fragment sets the state of the "interrupt" variable in thread-local memory. 172 // 173 // _Thread_local volatile int interrupts; 219 174 // int main() { 220 // // Get the information 221 // // Must use inline assembly to get access to label 222 // // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant" 223 // // The big advantage of this approach is that there is 0 overhead for the read and writes function 224 // void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; }); 225 // void * __cfaasm_addr_get_after = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; }); 226 // void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; }); 227 // void * __cfaasm_addr_set_after = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; }); 228 229 // printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after); 230 // printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after); 231 // return 0; 232 // } 175 // interrupts = 0; // disable interrupts } 176 // 177 // 
which generates the following code on the ARM 178 // 179 // (gdb) disassemble main 180 // Dump of assembler code for function main: 181 // 0x0000000000000610 <+0>: mrs x1, tpidr_el0 182 // 0x0000000000000614 <+4>: mov w0, #0x0 // #0 183 // 0x0000000000000618 <+8>: add x1, x1, #0x0, lsl #12 184 // 0x000000000000061c <+12>: add x1, x1, #0x10 185 // 0x0000000000000620 <+16>: str wzr, [x1] 186 // 0x0000000000000624 <+20>: ret 187 // 188 // The mrs moves a pointer from coprocessor register tpidr_el0 into register x1. Register w0 is set to 0. The two adds 189 // increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable 190 // "interrupts". Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the 191 // TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at a 192 // user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of 193 // the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N, 194 // turning off interrupts on the wrong kernel thread. 195 // 196 // On the x86, the following code is generated for the same code fragment. 197 // 198 // (gdb) disassemble main 199 // Dump of assembler code for function main: 200 // 0x0000000000400420 <+0>: movl $0x0,%fs:0xfffffffffffffffc 201 // 0x000000000040042c <+12>: xor %eax,%eax 202 // 0x000000000040042e <+14>: retq 203 // 204 // and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in 205 // register "fs". 206 // 207 // Hence, the ARM has base-displacement addressing for the general purpose registers, BUT not for the coprocessor 208 // registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic. 
209 // 210 // Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the 211 // thread on the same kernel thread. 212 // 213 // The obvious question is why does ARM use a coprocessor register to store the TSL pointer given that coprocessor 214 // registers are second-class registers with respect to the instruction set. One possible answer is that they did not 215 // want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register 216 // available. 233 217 234 218 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; ) … … 237 221 // Disable interrupts by incrementing the counter 238 222 void disable_interrupts() { 239 // create a assembler label before 240 // marked as clobber all to avoid movement 241 asm volatile("__cfaasm_disable_before:":::"memory"); 242 243 with( __cfaabi_tls.preemption_state ) { 223 with( kernelTLS.preemption_state ) { 244 224 #if GCC_VERSION > 50000 245 225 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free"); … … 258 238 verify( new_val < 65_000u ); // If this triggers someone is disabling interrupts without enabling them 259 239 } 260 261 // create a assembler label after262 // marked as clobber all to avoid movement263 asm volatile("__cfaasm_disable_after:":::"memory");264 240 } 265 241 … … 267 243 // If counter reaches 0, execute any pending __cfactx_switch 268 244 void enable_interrupts( __cfaabi_dbg_ctx_param ) { 269 // create a assembler label before 270 // marked as clobber all to avoid movement 271 asm volatile("__cfaasm_enable_before:":::"memory"); 272 273 processor * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store 245 processor * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store 274 246 /* paranoid */ verify( proc ); 275 247 276 with( 
__cfaabi_tls.preemption_state ){248 with( kernelTLS.preemption_state ){ 277 249 unsigned short prev = disable_count; 278 250 disable_count -= 1; … … 301 273 // For debugging purposes : keep track of the last person to enable the interrupts 302 274 __cfaabi_dbg_debug_do( proc->last_enable = caller; ) 303 304 // create a assembler label after305 // marked as clobber all to avoid movement306 asm volatile("__cfaasm_enable_after:":::"memory");307 275 } 308 276 … … 310 278 // Don't execute any pending __cfactx_switch even if counter reaches 0 311 279 void enable_interrupts_noPoll() { 312 // create a assembler label before 313 // marked as clobber all to avoid movement 314 asm volatile("__cfaasm_nopoll_before:":::"memory"); 315 316 unsigned short prev = __cfaabi_tls.preemption_state.disable_count; 317 __cfaabi_tls.preemption_state.disable_count -= 1; 280 unsigned short prev = kernelTLS.preemption_state.disable_count; 281 kernelTLS.preemption_state.disable_count -= 1; 318 282 verifyf( prev != 0u, "Incremented from %u\n", prev ); // If this triggers someone is enabled already enabled interrupts 319 283 if( prev == 1 ) { 320 284 #if GCC_VERSION > 50000 321 static_assert(__atomic_always_lock_free(sizeof( __cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");285 static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free"); 322 286 #endif 323 287 // Set enabled flag to true 324 288 // should be atomic to avoid preemption in the middle of the operation. 
325 289 // use memory order RELAXED since there is no inter-thread on this variable requirements 326 __atomic_store_n(& __cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);290 __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED); 327 291 328 292 // Signal the compiler that a fence is needed but only for signal handlers 329 293 __atomic_signal_fence(__ATOMIC_RELEASE); 330 294 } 331 332 // create a assembler label after333 // marked as clobber all to avoid movement334 asm volatile("__cfaasm_nopoll_after:":::"memory");335 295 } 336 296 } … … 367 327 static void timeout( $thread * this ) { 368 328 #if !defined( __CFA_NO_STATISTICS__ ) 369 kernelTLS ().this_stats = this->curr_cluster->stats;329 kernelTLS.this_stats = this->curr_cluster->stats; 370 330 #endif 371 331 unpark( this ); … … 378 338 static inline bool preemption_ready() { 379 339 // Check if preemption is safe 380 bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;340 bool ready = kernelTLS.preemption_state.enabled && ! 
kernelTLS.preemption_state.in_progress; 381 341 382 342 // Adjust the pending flag accordingly 383 __cfaabi_tls.this_processor->pending_preemption = !ready;343 kernelTLS.this_processor->pending_preemption = !ready; 384 344 return ready; 385 345 } … … 395 355 396 356 // Start with preemption disabled until ready 397 __cfaabi_tls.preemption_state.enabled = false;398 __cfaabi_tls.preemption_state.disable_count = 1;357 kernelTLS.preemption_state.enabled = false; 358 kernelTLS.preemption_state.disable_count = 1; 399 359 400 360 // Initialize the event kernel … … 454 414 // Kernel Signal Handlers 455 415 //============================================================================================= 456 struct asm_region {457 void * before;458 void * after;459 };460 461 //-----------------------------------------------------------------------------462 // Some assembly required463 #if defined( __i386 )464 #define __cfaasm_label( label ) \465 ({ \466 struct asm_region region; \467 asm( \468 "movl $__cfaasm_" #label "_before, %[vb]\n\t" \469 "movl $__cfaasm_" #label "_after , %[va]\n\t" \470 : [vb]"=r"(region.before), [vb]"=r"(region.before) \471 ); \472 region; \473 });474 #elif defined( __x86_64 )475 #ifdef __PIC__476 #define PLT "@PLT"477 #else478 #define PLT ""479 #endif480 #define __cfaasm_label( label ) \481 ({ \482 struct asm_region region; \483 asm( \484 "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \485 "movq $__cfaasm_" #label "_after" PLT ", %[va]\n\t" \486 : [vb]"=r"(region.before), [va]"=r"(region.after) \487 ); \488 region; \489 });490 #elif defined( __aarch64__ )491 #error __cfaasm_label undefined for arm492 #else493 #error unknown hardware architecture494 #endif495 416 496 417 // Context switch signal handler 497 418 // Receives SIGUSR1 signal and causes the current thread to yield 498 419 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) { 499 void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP); 500 __cfaabi_dbg_debug_do( last_interrupt = ip; ) 420 
__cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); ) 501 421 502 422 // SKULLDUGGERY: if a thread creates a processor and the immediately deletes it, … … 504 424 // before the kernel thread has even started running. When that happens, an interrupt 505 425 // with a null 'this_processor' will be caught, just ignore it. 506 if(! __cfaabi_tls.this_processor ) return;426 if(! kernelTLS.this_processor ) return; 507 427 508 428 choose(sfp->si_value.sival_int) { 509 429 case PREEMPT_NORMAL : ;// Normal case, nothing to do here 510 case PREEMPT_TERMINATE: verify( __atomic_load_n( & __cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );430 case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) ); 511 431 default: 512 432 abort( "internal error, signal value is %d", sfp->si_value.sival_int ); … … 516 436 if( !preemption_ready() ) { return; } 517 437 518 struct asm_region region; 519 region = __cfaasm_label( get ); if( ip >= region.before && ip <= region.after ) return; 520 region = __cfaasm_label( check ); if( ip >= region.before && ip <= region.after ) return; 521 region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return; 522 region = __cfaasm_label( enable ); if( ip >= region.before && ip <= region.after ) return; 523 region = __cfaasm_label( nopoll ); if( ip >= region.before && ip <= region.after ) return; 524 525 __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) ); 438 __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) ); 526 439 527 440 // Sync flag : prevent recursive calls to the signal handler 528 __cfaabi_tls.preemption_state.in_progress = true;441 kernelTLS.preemption_state.in_progress = true; 529 442 530 443 // 
Clear sighandler mask before context switching. … … 536 449 } 537 450 451 // TODO: this should go in finish action 538 452 // Clear the in progress flag 539 __cfaabi_tls.preemption_state.in_progress = false;453 kernelTLS.preemption_state.in_progress = false; 540 454 541 455 // Preemption can occur here … … 554 468 id.full_proc = false; 555 469 id.id = doregister(&id); 556 __cfaabi_tls.this_proc_id = &id;470 kernelTLS.this_proc_id = &id; 557 471 558 472 // Block sigalrms to control when they arrive … … 622 536 623 537 void __cfaabi_check_preemption() { 624 bool ready = __preemption_enabled();538 bool ready = kernelTLS.preemption_state.enabled; 625 539 if(!ready) { abort("Preemption should be ready"); } 626 540 … … 645 559 #ifdef __CFA_WITH_VERIFY__ 646 560 bool __cfaabi_dbg_in_kernel() { 647 return ! __preemption_enabled();561 return !kernelTLS.preemption_state.enabled; 648 562 } 649 563 #endif
Note:
See TracChangeset
for help on using the changeset viewer.