File: libcfa/src/concurrency/preemption.cfa (1 edited)

Legend: "-" removed, "+" added; unmarked lines are unchanged; "..." marks skipped unchanged regions.
r231b18f → r8fc652e0

  // Created On       : Mon Jun 5 14:20:42 2017
  // Last Modified By : Peter A. Buhr
- // Last Modified On : Fri Nov 6 07:42:13 2020
- // Update Count     : 54
+ // Last Modified On : Wed Aug 26 16:46:03 2020
+ // Update Count     : 53
  //
...
  //=============================================================================================

- // In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
- //
- // For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
- // enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
- //
- // For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
- //
- // _Thread_local volatile int interrupts;
+ //----------
+ // special case for preemption since used often
+ bool __preemption_enabled() {
+ 	// create an assembler label before
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_check_before:":::"memory");
+
+ 	// access tls as normal
+ 	bool enabled = __cfaabi_tls.preemption_state.enabled;
+
+ 	// create an assembler label after
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_check_after:":::"memory");
+ 	return enabled;
+ }
+
+ //----------
+ // Get data from the TLS block
+ uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); // no inline to avoid problems
+ uintptr_t __cfatls_get( unsigned long int offset ) {
+ 	// create an assembler label before
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_get_before:":::"memory");
+
+ 	// access tls as normal (except for pointer arithmetic)
+ 	uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
+
+ 	// create an assembler label after
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_get_after:":::"memory");
+ 	return val;
+ }
+
+ // //----------
+ // // Write data to the TLS block
+ // // sadly it loses the type information and can only write 1 word at a time
+ // // use with __builtin_offsetof
+ // void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));
+ // void __cfatls_set(uintptr_t offset, void * value) {
+ // 	// create an assembler label before
+ // 	// marked as clobber all to avoid movement
+ // 	asm volatile("__cfaasm_set_before:":::"memory");
+
+ // 	// access tls as normal (except for type information)
+ // 	*(void**)(offset + (uintptr_t)&my_tls) = value;
+
+ // 	// create an assembler label after
+ // 	// marked as clobber all to avoid movement
+ // 	asm volatile("__cfaasm_set_after:":::"memory");
+ // }
+
+ // //----------
+ // #include <stdio.h>
  // int main() {
- // 	interrupts = 0; // disable interrupts }
- //
- // which generates the following code on the ARM
- //
- // (gdb) disassemble main
- // Dump of assembler code for function main:
- //    0x0000000000000610 <+0>:  mrs  x1, tpidr_el0
- //    0x0000000000000614 <+4>:  mov  w0, #0x0   // #0
- //    0x0000000000000618 <+8>:  add  x1, x1, #0x0, lsl #12
- //    0x000000000000061c <+12>: add  x1, x1, #0x10
- //    0x0000000000000620 <+16>: str  wzr, [x1]
- //    0x0000000000000624 <+20>: ret
- //
- // The mrs moves a pointer from coprocessor register tpidr_el0 into register x1. Register w0 is set to 0. The two adds
- // increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
- // "interrupts". Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
- // TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at the
- // user level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
- // the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
- // turning off interrupts on the wrong kernel thread.
- //
- // On the x86, the following code is generated for the same code fragment.
- //
- // (gdb) disassemble main
- // Dump of assembler code for function main:
- //    0x0000000000400420 <+0>:  movl $0x0,%fs:0xfffffffffffffffc
- //    0x000000000040042c <+12>: xor  %eax,%eax
- //    0x000000000040042e <+14>: retq
- //
- // and base-displacement addressing is used to atomically reset variable "interrupts" off of the TLS pointer in
- // register "fs".
- //
- // Hence, the ARM has base-displacement addressing for the general-purpose registers, BUT not for the coprocessor
- // registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
- //
- // Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
- // thread on the same kernel thread.
- //
- // The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
- // registers are second-class registers with respect to the instruction set. One possible answer is that they did not
- // want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
- // available.
+ // 	// Get the information
+ // 	// Must use inline assembly to get access to label
+ // 	// C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"
+ // 	// The big advantage of this approach is that there is 0 overhead for the read and write functions
+ // 	void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });
+ // 	void * __cfaasm_addr_get_after  = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });
+ // 	void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });
+ // 	void * __cfaasm_addr_set_after  = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });
+
+ // 	printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);
+ // 	printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);
+ // 	return 0;
+ // }

  __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
...
  // Disable interrupts by incrementing the counter
  void disable_interrupts() {
- 	with( kernelTLS.preemption_state ) {
+ 	// create an assembler label before
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_disable_before:":::"memory");
+
+ 	with( __cfaabi_tls.preemption_state ) {
  		#if GCC_VERSION > 50000
  		static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
...
  		verify( new_val < 65_000u ); // If this triggers someone is disabling interrupts without enabling them
  	}
+
+ 	// create an assembler label after
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_disable_after:":::"memory");
  }

...
  // If counter reaches 0, execute any pending __cfactx_switch
  void enable_interrupts( __cfaabi_dbg_ctx_param ) {
- 	processor * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
+ 	// create an assembler label before
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_enable_before:":::"memory");
+
+ 	processor * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
  	/* paranoid */ verify( proc );

- 	with( kernelTLS.preemption_state ){
+ 	with( __cfaabi_tls.preemption_state ){
  		unsigned short prev = disable_count;
  		disable_count -= 1;
...
  	// For debugging purposes : keep track of the last person to enable the interrupts
  	__cfaabi_dbg_debug_do( proc->last_enable = caller; )
+
+ 	// create an assembler label after
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_enable_after:":::"memory");
  }

...
  // Don't execute any pending __cfactx_switch even if counter reaches 0
  void enable_interrupts_noPoll() {
- 	unsigned short prev = kernelTLS.preemption_state.disable_count;
- 	kernelTLS.preemption_state.disable_count -= 1;
+ 	// create an assembler label before
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_nopoll_before:":::"memory");
+
+ 	unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
+ 	__cfaabi_tls.preemption_state.disable_count -= 1;
  	verifyf( prev != 0u, "Incremented from %u\n", prev ); // If this triggers someone is enabling already enabled interrupts
  	if( prev == 1 ) {
  		#if GCC_VERSION > 50000
- 		static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
+ 		static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
  		#endif
  		// Set enabled flag to true
  		// should be atomic to avoid preemption in the middle of the operation.
  		// use memory order RELAXED since there are no inter-thread requirements on this variable
- 		__atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
+ 		__atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);

  		// Signal the compiler that a fence is needed but only for signal handlers
  		__atomic_signal_fence(__ATOMIC_RELEASE);
  	}
+
+ 	// create an assembler label after
+ 	// marked as clobber all to avoid movement
+ 	asm volatile("__cfaasm_nopoll_after:":::"memory");
  }
  }
...
  static void timeout( $thread * this ) {
  	#if !defined( __CFA_NO_STATISTICS__ )
- 		kernelTLS.this_stats = this->curr_cluster->stats;
+ 		kernelTLS().this_stats = this->curr_cluster->stats;
  	#endif
  	unpark( this );
...
  static inline bool preemption_ready() {
  	// Check if preemption is safe
- 	bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
+ 	bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;

  	// Adjust the pending flag accordingly
- 	kernelTLS.this_processor->pending_preemption = !ready;
+ 	__cfaabi_tls.this_processor->pending_preemption = !ready;
  	return ready;
  }
...

  	// Start with preemption disabled until ready
- 	kernelTLS.preemption_state.enabled = false;
- 	kernelTLS.preemption_state.disable_count = 1;
+ 	__cfaabi_tls.preemption_state.enabled = false;
+ 	__cfaabi_tls.preemption_state.disable_count = 1;

  	// Initialize the event kernel
...
  // Kernel Signal Handlers
  //=============================================================================================
+ struct asm_region {
+ 	void * before;
+ 	void * after;
+ };
+
+ //-----------------------------------------------------------------------------
+ // Some assembly required
+ #if defined( __i386 )
+ 	#define __cfaasm_label( label ) \
+ 		({ \
+ 			struct asm_region region; \
+ 			asm( \
+ 				"movl $__cfaasm_" #label "_before, %[vb]\n\t" \
+ 				"movl $__cfaasm_" #label "_after , %[va]\n\t" \
+ 				: [vb]"=r"(region.before), [va]"=r"(region.after) \
+ 			); \
+ 			region; \
+ 		});
+ #elif defined( __x86_64 )
+ 	#ifdef __PIC__
+ 		#define PLT "@PLT"
+ 	#else
+ 		#define PLT ""
+ 	#endif
+ 	#define __cfaasm_label( label ) \
+ 		({ \
+ 			struct asm_region region; \
+ 			asm( \
+ 				"movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
+ 				"movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
+ 				: [vb]"=r"(region.before), [va]"=r"(region.after) \
+ 			); \
+ 			region; \
+ 		});
+ #elif defined( __aarch64__ )
+ 	#error __cfaasm_label undefined for arm
+ #else
+ 	#error unknown hardware architecture
+ #endif

  // Context switch signal handler
  // Receives SIGUSR1 signal and causes the current thread to yield
  static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
- 	__cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
+ 	void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
+ 	__cfaabi_dbg_debug_do( last_interrupt = ip; )

  	// SKULLDUGGERY: if a thread creates a processor and then immediately deletes it,
...
  	// before the kernel thread has even started running. When that happens, an interrupt
  	// with a null 'this_processor' will be caught, just ignore it.
- 	if(! kernelTLS.this_processor ) return;
+ 	if(! __cfaabi_tls.this_processor ) return;

  	choose(sfp->si_value.sival_int) {
  		case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
- 		case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
+ 		case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
  		default:
  			abort( "internal error, signal value is %d", sfp->si_value.sival_int );
...
  	if( !preemption_ready() ) { return; }

- 	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
+ 	struct asm_region region;
+ 	region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
+ 	region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
+ 	region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
+ 	region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
+ 	region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
+
+ 	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );

  	// Sync flag : prevent recursive calls to the signal handler
- 	kernelTLS.preemption_state.in_progress = true;
+ 	__cfaabi_tls.preemption_state.in_progress = true;

  	// Clear sighandler mask before context switching.
...
  	}

- 	// TODO: this should go in finish action
  	// Clear the in progress flag
- 	kernelTLS.preemption_state.in_progress = false;
+ 	__cfaabi_tls.preemption_state.in_progress = false;

  	// Preemption can occur here
...
  	id.full_proc = false;
  	id.id = doregister(&id);
- 	kernelTLS.this_proc_id = &id;
+ 	__cfaabi_tls.this_proc_id = &id;

  	// Block sigalrms to control when they arrive
...

  void __cfaabi_check_preemption() {
- 	bool ready = kernelTLS.preemption_state.enabled;
+ 	bool ready = __preemption_enabled();
  	if(!ready) { abort("Preemption should be ready"); }
...
  #ifdef __CFA_WITH_VERIFY__
  bool __cfaabi_dbg_in_kernel() {
- 	return ! kernelTLS.preemption_state.enabled;
+ 	return !__preemption_enabled();
  }
  #endif
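The accessor pattern this change introduces (`__preemption_enabled`, `__cfatls_get`) is easier to study outside the runtime. The following standalone C sketch shows the same idea under stated assumptions (x86-64 Linux, GCC or Clang); the names tls_block, my_tls, tls_get, and the tls_get_* labels are hypothetical stand-ins, not CFA symbols. The TLS access is bracketed by assembler labels that double as compiler barriers, and noinline guarantees a single copy of the labelled region:

// Minimal sketch of a label-bracketed TLS read, assuming x86-64 Linux and
// GCC/Clang. All names here (tls_block, my_tls, tls_get, tls_get_*) are
// hypothetical stand-ins for the runtime's __cfaabi_tls/__cfatls_get.
#include <stdint.h>
#include <stdio.h>

struct tls_block {
	uintptr_t interrupts;   // word-sized fields, so the word-sized read below is valid
	uintptr_t other;
};
static _Thread_local struct tls_block my_tls;

// noinline: exactly one copy of the labels and of the address computation
__attribute__((noinline)) static uintptr_t tls_get( unsigned long offset ) {
	// label plus "memory" clobber: the compiler cannot hoist code above this point
	asm volatile( "tls_get_before:" ::: "memory" );

	// the non-atomic part: compute the TLS address, then dereference it
	uintptr_t val = *(uintptr_t *)((uintptr_t)&my_tls + offset);

	// closing label/barrier: the access cannot sink below this point
	asm volatile( "tls_get_after:" ::: "memory" );
	return val;
}

int main( void ) {
	my_tls.other = 42;
	printf( "%lu\n", (unsigned long)tls_get( __builtin_offsetof( struct tls_block, other ) ) );
	return 0;
}

The "memory" clobbers matter as much as the labels: they keep the address computation and the dereference inside the bracketed range, so a signal handler can treat [before, after] as the complete unsafe region.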
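The other half of the mechanism is the handler-side test, which sigHandler_ctxSwitch performs with __cfaasm_label above: if the interrupted instruction pointer falls between a *_before/*_after pair, the preemption is abandoned. Below is a self-contained sketch of that test, again assuming x86-64 Linux with hypothetical names; it uses %rip-relative leaq (valid for PIC and non-PIC builds) where the diff uses movq with an optional @PLT suffix:

// Self-contained sketch of the signal-handler region test, assuming x86-64
// Linux and GCC/Clang. Names (interrupts, read_interrupts, region_*) are
// hypothetical stand-ins, not the CFA handler.
#define _GNU_SOURCE
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <ucontext.h>

static _Thread_local uintptr_t interrupts;
static volatile sig_atomic_t pending;   // set when preemption must be deferred

__attribute__((noinline)) static uintptr_t read_interrupts( void ) {
	asm volatile( "region_before:" ::: "memory" );   // open the unsafe region
	uintptr_t v = interrupts;                        // non-atomic TLS access
	asm volatile( "region_after:" ::: "memory" );    // close the unsafe region
	return v;
}

static void handler( int sig, siginfo_t * sfp, void * vctx ) {
	(void)sig; (void)sfp;
	uintptr_t ip = (uintptr_t)((ucontext_t *)vctx)->uc_mcontext.gregs[REG_RIP];
	uintptr_t before, after;
	// load the label addresses; %rip-relative leaq works for PIC and non-PIC
	asm( "leaq region_before(%%rip), %[b]\n\t"
	     "leaq region_after(%%rip), %[a]\n\t"
	     : [b]"=r"(before), [a]"=r"(after) );
	if( ip >= before && ip <= after ) { pending = 1; return; }  // unsafe: defer
	// ... otherwise this is a safe point; a real runtime would switch here ...
}

int main( void ) {
	struct sigaction sa = {0};
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset( &sa.sa_mask );
	sigaction( SIGALRM, &sa, 0 );
	interrupts = 1;
	raise( SIGALRM );                                // lands outside the region
	printf( "%lu %d\n", (unsigned long)read_interrupts(), (int)pending );
	return 0;
}

A real runtime would also remember the deferred preemption and poll for it at the next safe point, which is the role the pending_preemption flag set in preemption_ready plays above.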
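Finally, the counting discipline in disable_interrupts and enable_interrupts_noPoll can be sketched in isolation: a per-thread nesting counter, with the relaxed-atomic enabled flag published only on the 0-to-1 and 1-to-0 transitions, and signal fences rather than thread fences because the only concurrent reader is a signal handler on the same kernel thread. The names below are hypothetical stand-ins, and the exact fence placement is an assumption based on the comments visible in the diff:

// Sketch of the nesting disable/enable counter pattern; names are
// hypothetical and the fence placement is an assumption, not the CFA code.
#include <assert.h>
#include <stdbool.h>

static _Thread_local struct {
	volatile bool enabled;          // read by the signal handler
	unsigned short disable_count;   // nesting depth, touched only by its own thread
} preemption_state = { true, 0 };

static void disable_preemption( void ) {
	if( preemption_state.disable_count == 0 ) {
		// 0 -> 1 transition: publish "unsafe" before entering the critical region
		__atomic_store_n( &preemption_state.enabled, false, __ATOMIC_RELAXED );
		__atomic_signal_fence( __ATOMIC_ACQUIRE );  // fence against signal handlers only
	}
	preemption_state.disable_count += 1;
}

static void enable_preemption( void ) {
	unsigned short prev = preemption_state.disable_count;
	preemption_state.disable_count -= 1;
	assert( prev != 0 );            // enabling interrupts that are already enabled
	if( prev == 1 ) {
		// 1 -> 0 transition: leave the critical region, then publish "safe"
		__atomic_signal_fence( __ATOMIC_RELEASE );
		__atomic_store_n( &preemption_state.enabled, true, __ATOMIC_RELAXED );
	}
}

int main( void ) {
	disable_preemption();
	disable_preemption();           // nesting is allowed
	enable_preemption();
	enable_preemption();            // back to preemptible
	return preemption_state.enabled ? 0 : 1;
}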