- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/preemption.cfa
r82a2fed r8fc652e0 10 10 // Created On : Mon Jun 5 14:20:42 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Nov 6 07:42:13 202013 // Update Count : 5 412 // Last Modified On : Wed Aug 26 16:46:03 2020 13 // Update Count : 53 14 14 // 15 15 … … 163 163 // Kernel Signal Tools 164 164 //============================================================================================= 165 // In a user-level threading system, there are handful of thread-local variables where this problem occurs on the ARM.166 //167 // For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are168 // enabled/disabled for that kernel thread. Therefore, this variable is made thread local.169 //170 // For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.171 //172 // _Thread_local volatile int interrupts;173 // int main() {174 // interrupts = 0; // disable interrupts }175 //176 // which generates the following code on the ARM177 //178 // (gdb) disassemble main179 // Dump of assembler code for function main:180 // 0x0000000000000610 <+0>: mrs x1, tpidr_el0181 // 0x0000000000000614 <+4>: mov w0, #0x0 // #0182 // 0x0000000000000618 <+8>: add x1, x1, #0x0, lsl #12183 // 0x000000000000061c <+12>: add x1, x1, #0x10184 // 0x0000000000000620 <+16>: str wzr, [x1]185 // 0x0000000000000624 <+20>: ret186 //187 // The mrs moves a pointer from coprocessor register tpidr_el0 into register x1. Register w0 is set to 0. The two adds188 // increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TSL of variable189 // "interrupts". Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the190 // TSL. Now once x1 has the pointer to the location of the TSL for kernel thread N, it can be be preempted at a191 // user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of192 // the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,193 // turning off interrupt on the wrong kernel thread.194 //195 // On the x86, the following code is generated for the same code fragment.196 //197 // (gdb) disassemble main198 // Dump of assembler code for function main:199 // 0x0000000000400420 <+0>: movl $0x0,%fs:0xfffffffffffffffc200 // 0x000000000040042c <+12>: xor %eax,%eax201 // 0x000000000040042e <+14>: retq202 //203 // and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TSL pointer in204 // register "fs".205 //206 // Hence, the ARM has base-displacement address for the general purpose registers, BUT not to the coprocessor207 // registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.208 //209 // Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the210 // thread on the same kernel thread.211 //212 // The obvious question is why does ARM use a coprocessor register to store the TSL pointer given that coprocessor213 // registers are second-class registers with respect to the instruction set. One possible answer is that they did not214 // want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register215 // available.216 165 217 166 //---------- … … 231 180 } 232 181 233 struct asm_region {234 void * before;235 void * after;236 };237 238 static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {239 return ip >= region.before && ip <= region.after;240 }241 242 243 182 //---------- 244 183 // Get data from the TLS block 245 // struct asm_region __cfaasm_get;246 184 uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems 247 185 uintptr_t __cfatls_get( unsigned long int offset ) { 248 // __cfaasm_get.before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });249 // __cfaasm_get.after = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });250 186 // create a assembler label before 251 187 // marked as clobber all to avoid movement … … 260 196 return val; 261 197 } 198 199 // //---------- 200 // // Write data to the TLS block 201 // // sadly it looses the type information and can only write 1 word at a time 202 // // use with __builtin_offsetof 203 // void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__)); 204 // void __cfatls_set(uintptr_t offset, void * value) { 205 // // create a assembler label before 206 // // marked as clobber all to avoid movement 207 // asm volatile("__cfaasm_set_before:":::"memory"); 208 209 // // access tls as normal (except for type information) 210 // *(void**)(offset + (uintptr_t)&my_tls) = value; 211 212 // // create a assembler label after 213 // // marked as clobber all to avoid movement 214 // asm volatile("__cfaasm_set_after:":::"memory"); 215 // } 216 217 // //---------- 218 // #include <stdio.h> 219 // int main() { 220 // // Get the information 221 // // Must use inline assembly to get access to label 222 // // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant" 223 // // The big advantage of this approach is that there is 0 overhead for the read and writes function 224 // void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; }); 225 // void * __cfaasm_addr_get_after = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; }); 226 // void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; }); 227 // void * __cfaasm_addr_set_after = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; }); 228 229 // printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after); 230 // printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after); 231 // return 0; 232 // } 233 234 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; ) 262 235 263 236 extern "C" { … … 266 239 // create a assembler label before 267 240 // marked as clobber all to avoid movement 268 asm volatile("__cfaasm_d sable_before:":::"memory");241 asm volatile("__cfaasm_disable_before:":::"memory"); 269 242 270 243 with( __cfaabi_tls.preemption_state ) { … … 288 261 // create a assembler label after 289 262 // marked as clobber all to avoid movement 290 asm volatile("__cfaasm_d sable_after:":::"memory");263 asm volatile("__cfaasm_disable_after:":::"memory"); 291 264 } 292 265 … … 296 269 // create a assembler label before 297 270 // marked as clobber all to avoid movement 298 asm volatile("__cfaasm_en ble_before:":::"memory");271 asm volatile("__cfaasm_enable_before:":::"memory"); 299 272 300 273 processor * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store … … 331 304 // create a assembler label after 332 305 // marked as clobber all to avoid movement 333 asm volatile("__cfaasm_en ble_after:":::"memory");306 asm volatile("__cfaasm_enable_after:":::"memory"); 334 307 } 335 308 … … 399 372 } 400 373 401 //-----------------------------------------------------------------------------402 // Some assembly required403 #if defined( __i386 )404 #ifdef __PIC__405 #define RELOC_PRELUDE( label ) \406 "calll .Lcfaasm_prelude_" #label "$pb\n\t" \407 ".Lcfaasm_prelude_" #label "$pb:\n\t" \408 "popl %%eax\n\t" \409 ".Lcfaasm_prelude_" #label "_end:\n\t" \410 "addl $_GLOBAL_OFFSET_TABLE_+(.Lcfaasm_prelude_" #label "_end-.Lcfaasm_prelude_" #label "$pb), %%eax\n\t"411 #define RELOC_PREFIX ""412 #define RELOC_SUFFIX "@GOT(%%eax)"413 #else414 #define RELOC_PREFIX "$"415 #define RELOC_SUFFIX ""416 #endif417 #define __cfaasm_label( label ) static struct asm_region label = \418 ({ \419 struct asm_region region; \420 asm( \421 RELOC_PRELUDE( label ) \422 "movl " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \423 "movl " RELOC_PREFIX "__cfaasm_" #label "_after" RELOC_SUFFIX ", %[va]\n\t" \424 : [vb]"=r"(region.before), [va]"=r"(region.after) \425 ); \426 region; \427 });428 #elif defined( __x86_64 )429 #ifdef __PIC__430 #define RELOC_PREFIX ""431 #define RELOC_SUFFIX "@GOTPCREL(%%rip)"432 #else433 #define RELOC_PREFIX "$"434 #define RELOC_SUFFIX ""435 #endif436 #define __cfaasm_label( label ) static struct asm_region label = \437 ({ \438 struct asm_region region; \439 asm( \440 "movq " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \441 "movq " RELOC_PREFIX "__cfaasm_" #label "_after" RELOC_SUFFIX ", %[va]\n\t" \442 : [vb]"=r"(region.before), [va]"=r"(region.after) \443 ); \444 region; \445 });446 #elif defined( __aarch64__ )447 #ifdef __PIC__448 #define RELOC_TAG "@PLT"449 #else450 #define RELOC_TAG ""451 #endif452 #define __cfaasm_label( label ) \453 ({ \454 struct asm_region region; \455 asm( \456 "mov %[vb], __cfaasm_" #label "_before@GOTPCREL(%%rip)" "\n\t" \457 "mov %[va], __cfaasm_" #label "_after@GOTPCREL(%%rip)" "\n\t" \458 : [vb]"=r"(region.before), [va]"=r"(region.after) \459 ); \460 region; \461 });462 #else463 #error unknown hardware architecture464 #endif465 466 374 // KERNEL ONLY 467 375 // Check if a __cfactx_switch signal handler shoud defer 468 376 // If true : preemption is safe 469 377 // If false : preemption is unsafe and marked as pending 470 static inline bool preemption_ready( void * ip ) { 471 // Get all the region for which it is not safe to preempt 472 __cfaasm_label( get ); 473 __cfaasm_label( check ); 474 __cfaasm_label( dsable ); 475 __cfaasm_label( enble ); 476 __cfaasm_label( nopoll ); 477 378 static inline bool preemption_ready() { 478 379 // Check if preemption is safe 479 bool ready = true; 480 if( __cfaasm_in( ip, get ) ) { ready = false; goto EXIT; }; 481 if( __cfaasm_in( ip, check ) ) { ready = false; goto EXIT; }; 482 if( __cfaasm_in( ip, dsable ) ) { ready = false; goto EXIT; }; 483 if( __cfaasm_in( ip, enble ) ) { ready = false; goto EXIT; }; 484 if( __cfaasm_in( ip, nopoll ) ) { ready = false; goto EXIT; }; 485 if( !__cfaabi_tls.preemption_state.enabled) { ready = false; goto EXIT; }; 486 if( __cfaabi_tls.preemption_state.in_progress ) { ready = false; goto EXIT; }; 487 488 EXIT: 380 bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress; 381 489 382 // Adjust the pending flag accordingly 490 383 __cfaabi_tls.this_processor->pending_preemption = !ready; … … 561 454 // Kernel Signal Handlers 562 455 //============================================================================================= 563 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; ) 456 struct asm_region { 457 void * before; 458 void * after; 459 }; 460 461 //----------------------------------------------------------------------------- 462 // Some assembly required 463 #if defined( __i386 ) 464 #define __cfaasm_label( label ) \ 465 ({ \ 466 struct asm_region region; \ 467 asm( \ 468 "movl $__cfaasm_" #label "_before, %[vb]\n\t" \ 469 "movl $__cfaasm_" #label "_after , %[va]\n\t" \ 470 : [vb]"=r"(region.before), [vb]"=r"(region.before) \ 471 ); \ 472 region; \ 473 }); 474 #elif defined( __x86_64 ) 475 #ifdef __PIC__ 476 #define PLT "@PLT" 477 #else 478 #define PLT "" 479 #endif 480 #define __cfaasm_label( label ) \ 481 ({ \ 482 struct asm_region region; \ 483 asm( \ 484 "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \ 485 "movq $__cfaasm_" #label "_after" PLT ", %[va]\n\t" \ 486 : [vb]"=r"(region.before), [va]"=r"(region.after) \ 487 ); \ 488 region; \ 489 }); 490 #elif defined( __aarch64__ ) 491 #error __cfaasm_label undefined for arm 492 #else 493 #error unknown hardware architecture 494 #endif 564 495 565 496 // Context switch signal handler … … 583 514 584 515 // Check if it is safe to preempt here 585 if( !preemption_ready( ip ) ) { return; } 516 if( !preemption_ready() ) { return; } 517 518 struct asm_region region; 519 region = __cfaasm_label( get ); if( ip >= region.before && ip <= region.after ) return; 520 region = __cfaasm_label( check ); if( ip >= region.before && ip <= region.after ) return; 521 region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return; 522 region = __cfaasm_label( enable ); if( ip >= region.before && ip <= region.after ) return; 523 region = __cfaasm_label( nopoll ); if( ip >= region.before && ip <= region.after ) return; 586 524 587 525 __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
Note: See TracChangeset
for help on using the changeset viewer.