- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/concurrency/preemption.cfa
r8fc652e0 r82a2fed 10 10 // Created On : Mon Jun 5 14:20:42 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Aug 26 16:46:03 202013 // Update Count : 5 312 // Last Modified On : Fri Nov 6 07:42:13 2020 13 // Update Count : 54 14 14 // 15 15 … … 163 163 // Kernel Signal Tools 164 164 //============================================================================================= 165 // In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM. 166 // 167 // For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are 168 // enabled/disabled for that kernel thread. Therefore, this variable is made thread local. 169 // 170 // For example, this code fragment sets the state of the "interrupt" variable in thread-local memory. 171 // 172 // _Thread_local volatile int interrupts; 173 // int main() { 174 // interrupts = 0; // disable interrupts } 175 // 176 // which generates the following code on the ARM 177 // 178 // (gdb) disassemble main 179 // Dump of assembler code for function main: 180 // 0x0000000000000610 <+0>: mrs x1, tpidr_el0 181 // 0x0000000000000614 <+4>: mov w0, #0x0 // #0 182 // 0x0000000000000618 <+8>: add x1, x1, #0x0, lsl #12 183 // 0x000000000000061c <+12>: add x1, x1, #0x10 184 // 0x0000000000000620 <+16>: str wzr, [x1] 185 // 0x0000000000000624 <+20>: ret 186 // 187 // The mrs moves a pointer from coprocessor register tpidr_el0 into register x1. Register w0 is set to 0. The two adds 188 // increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable 189 // "interrupts". Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the 190 // TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at a 191 // user-level and the user thread is put on the user-level ready-queue. 
When the preempted thread gets to the front of 192 // the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N, 193 // turning off interrupts on the wrong kernel thread. 194 // 195 // On the x86, the following code is generated for the same code fragment. 196 // 197 // (gdb) disassemble main 198 // Dump of assembler code for function main: 199 // 0x0000000000400420 <+0>: movl $0x0,%fs:0xfffffffffffffffc 200 // 0x000000000040042c <+12>: xor %eax,%eax 201 // 0x000000000040042e <+14>: retq 202 // 203 // and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in 204 // register "fs". 205 // 206 // Hence, the ARM has base-displacement addressing for the general purpose registers, BUT not to the coprocessor 207 // registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic. 208 // 209 // Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the 210 // thread on the same kernel thread. 211 // 212 // The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor 213 // registers are second-class registers with respect to the instruction set. One possible answer is that they did not 214 // want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register 215 // available. 
165 216 166 217 //---------- … … 180 231 } 181 232 233 struct asm_region { 234 void * before; 235 void * after; 236 }; 237 238 static inline bool __cfaasm_in( void * ip, struct asm_region & region ) { 239 return ip >= region.before && ip <= region.after; 240 } 241 242 182 243 //---------- 183 244 // Get data from the TLS block 245 // struct asm_region __cfaasm_get; 184 246 uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems 185 247 uintptr_t __cfatls_get( unsigned long int offset ) { 248 // __cfaasm_get.before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; }); 249 // __cfaasm_get.after = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; }); 186 250 // create a assembler label before 187 251 // marked as clobber all to avoid movement … … 196 260 return val; 197 261 } 198 199 // //----------200 // // Write data to the TLS block201 // // sadly it looses the type information and can only write 1 word at a time202 // // use with __builtin_offsetof203 // void __cfatls_set(uintptr_t offset, void * value) __attribute__((__noinline__));204 // void __cfatls_set(uintptr_t offset, void * value) {205 // // create a assembler label before206 // // marked as clobber all to avoid movement207 // asm volatile("__cfaasm_set_before:":::"memory");208 209 // // access tls as normal (except for type information)210 // *(void**)(offset + (uintptr_t)&my_tls) = value;211 212 // // create a assembler label after213 // // marked as clobber all to avoid movement214 // asm volatile("__cfaasm_set_after:":::"memory");215 // }216 217 // //----------218 // #include <stdio.h>219 // int main() {220 // // Get the information221 // // Must use inline assembly to get access to label222 // // C is annoying here because this could easily be a static const but "initializer element is not a compile-time constant"223 // // The big advantage of this approach is that there is 
0 overhead for the read and writes function224 // void * __cfaasm_addr_get_before = ({ void * value; asm("movq $__cfaasm_get_before, %[v]\n\t" : [v]"=r"(value) ); value; });225 // void * __cfaasm_addr_get_after = ({ void * value; asm("movq $__cfaasm_get_after , %[v]\n\t" : [v]"=r"(value) ); value; });226 // void * __cfaasm_addr_set_before = ({ void * value; asm("movq $__cfaasm_set_before, %[v]\n\t" : [v]"=r"(value) ); value; });227 // void * __cfaasm_addr_set_after = ({ void * value; asm("movq $__cfaasm_set_after , %[v]\n\t" : [v]"=r"(value) ); value; });228 229 // printf("%p to %p\n", __cfaasm_addr_get_before, __cfaasm_addr_get_after);230 // printf("%p to %p\n", __cfaasm_addr_set_before, __cfaasm_addr_set_after);231 // return 0;232 // }233 234 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )235 262 236 263 extern "C" { … … 239 266 // create a assembler label before 240 267 // marked as clobber all to avoid movement 241 asm volatile("__cfaasm_d isable_before:":::"memory");268 asm volatile("__cfaasm_dsable_before:":::"memory"); 242 269 243 270 with( __cfaabi_tls.preemption_state ) { … … 261 288 // create a assembler label after 262 289 // marked as clobber all to avoid movement 263 asm volatile("__cfaasm_d isable_after:":::"memory");290 asm volatile("__cfaasm_dsable_after:":::"memory"); 264 291 } 265 292 … … 269 296 // create a assembler label before 270 297 // marked as clobber all to avoid movement 271 asm volatile("__cfaasm_en able_before:":::"memory");298 asm volatile("__cfaasm_enble_before:":::"memory"); 272 299 273 300 processor * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store … … 304 331 // create a assembler label after 305 332 // marked as clobber all to avoid movement 306 asm volatile("__cfaasm_en able_after:":::"memory");333 asm volatile("__cfaasm_enble_after:":::"memory"); 307 334 } 308 335 … … 372 399 } 373 400 401 
//----------------------------------------------------------------------------- 402 // Some assembly required 403 #if defined( __i386 ) 404 #ifdef __PIC__ 405 #define RELOC_PRELUDE( label ) \ 406 "calll .Lcfaasm_prelude_" #label "$pb\n\t" \ 407 ".Lcfaasm_prelude_" #label "$pb:\n\t" \ 408 "popl %%eax\n\t" \ 409 ".Lcfaasm_prelude_" #label "_end:\n\t" \ 410 "addl $_GLOBAL_OFFSET_TABLE_+(.Lcfaasm_prelude_" #label "_end-.Lcfaasm_prelude_" #label "$pb), %%eax\n\t" 411 #define RELOC_PREFIX "" 412 #define RELOC_SUFFIX "@GOT(%%eax)" 413 #else 414 #define RELOC_PREFIX "$" 415 #define RELOC_SUFFIX "" 416 #endif 417 #define __cfaasm_label( label ) static struct asm_region label = \ 418 ({ \ 419 struct asm_region region; \ 420 asm( \ 421 RELOC_PRELUDE( label ) \ 422 "movl " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \ 423 "movl " RELOC_PREFIX "__cfaasm_" #label "_after" RELOC_SUFFIX ", %[va]\n\t" \ 424 : [vb]"=r"(region.before), [va]"=r"(region.after) \ 425 ); \ 426 region; \ 427 }); 428 #elif defined( __x86_64 ) 429 #ifdef __PIC__ 430 #define RELOC_PREFIX "" 431 #define RELOC_SUFFIX "@GOTPCREL(%%rip)" 432 #else 433 #define RELOC_PREFIX "$" 434 #define RELOC_SUFFIX "" 435 #endif 436 #define __cfaasm_label( label ) static struct asm_region label = \ 437 ({ \ 438 struct asm_region region; \ 439 asm( \ 440 "movq " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \ 441 "movq " RELOC_PREFIX "__cfaasm_" #label "_after" RELOC_SUFFIX ", %[va]\n\t" \ 442 : [vb]"=r"(region.before), [va]"=r"(region.after) \ 443 ); \ 444 region; \ 445 }); 446 #elif defined( __aarch64__ ) 447 #ifdef __PIC__ 448 #define RELOC_TAG "@PLT" 449 #else 450 #define RELOC_TAG "" 451 #endif 452 #define __cfaasm_label( label ) \ 453 ({ \ 454 struct asm_region region; \ 455 asm( \ 456 "mov %[vb], __cfaasm_" #label "_before@GOTPCREL(%%rip)" "\n\t" \ 457 "mov %[va], __cfaasm_" #label "_after@GOTPCREL(%%rip)" "\n\t" \ 458 : [vb]"=r"(region.before), [va]"=r"(region.after) \ 459 
); \ 460 region; \ 461 }); 462 #else 463 #error unknown hardware architecture 464 #endif 465 374 466 // KERNEL ONLY 375 467 // Check if a __cfactx_switch signal handler shoud defer 376 468 // If true : preemption is safe 377 469 // If false : preemption is unsafe and marked as pending 378 static inline bool preemption_ready() { 470 static inline bool preemption_ready( void * ip ) { 471 // Get all the region for which it is not safe to preempt 472 __cfaasm_label( get ); 473 __cfaasm_label( check ); 474 __cfaasm_label( dsable ); 475 __cfaasm_label( enble ); 476 __cfaasm_label( nopoll ); 477 379 478 // Check if preemption is safe 380 bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress; 381 479 bool ready = true; 480 if( __cfaasm_in( ip, get ) ) { ready = false; goto EXIT; }; 481 if( __cfaasm_in( ip, check ) ) { ready = false; goto EXIT; }; 482 if( __cfaasm_in( ip, dsable ) ) { ready = false; goto EXIT; }; 483 if( __cfaasm_in( ip, enble ) ) { ready = false; goto EXIT; }; 484 if( __cfaasm_in( ip, nopoll ) ) { ready = false; goto EXIT; }; 485 if( !__cfaabi_tls.preemption_state.enabled) { ready = false; goto EXIT; }; 486 if( __cfaabi_tls.preemption_state.in_progress ) { ready = false; goto EXIT; }; 487 488 EXIT: 382 489 // Adjust the pending flag accordingly 383 490 __cfaabi_tls.this_processor->pending_preemption = !ready; … … 454 561 // Kernel Signal Handlers 455 562 //============================================================================================= 456 struct asm_region { 457 void * before; 458 void * after; 459 }; 460 461 //----------------------------------------------------------------------------- 462 // Some assembly required 463 #if defined( __i386 ) 464 #define __cfaasm_label( label ) \ 465 ({ \ 466 struct asm_region region; \ 467 asm( \ 468 "movl $__cfaasm_" #label "_before, %[vb]\n\t" \ 469 "movl $__cfaasm_" #label "_after , %[va]\n\t" \ 470 : [vb]"=r"(region.before), [vb]"=r"(region.before) \ 471 ); \ 
472 region; \ 473 }); 474 #elif defined( __x86_64 ) 475 #ifdef __PIC__ 476 #define PLT "@PLT" 477 #else 478 #define PLT "" 479 #endif 480 #define __cfaasm_label( label ) \ 481 ({ \ 482 struct asm_region region; \ 483 asm( \ 484 "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \ 485 "movq $__cfaasm_" #label "_after" PLT ", %[va]\n\t" \ 486 : [vb]"=r"(region.before), [va]"=r"(region.after) \ 487 ); \ 488 region; \ 489 }); 490 #elif defined( __aarch64__ ) 491 #error __cfaasm_label undefined for arm 492 #else 493 #error unknown hardware architecture 494 #endif 563 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; ) 495 564 496 565 // Context switch signal handler … … 514 583 515 584 // Check if it is safe to preempt here 516 if( !preemption_ready() ) { return; } 517 518 struct asm_region region; 519 region = __cfaasm_label( get ); if( ip >= region.before && ip <= region.after ) return; 520 region = __cfaasm_label( check ); if( ip >= region.before && ip <= region.after ) return; 521 region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return; 522 region = __cfaasm_label( enable ); if( ip >= region.before && ip <= region.after ) return; 523 region = __cfaasm_label( nopoll ); if( ip >= region.before && ip <= region.after ) return; 585 if( !preemption_ready( ip ) ) { return; } 524 586 525 587 __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
Note: See TracChangeset
for help on using the changeset viewer.