Changeset 42daeb4 for libcfa/src
- Timestamp:
- Jan 13, 2022, 10:53:25 AM (4 years ago)
- Branches:
- ADT, ast-experimental, enum, forall-pointer-decay, master, pthread-emulation, qualifiedEnum
- Children:
- 3eaa689
- Parents:
- 3bb12921 (diff), 00f5fde (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Location:
- libcfa/src
- Files:
-
- 14 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/bits/locks.hfa
r3bb12921 r42daeb4 31 31 // previous thread to acquire the lock 32 32 void* prev_thrd; 33 // keep track of number of times we had to spin, just in case the number is unexpectedly huge 34 size_t spin_count; 33 35 #endif 34 36 }; … … 48 50 static inline void ?{}( __spinlock_t & this ) { 49 51 this.lock = 0; 52 #ifdef __CFA_DEBUG__ 53 this.spin_count = 0; 54 #endif 50 55 } 51 56 … … 72 77 for ( unsigned int i = 1;; i += 1 ) { 73 78 if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break; 79 #ifdef __CFA_DEBUG__ 80 this.spin_count++; 81 #endif 74 82 #ifndef NOEXPBACK 75 83 // exponential spin -
libcfa/src/concurrency/invoke.h
r3bb12921 r42daeb4 10 10 // Created On : Tue Jan 17 12:27:26 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 6 16:37:40202213 // Update Count : 4 712 // Last Modified On : Sun Jan 9 19:06:45 2022 13 // Update Count : 48 14 14 // 15 15 … … 211 211 struct processor * last_proc; 212 212 213 uint32_t random_state; // fast random numbers 214 213 215 #if defined( __CFA_WITH_VERIFY__ ) 214 216 void * canary; -
libcfa/src/concurrency/io.cfa
r3bb12921 r42daeb4 548 548 /* paranoid */ verify( proc == __cfaabi_tls.this_processor ); 549 549 /* paranoid */ verify( ! __preemption_enabled() ); 550 551 return true; 550 552 } 551 553 #endif -
libcfa/src/concurrency/kernel.cfa
r3bb12921 r42daeb4 554 554 /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary ); 555 555 556 const bool local = thrd->state != Start;557 556 if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready; 558 557 … … 737 736 738 737 // Check if there is a sleeping processor 739 int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST); 738 // int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST); 739 int fd = 0; 740 if( __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST) != 0 ) { 741 fd = __atomic_exchange_n(&this->procs.fd, 0, __ATOMIC_RELAXED); 742 } 740 743 741 744 // If no one is sleeping, we are done -
libcfa/src/concurrency/kernel/fwd.hfa
r3bb12921 r42daeb4 77 77 78 78 static inline uint64_t __tls_rand() { 79 return 79 80 #if defined(__SIZEOF_INT128__) 80 return__lehmer64( kernelTLS().rand_seed );81 __lehmer64( kernelTLS().rand_seed ); 81 82 #else 82 return__xorshift64( kernelTLS().rand_seed );83 __xorshift64( kernelTLS().rand_seed ); 83 84 #endif 84 85 } … … 91 92 92 93 static inline unsigned __tls_rand_fwd() { 93 94 94 kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1); 95 95 return kernelTLS().ready_rng.fwd_seed >> D; … … 112 112 } 113 113 } 114 115 116 114 117 115 extern void disable_interrupts(); -
libcfa/src/concurrency/kernel/startup.cfa
r3bb12921 r42daeb4 101 101 extern void __wake_proc(processor *); 102 102 extern int cfa_main_returned; // from interpose.cfa 103 extern uint32_t __global_random_seed; 103 104 104 105 //----------------------------------------------------------------------------- … … 489 490 preferred = ready_queue_new_preferred(); 490 491 last_proc = 0p; 492 random_state = __global_random_seed; 491 493 #if defined( __CFA_WITH_VERIFY__ ) 492 494 canary = 0x0D15EA5E0D15EA5Ep; -
libcfa/src/concurrency/ready_queue.cfa
r3bb12921 r42daeb4 681 681 // Actually pop the list 682 682 struct thread$ * thrd; 683 unsigned long long tsc_before = ts(lane); 683 #if defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING) 684 unsigned long long tsc_before = ts(lane); 685 #endif 684 686 unsigned long long tsv; 685 687 [thrd, tsv] = pop(lane); -
libcfa/src/concurrency/thread.cfa
r3bb12921 r42daeb4 10 10 // Created On : Tue Jan 17 12:27:26 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Dec 4 09:17:49 201913 // Update Count : 912 // Last Modified On : Wed Jan 12 18:46:48 2022 13 // Update Count : 36 14 14 // 15 15 … … 27 27 uint64_t thread_rand(); 28 28 29 extern uint32_t __global_random_seed; 30 29 31 //----------------------------------------------------------------------------- 30 32 // Thread ctors and dtors 31 void ?{}( thread$ & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {33 void ?{}( thread$ & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) { 32 34 context{ 0p, 0p }; 33 35 self_cor{ name, storage, storageSize }; … … 45 47 preferred = ready_queue_new_preferred(); 46 48 last_proc = 0p; 49 random_state = __global_random_seed; 47 50 #if defined( __CFA_WITH_VERIFY__ ) 48 51 canary = 0x0D15EA5E0D15EA5Ep; … … 177 180 return ret; 178 181 } 182 183 #define GENERATOR LCG 184 185 static inline uint32_t MarsagliaXor( uint32_t & state ) { 186 uint32_t ret = state; 187 state ^= state << 6; 188 state ^= state >> 21; 189 state ^= state << 7; 190 return ret; 191 } // MarsagliaXor 192 193 static inline uint32_t LCG( uint32_t & state ) { // linear congruential generator 194 uint32_t ret = state; 195 state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime! No not change it! 196 return ret; 197 } // LCG 198 199 void set_seed( uint32_t seed ) { 200 active_thread()->random_state = __global_random_seed = seed; 201 GENERATOR( active_thread()->random_state ); 202 } // set_seed 203 uint32_t prng( void ) { return GENERATOR( active_thread()->random_state ); } // [0,UINT_MAX] 179 204 180 205 // Local Variables: // -
libcfa/src/device/cpu.cfa
r3bb12921 r42daeb4 427 427 unsigned c = pairings[i].cpu; 428 428 unsigned llc_id = pairings[i].id; 429 unsigned width = maps[llc_id].raw->width;430 429 unsigned start = maps[llc_id].start; 431 unsigned self = start + (maps[llc_id].count++); 432 entries[c].count = width; 430 entries[c].count = maps[llc_id].raw->width; 433 431 entries[c].start = start; 434 entries[c].self = self; 432 entries[c].self = start + (maps[llc_id].count++); 433 entries[c].cache = llc_id; 435 434 } 436 435 -
libcfa/src/device/cpu.hfa
r3bb12921 r42daeb4 16 16 #include <stddef.h> 17 17 18 // Map from cpu entry to a structure detailling cpus with common topologies 19 // Note that the cpu-groups are contiguous so the indexing is different from 20 // the cpu indexing 18 21 struct cpu_map_entry_t { 22 // Where this particular cpu is in the group 19 23 unsigned self; 24 25 // Starting index of the cpus with the same topology 20 26 unsigned start; 27 28 // Number of cpus with the same topology 21 29 unsigned count; 30 31 // Index of the cache this entry describes 32 unsigned cache; 22 33 }; 23 34 -
libcfa/src/fstream.cfa
r3bb12921 r42daeb4 10 10 // Created On : Wed May 27 17:56:53 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Oct 10 11:23:05 202113 // Update Count : 51 212 // Last Modified On : Mon Jan 10 08:45:05 2022 13 // Update Count : 513 14 14 // 15 15 … … 52 52 inline void setPrt$( ofstream & os, bool state ) { os.prt$ = state; } 53 53 54 inline void lock( ofstream & os ) with( os ) { 54 inline void lock( ofstream & os ) with( os ) { lock( os.lock$ ); } 55 55 inline void unlock( ofstream & os ) { unlock( os.lock$ ); } 56 56 -
libcfa/src/startup.cfa
r3bb12921 r42daeb4 10 10 // Created On : Tue Jul 24 16:21:57 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sat Jan 9 23:18:23 202113 // Update Count : 3412 // Last Modified On : Wed Jan 12 18:51:24 2022 13 // Update Count : 51 14 14 // 15 15 … … 18 18 #include <stdlib.h> // getenv 19 19 #include "startup.hfa" 20 #include "bits/defs.hfa" 21 22 extern uint32_t __global_random_seed, __global_random_state; 20 23 21 24 extern "C" { … … 48 51 void __cfaabi_core_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_CORE ) )); 49 52 void __cfaabi_core_startup( void ) { 53 __global_random_state = __global_random_seed = rdtscl(); 50 54 __cfaabi_interpose_startup(); 51 55 __cfaabi_device_startup(); -
libcfa/src/stdlib.cfa
r3bb12921 r42daeb4 10 10 // Created On : Thu Jan 28 17:10:29 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 3 09:36:27202213 // Update Count : 5 1912 // Last Modified On : Wed Jan 12 18:52:41 2022 13 // Update Count : 582 14 14 // 15 15 16 16 #include "stdlib.hfa" 17 //#include "concurrency/kernel/fwd.hfa" 18 #include "concurrency/invoke.h" // random_state 17 19 18 20 //--------------------------------------- … … 221 223 //--------------------------------------- 222 224 223 static uint32_t seed = 0; // current seed 224 static thread_local uint32_t state; // random state 225 226 void set_seed( uint32_t seed_ ) { state = seed = seed_; } 227 uint32_t get_seed() { return seed; } 225 // Pipelined to allow OoO overlap with reduced dependencies. Critically, return the current value, and compute and store 226 // the next value. 228 227 229 228 #define GENERATOR LCG 230 229 231 inline uint32_t MarsagliaXor( uint32_t & state ) { 232 if ( unlikely( seed == 0 ) ) set_seed( rdtscl() ); 233 else if ( unlikely( state == 0 ) ) state = seed; 230 static inline uint32_t MarsagliaXor( uint32_t & state ) { 231 uint32_t ret = state; 234 232 state ^= state << 6; 235 233 state ^= state >> 21; 236 234 state ^= state << 7; 237 return state;235 return ret; 238 236 } // MarsagliaXor 239 237 240 inline uint32_t LCG( uint32_t & state ) {// linear congruential generator241 if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );242 else if ( unlikely( state == 0 ) ) state = seed;243 return state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime!238 static inline uint32_t LCG( uint32_t & state ) { // linear congruential generator 239 uint32_t ret = state; 240 state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime! No not change it! 
241 return ret; 244 242 } // LCG 245 243 244 uint32_t __global_random_seed; // sequential/concurrent 245 uint32_t __global_random_state; // sequential only 246 247 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; GENERATOR( state ); } // set seed 246 248 uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); } 247 249 248 uint32_t prng( void ) { return GENERATOR( state ); } 250 void set_seed( uint32_t seed ) { __global_random_seed = seed; GENERATOR( __global_random_state ); } 251 uint32_t get_seed() { return __global_random_seed; } 252 uint32_t prng( void ) { return GENERATOR( __global_random_state ); } // [0,UINT_MAX] 249 253 250 254 //--------------------------------------- -
libcfa/src/stdlib.hfa
r3bb12921 r42daeb4 10 10 // Created On : Thu Jan 28 17:12:35 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Jan 2 22:53:57202213 // Update Count : 59412 // Last Modified On : Wed Jan 12 18:56:13 2022 13 // Update Count : 621 14 14 // 15 15 … … 21 21 #include <stdlib.h> // *alloc, strto*, ato* 22 22 #include <heap.hfa> 23 23 24 24 25 // Reduce includes by explicitly defining these routines. … … 385 386 //--------------------------------------- 386 387 388 // Sequential Pseudo Random-Number Generator : generate repeatable sequence of values that appear random. 389 // 390 // Declaration : 391 // PRNG sprng = { 1009 } - set starting seed versus random seed 392 // 393 // Interface : 394 // set_seed( sprng, 1009 ) - set starting seed for ALL kernel threads versus random seed 395 // get_seed( sprng ) - read seed 396 // prng( sprng ) - generate random value in range [0,UINT_MAX] 397 // prng( sprng, u ) - generate random value in range [0,u) 398 // prng( sprng, l, u ) - generate random value in range [l,u] 399 // calls( sprng ) - number of generated random value so far 400 // 401 // Examples : generate random number between 5-21 402 // prng( sprng ) % 17 + 5; values 0-16 + 5 = 5-21 403 // prng( sprng, 16 + 1 ) + 5; 404 // prng( sprng, 5, 21 ); 405 // calls( sprng ); 406 387 407 struct PRNG { 388 408 uint32_t callcnt; // call count … … 391 411 }; // PRNG 392 412 393 extern uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 413 void set_seed( PRNG & prng, uint32_t seed_ ); 414 uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 394 415 static inline { 395 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; } // set seed396 416 void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); } // random seed 397 417 void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); } // fixed seed … … 402 422 } // distribution 403 423 404 extern void set_seed( 
uint32_t seed ); // set per thread seed 405 extern uint32_t get_seed(); // get seed 406 extern uint32_t prng( void ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 424 // Concurrent Pseudo Random-Number Generator : generate repeatable sequence of values that appear random. 425 // 426 // Interface : 427 // set_seed( 1009 ) - fixed seed for all kernel threads versus random seed 428 // get_seed() - read seed 429 // prng() - generate random value in range [0,UINT_MAX] 430 // prng( u ) - generate random value in range [0,u) 431 // prng( l, u ) - generate random value in range [l,u] 432 // 433 // Examples : generate random number between 5-21 434 // prng() % 17 + 5; values 0-16 + 5 = 5-21 435 // prng( 16 + 1 ) + 5; 436 // prng( 5, 21 ); 437 438 void set_seed( uint32_t seed_ ) OPTIONAL_THREAD; 439 uint32_t get_seed() __attribute__(( warn_unused_result )); 440 uint32_t prng( void ) __attribute__(( warn_unused_result )) OPTIONAL_THREAD; // [0,UINT_MAX] 407 441 static inline { 408 uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result )); 409 uint32_t prng( uint32_t u ) { return prng() % u; } // [0,u) 410 uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )); 411 uint32_t prng( uint32_t l, uint32_t u ) { return prng( u - l + 1 ) + l; } // [l,u] 442 uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result )) { return prng() % u; } // [0,u) 443 uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( u - l + 1 ) + l; } // [l,u] 412 444 } // distribution 413 445
Note:
See TracChangeset
for help on using the changeset viewer.