Changeset 175f9f4

- Timestamp: Jan 18, 2022, 1:16:23 PM
- Branches: ADT, ast-experimental, enum, forall-pointer-decay, master, pthread-emulation, qualifiedEnum
- Children: 1e8b4b49, adfd125
- Parents: 21a5bfb7 (diff), 91a72ef (diff)
- Files: 10 added, 5 deleted, 34 edited

Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
benchmark/readyQ/churn.cfa (r21a5bfb7 → r175f9f4)
```diff
 	wait( sem );
 	for() {
-		uint64_t r = thread_rand();
+		uint32_t r = prng();
 		bench_sem * next = __atomic_exchange_n(&spots[r % spot_cnt], &sem, __ATOMIC_SEQ_CST);
 		if(next) post( *next );
```
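The 64-bit `thread_rand()` is replaced by the new 32-bit `prng()`; 32 bits are plenty here because the value is immediately reduced modulo `spot_cnt`. A compilable C sketch of the benchmark's core step; `bench_sem` is reduced to an opaque struct and `SPOT_CNT` is a made-up constant:

```c
#include <stdint.h>

// Hypothetical stand-in for the benchmark's semaphore type.
typedef struct bench_sem { int value; } bench_sem;

#define SPOT_CNT 32
static bench_sem * spots[SPOT_CNT];   // shared array of parked semaphores

// Atomically swap our semaphore into a random spot; the previous occupant
// (if any) is returned so the caller can wake it -- the churn pattern in
// the loop above.
static bench_sem * churn_once(bench_sem * mine, uint32_t r) {
    return __atomic_exchange_n(&spots[r % SPOT_CNT], mine, __ATOMIC_SEQ_CST);
}
```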
benchmark/readyQ/locality.cfa (r21a5bfb7 → r175f9f4)
```diff
 __attribute__((noinline)) void work(MyData & data, size_t cnt_, uint64_t & state) {
 	for (cnt_) {
-		access(data, __xorshift64(state));
+		access(data, xorshift_13_7_17(state));
 	}
 }

 void main(MyThread & this) {
-	uint64_t state = thread_rand();
+	uint64_t state = prng();

 	// Wait for start
...
 	// Wait on a random spot
-	uint64_t idx = __xorshift64(state) % this.spots.len;
+	uint64_t idx = xorshift_13_7_17(state) % this.spots.len;
 	bool closed = put(*this.spots.ptr[idx], this, this.data, this.share);
```
libcfa/prelude/Makefile.am (r21a5bfb7 → r175f9f4)
```diff
-## Last Modified On : Mon Feb  3 21:27:18 2020
-## Update Count     : 208
+## Last Modified On : Thu Jan 13 17:06:27 2022
+## Update Count     : 215
...
 # create extra forward types/declarations to reduce inclusion of library files
 extras.cf : ${srcdir}/extras.regx ${srcdir}/extras.c
-	${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -f ${srcdir}/extras.regx > extras.cf
-	${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -zo -f ${srcdir}/extras.regx2 | tr '\0' '\n' >> extras.cf
+	@echo '# 2 "${@}" // needed for error messages from this file' > ${@}
+	${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -f ${srcdir}/extras.regx >> ${@}
+	${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -zo -f ${srcdir}/extras.regx2 | tr '\0' '\n' >> ${@}

 # create forward declarations for gcc builtins
 gcc-builtins.cf : gcc-builtins.c ${srcdir}/prototypes.sed
-	${AM_V_GEN}gcc -I${srcdir} -E -P $< | sed -r -f ${srcdir}/prototypes.sed > $@
+	@echo '# 2 "${@}" // needed for error messages from this file' > ${@}
+	${AM_V_GEN}gcc -I${srcdir} -E -P $< | sed -r -f ${srcdir}/prototypes.sed >> ${@}

 gcc-builtins.c : ${srcdir}/builtins.def ${srcdir}/prototypes.awk ${srcdir}/sync-builtins.cf ${srcdir}/prototypes.c
-	${AM_V_GEN}gcc -I${srcdir} -E ${srcdir}/prototypes.c | awk -f ${srcdir}/prototypes.awk > $@
+	${AM_V_GEN}gcc -I${srcdir} -E ${srcdir}/prototypes.c | awk -f ${srcdir}/prototypes.awk > ${@}

 prelude.cfa : prelude-gen.cc
 	${AM_V_GEN}${CXX} ${AM_CXXFLAGS} ${CXXFLAGS} ${AM_CFLAGS} ${<} -o prelude-gen -Wall -Wextra -O2 -g -std=c++14
-	@./prelude-gen > $@
+	@./prelude-gen > ${@}
 	@rm ./prelude-gen
...
 # create forward declarations for cfa builtins
 builtins.cf : builtins.c @LOCAL_CFACC@
-	${AM_V_GEN}gcc ${AM_CFLAGS} -E -P ${<} -o ${@} -MD -MP -MF $(DEPDIR)/builtins.Po -D__cforall
+	${AM_V_GEN}gcc ${AM_CFLAGS} -E ${<} -o ${@} -MD -MP -MF $(DEPDIR)/builtins.Po -D__cforall
 	${AM_V_at}sed -i 's/builtins.o/builtins.cf/g' $(DEPDIR)/builtins.Po
...
 bootloader.c : ${srcdir}/bootloader.cf prelude.cfa extras.cf gcc-builtins.cf builtins.cf @CFACPP@
-	${AM_V_GEN}@CFACPP@ --prelude-dir=${builddir} -tpm ${srcdir}/bootloader.cf $@ # use src/cfa-cpp as not in lib until after install
+	${AM_V_GEN}@CFACPP@ --prelude-dir=${builddir} -tpm ${srcdir}/bootloader.cf ${@} # use src/cfa-cpp as not in lib until after install
```
libcfa/src/bits/locks.hfa (r21a5bfb7 → r175f9f4)
```diff
 		// previous thread to acquire the lock
 		void* prev_thrd;
+		// keep track of number of times we had to spin, just in case the number is unexpectedly huge
+		size_t spin_count;
 	#endif
 };
...
 static inline void ?{}( __spinlock_t & this ) {
 	this.lock = 0;
+	#ifdef __CFA_DEBUG__
+		this.spin_count = 0;
+	#endif
 }
...
 	for ( unsigned int i = 1;; i += 1 ) {
 		if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
+		#ifdef __CFA_DEBUG__
+			this.spin_count++;
+		#endif
 		#ifndef NOEXPBACK
 			// exponential spin
```
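A minimal C11 sketch of the same idea, assuming a plain test-and-set spinlock and a `DEBUG` guard standing in for `__CFA_DEBUG__`: counting failed acquisition attempts costs nothing on the uncontended path and makes pathological contention visible in a debugger.

```c
#include <stdatomic.h>
#include <stddef.h>

struct spinlock {
    atomic_flag lock;        // initialize with ATOMIC_FLAG_INIT
#ifdef DEBUG
    size_t spin_count;       // how often we spun; huge values flag contention bugs
#endif
};

static void spin_lock(struct spinlock * this) {
    for (;;) {
        if (!atomic_flag_test_and_set_explicit(&this->lock, memory_order_acquire))
            return;          // acquired
#ifdef DEBUG
        this->spin_count++;  // racy but fine for a debug heuristic
#endif
    }
}

static void spin_unlock(struct spinlock * this) {
    atomic_flag_clear_explicit(&this->lock, memory_order_release);
}
```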
libcfa/src/bits/random.hfa (r21a5bfb7 → r175f9f4)
```diff
 #include <stdint.h>

+// Pipelined to allow out-of-order overlap with reduced dependencies. Critically, return the current value, and compute
+// and store the next value.
+
 //--------------------------------------------------
 #if defined(__SIZEOF_INT128__)
-	typedef __uint128_t __lehmer64_state_t;
-	static inline uint64_t __lehmer64( __lehmer64_state_t & state ) {
+	static inline uint64_t lehmer64( __uint128_t & state ) {
+		__uint128_t ret = state;
 		state *= 0xda942042e4dd58b5;
-		return state >> 64;
+		return ret >> 64;
 	}

 //--------------------------------------------------
-	typedef uint64_t __wyhash64_state_t;
-	static inline uint64_t __wyhash64( __wyhash64_state_t & state ) {
+	static inline uint64_t wyhash64( uint64_t & state ) {
 		state += 0x60bee2bee120fc15;
 		__uint128_t tmp;
...
 //--------------------------------------------------
-typedef uint64_t __xorshift64_state_t;
-static inline uint64_t __xorshift64( __xorshift64_state_t & state ) {
-	uint64_t x = state;
-	x ^= x << 13;
-	x ^= x >> 7;
-	x ^= x << 17;
-	return state = x;
+static inline uint64_t xorshift_13_7_17( uint64_t & state ) {
+	uint64_t ret = state;
+	state ^= state << 13;
+	state ^= state >> 7;
+	state ^= state << 17;
+	return ret;
 }
+
+//--------------------------------------------------
+static inline uint32_t xorshift_6_21_7( uint32_t & state ) {
+	uint32_t ret = state;
+	state ^= state << 6;
+	state ^= state >> 21;
+	state ^= state << 7;
+	return ret;
+} // xorshift_6_21_7

 //--------------------------------------------------
 typedef struct {
 	uint32_t a, b, c, d;
 	uint32_t counter;
-} __xorwow__state_t;
+} xorwow__state_t;

 /* The state array must be initialized to not be all zero in the first four words */
-static inline uint32_t __xorwow( __xorwow__state_t & state ) {
+static inline uint32_t xorwow( xorwow__state_t & state ) {
 	/* Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs" */
+	uint32_t ret = state.a + state.counter;
 	uint32_t t = state.d;
...
 	state.counter += 362437;
-	return t + state.counter;
+	return ret;
 }
+
+//--------------------------------------------------
+static inline uint32_t LCG( uint32_t & state ) {	// linear congruential generator
+	uint32_t ret = state;
+	state = 36969 * (state & 65535) + (state >> 16);	// 36969 is NOT prime! No not change it!
+	return ret;
+} // LCG
+
+//--------------------------------------------------
+#define M  (1_l64u << 48_l64u)
+#define A  (25214903917_l64u)
+#define AI (18446708753438544741_l64u)
+#define C  (11_l64u)
+#define D  (16_l64u)
+
+static inline uint32_t LCGBI_fwd( uint64_t & state ) {
+	state = (A * state + C) & (M - 1);
+	return state >> D;
+}
+
+static inline uint32_t LCGBI_bck( uint64_t & state ) {
+	unsigned int r = state >> D;
+	state = AI * (state - C) & (M - 1);
+	return r;
+}
+
+#undef M
+#undef A
+#undef AI
+#undef C
+#undef D
```
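The new header comment is the point of the whole rewrite: every generator now hands back the *current* state and then computes the next one, so the caller's use of the result does not have to wait on the generator's dependency chain. A C sketch of the `xorshift_13_7_17` shape:

```c
#include <stdint.h>

// Return the current value, then advance the state. The three dependent
// xor-shift steps overlap with whatever the caller does with `ret`.
static inline uint64_t xorshift_13_7_17(uint64_t * state) {
    uint64_t ret = *state;   // value returned this call
    uint64_t x = *state;
    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;
    *state = x;              // value returned next call
    return ret;
}
```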
libcfa/src/common.hfa (r21a5bfb7 → r175f9f4)
```diff
-// common --
+// common.hfa --
```
(The remaining hunks only strip trailing whitespace from the surrounding header-comment lines.)
libcfa/src/concurrency/clib/cfathread.cfa (r21a5bfb7 → r175f9f4)
```diff
 #include "thread.hfa"
 #include "time.hfa"
+#include "stdlib.hfa"

 #include "cfathread.h"
...
 	eevent.data.u64 = (uint64_t)active_thread();

-	int id = thread_rand() % poller_cnt;
+	int id = prng() % poller_cnt;
 	if(0 != epoll_ctl(poller_fds[id], EPOLL_CTL_ADD, fd, &eevent))
 	{
```
libcfa/src/concurrency/invoke.h (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Thu Jan  6 16:37:40 2022
-// Update Count     : 47
+// Last Modified On : Sun Jan  9 19:06:45 2022
+// Update Count     : 48
...
 	struct processor * last_proc;

+	uint32_t random_state; // fast random numbers
+
 	#if defined( __CFA_WITH_VERIFY__ )
 		void * canary;
```
libcfa/src/concurrency/io.cfa (r21a5bfb7 → r175f9f4)
```diff
 	/* paranoid */ verify( proc == __cfaabi_tls.this_processor );
 	/* paranoid */ verify( ! __preemption_enabled() );
+
+	return true;
 }
 #endif
```
libcfa/src/concurrency/kernel.cfa (r21a5bfb7 → r175f9f4)
```diff
 	/* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );

-	const bool local = thrd->state != Start;
 	if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
```
libcfa/src/concurrency/kernel.hfa (r21a5bfb7 → r175f9f4)
```diff
 		unsigned target;
 		unsigned last;
-		unsigned cnt;
-		unsigned long long int cutoff;
+		signed   cpu;
 	} rdq;
...
 	volatile unsigned long long tv;
 	volatile unsigned long long ma;
 };
+
+struct __attribute__((aligned(16))) __cache_id_t {
+	volatile unsigned id;
+};
...
 static inline void ^?{}(__timestamp_t & this) {}

+struct __attribute__((aligned(128))) __ready_queue_caches_t;
+void  ?{}(__ready_queue_caches_t & this);
+void ^?{}(__ready_queue_caches_t & this);
+
 //TODO adjust cache size to ARCHITECTURE
-// Structure holding the relaxed ready queue
+// Structure holding the ready queue
 struct __ready_queue_t {
 	// Data tracking the actual lanes
...
 	// Array of times
 	__timestamp_t * volatile tscs;
+
+	__cache_id_t * volatile caches;

 	// Array of stats
```
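Both new structures carry explicit alignment attributes so that per-cache bookkeeping does not false-share between processors updating neighbouring entries. A minimal C11 sketch of the padding idea (128 bytes matches the diff's `__ready_queue_caches_t` attribute; the field name is illustrative):

```c
#include <stdalign.h>

// Each counter gets its own cache-line-sized slot, so two processors
// hammering adjacent entries never invalidate each other's line.
struct cache_id {
    alignas(128) volatile unsigned id;
};
```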
libcfa/src/concurrency/kernel/fwd.hfa (r21a5bfb7 → r175f9f4)
```diff
 static inline uint64_t __tls_rand() {
+	return
 	#if defined(__SIZEOF_INT128__)
-		return __lehmer64( kernelTLS().rand_seed );
+		lehmer64( kernelTLS().rand_seed );
 	#else
-		return __xorshift64( kernelTLS().rand_seed );
+		xorshift_13_7_17( kernelTLS().rand_seed );
 	#endif
 }

-#define M  (1_l64u << 48_l64u)
-#define A  (25214903917_l64u)
-#define AI (18446708753438544741_l64u)
-#define C  (11_l64u)
-#define D  (16_l64u)
-
 static inline unsigned __tls_rand_fwd() {
-	kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1);
-	return kernelTLS().ready_rng.fwd_seed >> D;
+	return LCGBI_fwd( kernelTLS().ready_rng.fwd_seed );
 }

 static inline unsigned __tls_rand_bck() {
-	unsigned int r = kernelTLS().ready_rng.bck_seed >> D;
-	kernelTLS().ready_rng.bck_seed = AI * (kernelTLS().ready_rng.bck_seed - C) & (M - 1);
-	return r;
-}
-
-#undef M
-#undef A
-#undef AI
-#undef C
-#undef D
+	return LCGBI_bck( kernelTLS().ready_rng.bck_seed );
+}

 static inline void __tls_rand_advance_bck(void) {
...
 extern void disable_interrupts();
...
-extern uint64_t thread_rand();

 // Semaphore which only supports a single thread
```
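The bidirectional LCG constants moved into bits/random.hfa as `LCGBI_fwd`/`LCGBI_bck`. The backward step works because `AI` acts as the multiplicative inverse of `A` modulo `M = 2^48`, so unwinding the state replays the same outputs in reverse. A standalone C check of that claim, with the constants copied from the diff:

```c
#include <stdint.h>
#include <assert.h>

#define M  (1ULL << 48)
#define A  25214903917ULL            // multiplier
#define AI 18446708753438544741ULL   // A's multiplicative inverse mod 2^48
#define C  11ULL
#define D  16

static uint32_t fwd(uint64_t * s) { *s = (A * *s + C) & (M - 1); return (uint32_t)(*s >> D); }
static uint32_t bck(uint64_t * s) { uint32_t r = (uint32_t)(*s >> D); *s = AI * (*s - C) & (M - 1); return r; }

int main(void) {
    uint64_t s = 1009, saved = s;
    uint32_t f[8], b[8];
    for (int i = 0; i < 8; i++) f[i] = fwd(&s);        // walk forward
    for (int i = 7; i >= 0; i--) b[i] = bck(&s);       // walk back
    for (int i = 0; i < 8; i++) assert(f[i] == b[i]);  // same sequence both ways
    assert(s == saved);                                // state fully restored
    return 0;
}
```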
libcfa/src/concurrency/kernel/startup.cfa (r21a5bfb7 → r175f9f4)
```diff
 #include "kernel_private.hfa"
 #include "startup.hfa" // STARTUP_PRIORITY_XXX
+#include "limits.hfa"
 #include "math.hfa"
...
 extern void __wake_proc(processor *);
 extern int cfa_main_returned; // from interpose.cfa
+extern uint32_t __global_random_seed;
...
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
+	random_state = __global_random_seed;
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
...
 	this.rdq.its = 0;
 	this.rdq.itr = 0;
-	this.rdq.id  = -1u;
-	this.rdq.target = -1u;
-	this.rdq.last = -1u;
-	this.rdq.cutoff = 0ull;
+	this.rdq.id  = MAX;
+	this.rdq.target = MAX;
+	this.rdq.last = MAX;
+	this.rdq.cpu = 0;
+	// this.rdq.cutoff = 0ull;
 	do_terminate = false;
 	preemption_alarm = 0p;
...
 	uint_fast32_t last_size;
 	[this->unique_id, last_size] = ready_mutate_register();
+
+	this->rdq.cpu = __kernel_getcpu();

 	this->cltr->procs.total += 1u;
```
libcfa/src/concurrency/locks.hfa (r21a5bfb7 → r175f9f4)
```diff
 #include "time_t.hfa"
 #include "time.hfa"
-
-//-----------------------------------------------------------------------------
-// Semaphores
-
-// '0-nary' semaphore
-// Similar to a counting semaphore except the value of one is never reached
-// as a consequence, a V() that would bring the value to 1 *spins* until
-// a P consumes it
-struct Semaphore0nary {
-	__spinlock_t lock; // needed to protect
-	mpsc_queue(thread$) queue;
-};
-
-static inline bool P(Semaphore0nary & this, thread$ * thrd) {
-	/* paranoid */ verify(!thrd`next);
-	/* paranoid */ verify(!(&(*thrd)`next));
-
-	push(this.queue, thrd);
-	return true;
-}
-
-static inline bool P(Semaphore0nary & this) {
-	thread$ * thrd = active_thread();
-	P(this, thrd);
-	park();
-	return true;
-}
-
-static inline thread$ * V(Semaphore0nary & this, bool doUnpark = true) {
-	thread$ * next;
-	lock(this.lock __cfaabi_dbg_ctx2);
-		for (;;) {
-			next = pop(this.queue);
-			if (next) break;
-			Pause();
-		}
-	unlock(this.lock);
-
-	if (doUnpark) unpark(next);
-	return next;
-}
-
-// Wrapper used on top of any sempahore to avoid potential locking
-struct BinaryBenaphore {
-	volatile ssize_t counter;
-};
-
-static inline {
-	void ?{}(BinaryBenaphore & this) { this.counter = 0; }
-	void ?{}(BinaryBenaphore & this, zero_t) { this.counter = 0; }
-	void ?{}(BinaryBenaphore & this, one_t ) { this.counter = 1; }
-
-	// returns true if no blocking needed
-	bool P(BinaryBenaphore & this) {
-		return __atomic_fetch_sub(&this.counter, 1, __ATOMIC_SEQ_CST) > 0;
-	}
-
-	bool tryP(BinaryBenaphore & this) {
-		ssize_t c = this.counter;
-		/* paranoid */ verify( c > MIN );
-		return (c >= 1) && __atomic_compare_exchange_n(&this.counter, &c, c-1, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
-	}
-
-	// returns true if notify needed
-	bool V(BinaryBenaphore & this) {
-		ssize_t c = 0;
-		for () {
-			/* paranoid */ verify( this.counter < MAX );
-			if (__atomic_compare_exchange_n(&this.counter, &c, c+1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-				if (c == 0) return true;
-				/* paranoid */ verify(c < 0);
-				return false;
-			} else {
-				if (c == 1) return true;
-				/* paranoid */ verify(c < 1);
-				Pause();
-			}
-		}
-	}
-}
-
-// Binary Semaphore based on the BinaryBenaphore on top of the 0-nary Semaphore
-struct ThreadBenaphore {
-	BinaryBenaphore ben;
-	Semaphore0nary  sem;
-};
-
-static inline void ?{}(ThreadBenaphore & this) {}
-static inline void ?{}(ThreadBenaphore & this, zero_t) { (this.ben){ 0 }; }
-static inline void ?{}(ThreadBenaphore & this, one_t ) { (this.ben){ 1 }; }
-
-static inline bool P(ThreadBenaphore & this)            { return P(this.ben) ? false : P(this.sem); }
-static inline bool tryP(ThreadBenaphore & this)         { return tryP(this.ben); }
-static inline bool P(ThreadBenaphore & this, bool wait) { return wait ? P(this) : tryP(this); }
-
-static inline thread$ * V(ThreadBenaphore & this, bool doUnpark = true) {
-	if (V(this.ben)) return 0p;
-	return V(this.sem, doUnpark);
-}

 //-----------------------------------------------------------------------------
...
 static inline void on_wakeup( owner_lock & this, size_t v ) { on_wakeup ( (blocking_lock &)this, v ); }
 static inline void on_notify( owner_lock & this, struct thread$ * t ) { on_notify( (blocking_lock &)this, t ); }
-
-struct fast_lock {
-	thread$ * volatile owner;
-	ThreadBenaphore sem;
-};
-
-static inline void ?{}(fast_lock & this) __attribute__((deprecated("use linear_backoff_then_block_lock instead")));
-static inline void ?{}(fast_lock & this) { this.owner = 0p; }
-
-static inline bool $try_lock(fast_lock & this, thread$ * thrd) {
-	thread$ * exp = 0p;
-	return __atomic_compare_exchange_n(&this.owner, &exp, thrd, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
-}
-
-static inline void lock( fast_lock & this ) __attribute__((deprecated("use linear_backoff_then_block_lock instead"), artificial));
-static inline void lock( fast_lock & this ) {
-	thread$ * thrd = active_thread();
-	/* paranoid */ verify(thrd != this.owner);
-
-	for (;;) {
-		if ($try_lock(this, thrd)) return;
-		P(this.sem);
-	}
-}
-
-static inline bool try_lock( fast_lock & this ) __attribute__((deprecated("use linear_backoff_then_block_lock instead"), artificial));
-static inline bool try_lock ( fast_lock & this ) {
-	thread$ * thrd = active_thread();
-	/* paranoid */ verify(thrd != this.owner);
-	return $try_lock(this, thrd);
-}
-
-static inline thread$ * unlock( fast_lock & this ) __attribute__((deprecated("use linear_backoff_then_block_lock instead"), artificial));
-static inline thread$ * unlock( fast_lock & this ) {
-	/* paranoid */ verify(active_thread() == this.owner);
-
-	// open 'owner' before unlocking anyone
-	// so new and unlocked threads don't park incorrectly.
-	// This may require additional fencing on ARM.
-	this.owner = 0p;
-
-	return V(this.sem);
-}
-
-static inline size_t on_wait( fast_lock & this ) { unlock(this); return 0; }
-static inline void on_wakeup( fast_lock & this, size_t ) { lock(this); }
-static inline void on_notify( fast_lock &, struct thread$ * t ) { unpark(t); }

 struct mcs_node {
```
libcfa/src/concurrency/ready_queue.cfa (r21a5bfb7 → r175f9f4)
```diff
-#define USE_RELAXED_FIFO
+// #define USE_RELAXED_FIFO
 // #define USE_WORK_STEALING
 // #define USE_CPU_WORK_STEALING
+#define USE_AWARE_STEALING

 #include "bits/defs.hfa"
...
 #include "stdlib.hfa"
+#include "limits.hfa"
 #include "math.hfa"
...
 #endif

-#if defined(USE_CPU_WORK_STEALING)
+#if defined(USE_AWARE_STEALING)
+	#define READYQ_SHARD_FACTOR 2
+	#define SEQUENTIAL_SHARD 2
+#elif defined(USE_CPU_WORK_STEALING)
 	#define READYQ_SHARD_FACTOR 2
 #elif defined(USE_RELAXED_FIFO)
...
 	__kernel_rseq_register();

-	__cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
 	bool * handle = (bool *)&kernelTLS().sched_lock;
...
-	__cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p done, id %lu\n", proc, n);
-
 	// Return new spot.
 	/* paranoid */ verify(n < ready);
...
 	__atomic_store_n(cell, 0p, __ATOMIC_RELEASE);
-
-	__cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc);

 	__kernel_rseq_unregister();
...
+//=======================================================================
+// caches handling
+
+struct __attribute__((aligned(128))) __ready_queue_caches_t {
+	// Count States:
+	// - 0  : No one is looking after this cache
+	// - 1  : No one is looking after this cache, BUT it's not empty
+	// - 2+ : At least one processor is looking after this cache
+	volatile unsigned count;
+};
+
+void  ?{}(__ready_queue_caches_t & this) { this.count = 0; }
+void ^?{}(__ready_queue_caches_t & this) {}
+
+static inline void depart(__ready_queue_caches_t & cache) {
+	/* paranoid */ verify( cache.count > 1);
+	__atomic_fetch_add(&cache.count, -1, __ATOMIC_SEQ_CST);
+	/* paranoid */ verify( cache.count != 0);
+	/* paranoid */ verify( cache.count < 65536 ); // This verify assumes no cluster will have more than 65000 kernel threads mapped to a single cache, which could be correct but is super weird.
+}
+
+static inline void arrive(__ready_queue_caches_t & cache) {
+	// for() {
+	//	unsigned expected = cache.count;
+	//	unsigned desired  = 0 == expected ? 2 : expected + 1;
+	// }
+}
+
 //=======================================================================
 // Cforall Ready Queue used for scheduling
 //=======================================================================
-unsigned long long moving_average(unsigned long long nval, unsigned long long oval) {
-	const unsigned long long tw = 16;
-	const unsigned long long nw = 4;
-	const unsigned long long ow = tw - nw;
-	return ((nw * nval) + (ow * oval)) / tw;
+unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) {
+	/* paranoid */ verifyf( currtsc < 45000000000000000, "Suspiciously large current time: %'llu (%llx)\n", currtsc, currtsc );
+	/* paranoid */ verifyf( instsc  < 45000000000000000, "Suspiciously large insert time: %'llu (%llx)\n", instsc, instsc );
+	/* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg );
+
+	const unsigned long long new_val = currtsc > instsc ? currtsc - instsc : 0;
+	const unsigned long long total_weight = 16;
+	const unsigned long long new_weight = 4;
+	const unsigned long long old_weight = total_weight - new_weight;
+	const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight;
+	return ret;
 }
...
-	lanes.data  = 0p;
-	lanes.tscs  = 0p;
-	lanes.help  = 0p;
-	lanes.count = 0;
+	lanes.data   = 0p;
+	lanes.tscs   = 0p;
+	lanes.caches = 0p;
+	lanes.help   = 0p;
+	lanes.count  = 0;
...
 	free(lanes.data);
 	free(lanes.tscs);
+	free(lanes.caches);
 	free(lanes.help);
...
 //-----------------------------------------------------------------------
+#if defined(USE_AWARE_STEALING)
+	__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
+		processor * const proc = kernelTLS().this_processor;
+		const bool external = (!proc) || (cltr != proc->cltr);
+		const bool remote   = hint == UNPARK_REMOTE;
+
+		unsigned i;
+		if( external || remote ) {
+			// Figure out where thread was last time and make sure it's valid
+			/* paranoid */ verify(thrd->preferred >= 0);
+			if(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count) {
+				/* paranoid */ verify(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count);
+				unsigned start = thrd->preferred * READYQ_SHARD_FACTOR;
+				do {
+					unsigned r = __tls_rand();
+					i = start + (r % READYQ_SHARD_FACTOR);
+					/* paranoid */ verify( i < lanes.count );
+					// If we can't lock it retry
+				} while( !__atomic_try_acquire( &lanes.data[i].lock ) );
+			} else {
+				do {
+					i = __tls_rand() % lanes.count;
+				} while( !__atomic_try_acquire( &lanes.data[i].lock ) );
+			}
+		} else {
+			do {
+				unsigned r = proc->rdq.its++;
+				i = proc->rdq.id + (r % READYQ_SHARD_FACTOR);
+				/* paranoid */ verify( i < lanes.count );
+				// If we can't lock it retry
+			} while( !__atomic_try_acquire( &lanes.data[i].lock ) );
+		}
+
+		// Actually push it
+		push(lanes.data[i], thrd);
+
+		// Unlock and return
+		__atomic_unlock( &lanes.data[i].lock );
+
+		#if !defined(__CFA_NO_STATISTICS__)
+			if(unlikely(external || remote)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
+			else __tls_stats()->ready.push.local.success++;
+		#endif
+	}
+
+	static inline unsigned long long calc_cutoff(const unsigned long long ctsc, const processor * proc, __ready_queue_t & rdq) {
+		unsigned start = proc->rdq.id;
+		unsigned long long max = 0;
+		for(i; READYQ_SHARD_FACTOR) {
+			unsigned long long ptsc = ts(rdq.lanes.data[start + i]);
+			if(ptsc != -1ull) {
+				/* paranoid */ verify( start + i < rdq.lanes.count );
+				unsigned long long tsc = moving_average(ctsc, ptsc, rdq.lanes.tscs[start + i].ma);
+				if(tsc > max) max = tsc;
+			}
+		}
+		return (max + 2 * max) / 2;
+	}
+
+	__attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+		/* paranoid */ verify( lanes.count > 0 );
+		/* paranoid */ verify( kernelTLS().this_processor );
+		/* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count );
+
+		processor * const proc = kernelTLS().this_processor;
+		unsigned this = proc->rdq.id;
+		/* paranoid */ verify( this < lanes.count );
+		__cfadbg_print_safe(ready_queue, "Kernel : pop from %u\n", this);
+
+		// Figure out the current cpu and make sure it is valid
+		const int cpu = __kernel_getcpu();
+		/* paranoid */ verify(cpu >= 0);
+		/* paranoid */ verify(cpu < cpu_info.hthrd_count);
+		unsigned this_cache = cpu_info.llc_map[cpu].cache;
+
+		// Super important: don't write the same value over and over again
+		// We want to maximise our chances that his particular values stays in cache
+		if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache)
+			__atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);
+
+		const unsigned long long ctsc = rdtscl();
+
+		if(proc->rdq.target == MAX) {
+			uint64_t chaos = __tls_rand();
+			unsigned ext = chaos & 0xff;
+			unsigned other = (chaos >> 8) % (lanes.count);
+
+			if(ext < 3 || __atomic_load_n(&lanes.caches[other / READYQ_SHARD_FACTOR].id, __ATOMIC_RELAXED) == this_cache) {
+				proc->rdq.target = other;
+			}
+		}
+		else {
+			const unsigned target = proc->rdq.target;
+			__cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, lanes.tscs[target].tv);
+			/* paranoid */ verify( lanes.tscs[target].tv != MAX );
+			if(target < lanes.count) {
+				const unsigned long long cutoff = calc_cutoff(ctsc, proc, cltr->ready_queue);
+				const unsigned long long age = moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma);
+				__cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");
+				if(age > cutoff) {
+					thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
+					if(t) return t;
+				}
+			}
+			proc->rdq.target = MAX;
+		}
+
+		for(READYQ_SHARD_FACTOR) {
+			unsigned i = this + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
+			if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
+		}
+
+		// All lanes where empty return 0p
+		return 0p;
+	}
+
+	__attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
+		unsigned i = __tls_rand() % lanes.count;
+		return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
+	}
+	__attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {
+		return search(cltr);
+	}
+#endif
 #if defined(USE_CPU_WORK_STEALING)
...
 	/* paranoid */ verify( kernelTLS().this_processor );

+	processor * const proc = kernelTLS().this_processor;
 	const int cpu = __kernel_getcpu();
 	/* paranoid */ verify(cpu >= 0);
...
 	/* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);

-	processor * const proc = kernelTLS().this_processor;
 	const int start = map.self * READYQ_SHARD_FACTOR;
 	const unsigned long long ctsc = rdtscl();

 	// Did we already have a help target
-	if(proc->rdq.target == -1u) {
+	if(proc->rdq.target == MAX) {
 		unsigned long long max = 0;
 		for(i; READYQ_SHARD_FACTOR) {
-			unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+			unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
 			if(tsc > max) max = tsc;
 		}
-		proc->rdq.cutoff = (max + 2 * max) / 2;
+		// proc->rdq.cutoff = (max + 2 * max) / 2;
 		/* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores.
 		/* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores.
...
-		/* paranoid */ verify(proc->rdq.target != -1u);
+		/* paranoid */ verify(proc->rdq.target != MAX);
 	}
 	else {
 		unsigned long long max = 0;
 		for(i; READYQ_SHARD_FACTOR) {
-			unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
+			unsigned long long tsc = moving_average(ctsc, ts(lanes.data[start + i]), lanes.tscs[start + i].ma);
 			if(tsc > max) max = tsc;
 		}
...
 		{
 			unsigned target = proc->rdq.target;
-			proc->rdq.target = -1u;
+			proc->rdq.target = MAX;
 			lanes.help[target / READYQ_SHARD_FACTOR].tri++;
-			if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
+			if(moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {
 				thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
 				proc->rdq.last = target;
 				if(t) return t;
-				else proc->rdq.target = -1u;
 			}
-			else proc->rdq.target = -1u;
+			proc->rdq.target = MAX;
 		}

 		unsigned last = proc->rdq.last;
-		if(last != -1u && lanes.tscs[last].tv < cutoff && ts(lanes.data[last]) < cutoff) {
+		if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) > cutoff) {
 			thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
 			if(t) return t;
 		}
 		else {
-			proc->rdq.last = -1u;
+			proc->rdq.last = MAX;
 		}
 	}
...
 	processor * const proc = kernelTLS().this_processor;
 	unsigned last = proc->rdq.last;
-	if(last != -1u) {
+	if(last != MAX) {
 		struct thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal));
 		if(t) return t;
-		proc->rdq.last = -1u;
+		proc->rdq.last = MAX;
 	}
...
 	unsigned preferred = thrd->preferred;
-	const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr;
+	const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || preferred == MAX || thrd->curr_cluster != cltr;
 	/* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count );
...
 	processor * proc = kernelTLS().this_processor;

-	if(proc->rdq.target == -1u) {
+	if(proc->rdq.target == MAX) {
 		unsigned long long min = ts(lanes.data[proc->rdq.id]);
 		for(int i = 0; i < READYQ_SHARD_FACTOR; i++) {
...
 	else {
 		unsigned target = proc->rdq.target;
-		proc->rdq.target = -1u;
+		proc->rdq.target = MAX;
 		const unsigned long long bias = 0; //2_500_000_000;
 		const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff;
...
 // try to pop from a lane given by index w
 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) {
+	/* paranoid */ verify( w < lanes.count );
 	__STATS( stats.attempt++; )
...
 	// Actually pop the list
 	struct thread$ * thrd;
-	unsigned long long tsc_before = ts(lane);
+	#if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING)
+		unsigned long long tsc_before = ts(lane);
+	#endif
 	unsigned long long tsv;
 	[thrd, tsv] = pop(lane);
...
 	__STATS( stats.success++; )

-	#if defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING)
-		unsigned long long now = rdtscl();
-		lanes.tscs[w].tv = tsv;
-		lanes.tscs[w].ma = moving_average(now > tsc_before ? now - tsc_before : 0, lanes.tscs[w].ma);
+	#if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING)
+		if (tsv != MAX) {
+			unsigned long long now = rdtscl();
+			unsigned long long pma = __atomic_load_n(&lanes.tscs[w].ma, __ATOMIC_RELAXED);
+			__atomic_store_n(&lanes.tscs[w].tv, tsv, __ATOMIC_RELAXED);
+			__atomic_store_n(&lanes.tscs[w].ma, moving_average(now, tsc_before, pma), __ATOMIC_RELAXED);
+		}
 	#endif

-	#if defined(USE_CPU_WORK_STEALING)
+	#if defined(USE_AWARE_STEALING) || defined(USE_CPU_WORK_STEALING)
 		thrd->preferred = w / READYQ_SHARD_FACTOR;
 	#else
...
 		/* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
 		it->rdq.id = value;
-		it->rdq.target = -1u;
+		it->rdq.target = MAX;
 		value += READYQ_SHARD_FACTOR;
 		it = &(*it)`next;
...
 static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) {
-	#if defined(USE_WORK_STEALING)
+	#if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING)
 		lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
 		for(i; lanes.count) {
-			unsigned long long tsc1 = ts(lanes.data[i]);
-			unsigned long long tsc2 = rdtscl();
-			lanes.tscs[i].tv = min(tsc1, tsc2);
+			lanes.tscs[i].tv = rdtscl();
+			lanes.tscs[i].ma = 0;
 		}
 	#endif
...
 	// Update original
 	lanes.count = ncount;
+
+	lanes.caches = alloc( target, lanes.caches`realloc );
...
 		fix(lanes.data[idx]);
 	}
+
+	lanes.caches = alloc( target, lanes.caches`realloc );
 }

 fix_times(cltr);

 reassign_cltr_id(cltr);
```
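Note the signature change to `moving_average`: the old call sites computed `ctsc - ts(lane)` themselves, which silently underflows when the lane timestamp races ahead of the current TSC; the new version takes both raw timestamps and clamps the difference at zero. A minimal C sketch of the 4/16 weighting and the derived helping cutoff, with names matching the diff:

```c
#include <stdint.h>

// New sample (current time minus enqueue time) gets weight 4/16; the old
// average keeps 12/16. Clamping guards against cross-CPU TSC skew.
static uint64_t moving_average(uint64_t currtsc, uint64_t instsc, uint64_t old_avg) {
    const uint64_t new_val = currtsc > instsc ? currtsc - instsc : 0;
    return (4 * new_val + 12 * old_avg) / 16;
}

// The helping cutoff is 1.5x the largest per-lane average, written in the
// diff as (max + 2 * max) / 2.
static uint64_t calc_cutoff(uint64_t max_avg) {
    return (max_avg + 2 * max_avg) / 2;
}
```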
libcfa/src/concurrency/thread.cfa (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Wed Dec  4 09:17:49 2019
-// Update Count     : 9
+// Last Modified On : Thu Jan 13 20:11:55 2022
+// Update Count     : 42
...
 #include "invoke.h"

-uint64_t thread_rand();
+extern uint32_t __global_random_seed;

 //-----------------------------------------------------------------------------
 // Thread ctors and dtors
 void ?{}( thread$ & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
 	context{ 0p, 0p };
...
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
+	random_state = __global_random_seed;
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
...
-uint64_t thread_rand() {
-	disable_interrupts();
-	uint64_t ret = __tls_rand();
-	enable_interrupts();
-	return ret;
-}
+//-----------------------------------------------------------------------------
+#define GENERATOR LCG
+
+void set_seed( uint32_t seed ) {
+	active_thread()->random_state = __global_random_seed = seed;
+	GENERATOR( active_thread()->random_state );
+} // set_seed
+uint32_t prng( void ) { return GENERATOR( active_thread()->random_state ); } // [0,UINT_MAX]
```
(The `?{}` signature line also changes in whitespace only.)
libcfa/src/device/cpu.cfa (r21a5bfb7 → r175f9f4)
```diff
 	unsigned c = pairings[i].cpu;
 	unsigned llc_id = pairings[i].id;
-	unsigned width = maps[llc_id].raw->width;
 	unsigned start = maps[llc_id].start;
-	unsigned self  = start + (maps[llc_id].count++);
-	entries[c].count = width;
+	entries[c].count = maps[llc_id].raw->width;
 	entries[c].start = start;
-	entries[c].self  = self;
+	entries[c].self  = start + (maps[llc_id].count++);
+	entries[c].cache = llc_id;
```
libcfa/src/device/cpu.hfa (r21a5bfb7 → r175f9f4)
```diff
 #include <stddef.h>

+// Map from cpu entry to a structure detailling cpus with common topologies
+// Note that the cpu-groups are contiguous so the indexing is different from
+// the cpu indexing
 struct cpu_map_entry_t {
+	// Where this particular cpu is in the group
 	unsigned self;
+
+	// Starting index of the cpus with the same topology
 	unsigned start;
+
+	// Number of cpus with the same topology
 	unsigned count;
+
+	// Index of the cache this entry describes
+	unsigned cache;
 };
```
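The new `cache` field is what the aware-stealing scheduler consumes: it turns "do these two cpus share a last-level cache?" into an O(1) index comparison. A small C sketch under a hypothetical 4-cpu topology (the map contents are made up for illustration):

```c
#include <stdio.h>

struct cpu_map_entry_t { unsigned self, start, count, cache; };

// Two cpus share a last-level cache exactly when their entries carry the
// same cache index.
static int shares_llc(const struct cpu_map_entry_t * map, unsigned a, unsigned b) {
    return map[a].cache == map[b].cache;
}

int main(void) {
    // Hypothetical machine: two 2-cpu LLC groups.
    struct cpu_map_entry_t map[4] = {
        { 0, 0, 2, 0 }, { 1, 0, 2, 0 },   // cpus 0-1 on cache 0
        { 0, 2, 2, 1 }, { 1, 2, 2, 1 },   // cpus 2-3 on cache 1
    };
    printf("%d %d\n", shares_llc(map, 0, 1), shares_llc(map, 1, 2)); // prints: 1 0
    return 0;
}
```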
libcfa/src/fstream.cfa (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Sun Oct 10 11:23:05 2021
-// Update Count     : 512
+// Last Modified On : Mon Jan 10 08:45:05 2022
+// Update Count     : 513
...
 inline void setPrt$( ofstream & os, bool state ) { os.prt$ = state; }

-inline void lock( ofstream & os ) with( os ) {
+inline void lock( ofstream & os ) with( os ) { lock( os.lock$ ); }
 inline void unlock( ofstream & os ) { unlock( os.lock$ ); }
```
libcfa/src/iostream.cfa (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Sun Oct 10 09:28:17 2021
-// Update Count     : 1345
+// Last Modified On : Mon Jan 17 16:38:32 2022
+// Update Count     : 1349
...
 ostype & ?|?( ostype & os, signed char sc ) {
 	if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) );
-	fmt( os, "%hhd", sc );
+	fmt( os, "%'hhd", sc );
 	return os;
 } // ?|?
...
-	fmt( os, "%hhu", usc );
+	fmt( os, "%'hhu", usc );
...
-	fmt( os, "%hd", si );
+	fmt( os, "%'hd", si );
...
-	fmt( os, "%hu", usi );
+	fmt( os, "%'hu", usi );
...
-	fmt( os, "%d", i );
+	fmt( os, "%'d", i );
...
-	fmt( os, "%u", ui );
+	fmt( os, "%'u", ui );
...
-	fmt( os, "%ld", li );
+	fmt( os, "%'ld", li );
...
-	fmt( os, "%lu", uli );
+	fmt( os, "%'lu", uli );
...
-	fmt( os, "%lld", lli );
+	fmt( os, "%'lld", lli );
...
-	fmt( os, "%llu", ulli );
+	fmt( os, "%'llu", ulli );
...
 	if ( ! f.flags.pc ) memcpy( &fmtstr, IFMTNP, sizeof(IFMTNP) ); \
 	else memcpy( &fmtstr, IFMTP, sizeof(IFMTP) ); \
-	int star = 4;					/* position before first '*' */ \
+	int star = 5;					/* position before first '*' */ \
 \
 	/* Insert flags into spaces before '*', from right to left. */ \
...
 	if ( f.flags.sign ) { fmtstr[star] = '+'; star -= 1; } \
 	if ( f.flags.pad0 && ! f.flags.pc ) { fmtstr[star] = '0'; star -= 1; } \
+	fmtstr[star] = '\''; star -= 1;			/* locale */ \
 	fmtstr[star] = '%'; \
...
-IntegralFMTImpl( signed char, " *hh ", " *.*hh " )
+IntegralFMTImpl( signed char, "  *hh ", "  *.*hh " )
```
(Each format template gains one leading space to make room for the `'` flag; the same change applies to the remaining nine IntegralFMTImpl instantiations, unsigned char through unsigned long long. The rendered diff collapses the exact space counts.)
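The `%'d`-style conversions rely on the POSIX apostrophe flag, which only produces digit grouping when the active locale defines a thousands separator; in the plain "C" locale it is a no-op. This is also why the `setlocale` call moves out of libcfa's startup (see libcfa/src/startup.cfa below). A small C demonstration; the `en_US.UTF-8` locale is an assumption and must be installed on the machine:

```c
#include <stdio.h>
#include <locale.h>

int main(void) {
    printf("%'d\n", 1234567);               // "1234567" in the default C locale
    setlocale(LC_NUMERIC, "en_US.UTF-8");   // assumes this locale is installed
    printf("%'d\n", 1234567);               // "1,234,567"
    return 0;
}
```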
libcfa/src/parseconfig.cfa (r21a5bfb7 → r175f9f4)
```diff
+#pragma GCC diagnostic push
+//#pragma GCC diagnostic ignored "-Wunused-parameter"
+//#pragma GCC diagnostic ignored "-Wunused-function"
+//#pragma GCC diagnostic ignored "-Wuninitialized"
+//#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+
 #include <fstream.hfa>
 #include <parseargs.hfa>
...
 // TODO: use string interface when it's ready (and implement exception msg protocol)
 [ void ] msg( * Missing_Config_Entries ex ) {
-	serr | nlOff;
-	serr | "The config file is missing " | ex->num_missing;
-	serr | nlOn;
-	if ( ex->num_missing == 1 ) {
-		serr | " entry.";
-	} else {
-		serr | " entries.";
-	}
+	serr | "The config file is missing " | ex->num_missing | "entr" | sepOff | (ex->num_missing == 1 ? "y." : "ies.");
 } // msg
...
 	return value < zero_val;
 }
+#pragma GCC diagnostic pop
```
libcfa/src/startup.cfa (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Sat Jan  9 23:18:23 2021
-// Update Count     : 34
+// Last Modified On : Mon Jan 17 16:41:54 2022
+// Update Count     : 55
...
 #include <locale.h>	// setlocale
 #include <stdlib.h>	// getenv
+#include "bits/defs.hfa"	// rdtscl
 #include "startup.hfa"
+
+extern uint32_t __global_random_seed;	// sequential/concurrent
+extern uint32_t __global_random_state;	// sequential

 extern "C" {
...
 	void __cfaabi_appready_startup( void ) {
 		tzset();	// initialize time global variables
-		setlocale( LC_NUMERIC, getenv("LANG") );
 		#ifdef __CFA_DEBUG__
 			extern void heapAppStart();
...
 	void __cfaabi_core_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_CORE ) ));
 	void __cfaabi_core_startup( void ) {
+		__global_random_state = __global_random_seed = rdtscl();
 		__cfaabi_interpose_startup();
 		__cfaabi_device_startup();
```
libcfa/src/stdlib.cfa (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Mon Jan  3 09:36:27 2022
-// Update Count     : 519
+// Last Modified On : Thu Jan 13 21:38:30 2022
+// Update Count     : 593
...
 #include "stdlib.hfa"
+#include "bits/random.hfa"
+#include "concurrency/invoke.h"	// random_state
...
-static uint32_t seed = 0;			// current seed
-static thread_local uint32_t state;		// random state
-
-void set_seed( uint32_t seed_ ) { state = seed = seed_; }
-uint32_t get_seed() { return seed; }
-
 #define GENERATOR LCG

-inline uint32_t MarsagliaXor( uint32_t & state ) {
-	if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
-	else if ( unlikely( state == 0 ) ) state = seed;
-	state ^= state << 6;
-	state ^= state >> 21;
-	state ^= state << 7;
-	return state;
-} // MarsagliaXor
-
-inline uint32_t LCG( uint32_t & state ) {	// linear congruential generator
-	if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
-	else if ( unlikely( state == 0 ) ) state = seed;
-	return state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime!
-} // LCG
-
+uint32_t __global_random_seed;			// sequential/concurrent
+uint32_t __global_random_state;			// sequential only
+
+void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; GENERATOR( state ); } // set seed
 uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); }

-uint32_t prng( void ) { return GENERATOR( state ); }
+void set_seed( uint32_t seed ) { __global_random_seed = seed; GENERATOR( __global_random_state ); }
+uint32_t get_seed() { return __global_random_seed; }
+uint32_t prng( void ) { return GENERATOR( __global_random_state ); } // [0,UINT_MAX]
```
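The lazy zero-seed checks disappear: the seed is now set unconditionally at startup (from `rdtscl()`, see startup.cfa above), and seeding advances the generator once so the raw seed is scrambled before the first draw. A simplified, single-threaded C sketch of the resulting split between the global seed/state pair and per-object `PRNG` state; the helper names here are illustrative, not the library's:

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t global_seed, global_state;     // behind the free prng()

struct PRNG { uint32_t callcnt, seed, state; };  // per-object generator

static uint32_t lcg(uint32_t * s) {            // the diff's GENERATOR, pipelined
    uint32_t r = *s;
    *s = 36969 * (*s & 65535) + (*s >> 16);
    return r;
}

static void set_seed(uint32_t seed) { global_state = global_seed = seed; lcg(&global_state); }
static uint32_t prng_global(void) { return lcg(&global_state); }

static void set_seed_obj(struct PRNG * p, uint32_t seed) { p->state = p->seed = seed; lcg(&p->state); }
static uint32_t prng_obj(struct PRNG * p) { p->callcnt++; return lcg(&p->state); }

int main(void) {
    set_seed(1009);
    struct PRNG p = { 0, 0, 0 };
    set_seed_obj(&p, 1009);
    printf("%u %u\n", prng_global(), prng_obj(&p)); // identical: same seed, same generator
    return 0;
}
```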
libcfa/src/stdlib.hfa (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Sun Jan  2 22:53:57 2022
-// Update Count     : 594
+// Last Modified On : Thu Jan 13 21:34:46 2022
+// Update Count     : 636
...
+// Sequential Pseudo Random-Number Generator : generate repeatable sequence of values that appear random.
+//
+// Declaration :
+//   PRNG sprng = { 1009 }	- set starting seed versus random seed
+//
+// Interface :
+//   set_seed( sprng, 1009 )	- set starting seed for ALL kernel threads versus random seed
+//   get_seed( sprng )		- read seed
+//   prng( sprng )		- generate random value in range [0,UINT_MAX]
+//   prng( sprng, u )		- generate random value in range [0,u)
+//   prng( sprng, l, u )	- generate random value in range [l,u]
+//   calls( sprng )		- number of generated random value so far
+//
+// Examples : generate random number between 5-21
+//   prng( sprng ) % 17 + 5;	values 0-16 + 5 = 5-21
+//   prng( sprng, 16 + 1 ) + 5;
+//   prng( sprng, 5, 21 );
+//   calls( sprng );
+
 struct PRNG {
 	uint32_t callcnt;	// call count
...
 }; // PRNG

-extern uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
+void set_seed( PRNG & prng, uint32_t seed_ );
+uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
 static inline {
-	void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; } // set seed
 	void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); }	// random seed
 	void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); }	// fixed seed
...
 } // distribution

-extern void set_seed( uint32_t seed );	// set per thread seed
-extern uint32_t get_seed();		// get seed
-extern uint32_t prng( void ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
+// Concurrent Pseudo Random-Number Generator : generate repeatable sequence of values that appear random.
+//
+// Interface :
+//   set_seed( 1009 )	- fixed seed for all kernel threads versus random seed
+//   get_seed()		- read seed
+//   prng()		- generate random value in range [0,UINT_MAX]
+//   prng( u )		- generate random value in range [0,u)
+//   prng( l, u )	- generate random value in range [l,u]
+//
+// Examples : generate random number between 5-21
+//   prng() % 17 + 5;	values 0-16 + 5 = 5-21
+//   prng( 16 + 1 ) + 5;
+//   prng( 5, 21 );
+
+void set_seed( uint32_t seed_ ) OPTIONAL_THREAD;
+uint32_t get_seed() __attribute__(( warn_unused_result ));
+uint32_t prng( void ) __attribute__(( warn_unused_result )) OPTIONAL_THREAD; // [0,UINT_MAX]
 static inline {
-	uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result ));
-	uint32_t prng( uint32_t u ) { return prng() % u; } // [0,u)
-	uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result ));
-	uint32_t prng( uint32_t l, uint32_t u ) { return prng( u - l + 1 ) + l; } // [l,u]
+	uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result )) { return prng() % u; } // [0,u)
+	uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( u - l + 1 ) + l; } // [l,u]
 } // distribution
```
(Plus minor whitespace-only hunks around the `alloc_internal$` distribution block.)
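The new header comments document the range helpers, which reduce to two one-liners. A runnable C mirror of that mapping; the generator below is a stand-in so the sketch executes, not the library's routine:

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t state = 1009;
static uint32_t prng0(void) {                   // stand-in for prng()
    uint32_t r = state;
    state = 36969 * (state & 65535) + (state >> 16);
    return r;
}
static uint32_t prng_u(uint32_t u) { return prng0() % u; }                        // [0,u)
static uint32_t prng_lu(uint32_t l, uint32_t u) { return prng_u(u - l + 1) + l; } // [l,u]

int main(void) {
    // The documented example: three ways to draw from 5-21.
    printf("%u %u %u\n", prng0() % 17 + 5, prng_u(17) + 5, prng_lu(5, 21));
    return 0;
}
```

Note the modulus mapping is slightly biased whenever the range does not evenly divide 2^32; the interface trades that bias for speed.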
src/AST/Decl.cpp (r21a5bfb7 → r175f9f4)
```diff
 #include "Node.hpp"	// for readonly
 #include "Type.hpp"	// for readonly
+#include "Expr.hpp"

 namespace ast {
...
 	for (auto & tp : this->type_params) {
 		ftype->forall.emplace_back(new TypeInstType(tp->name, tp));
+		for (auto & ap : tp->assertions) {
+			ftype->assertions.emplace_back(new VariableExpr(loc, ap));
+		}
 	}
 	this->type = ftype;
```
src/AST/Decl.hpp (r21a5bfb7 → r175f9f4)
```diff
 // Must be included in *all* AST classes; should be #undef'd at the end of the file
 #define MUTATE_FRIEND \
-    template<typename node_t> friend node_t * mutate(const node_t * node); \
+	template<typename node_t> friend node_t * mutate(const node_t * node); \
 	template<typename node_t> friend node_t * shallowCopy(const node_t * node);
...
 	std::vector< ptr<Expr> > withExprs;

-
 	FunctionDecl( const CodeLocation & loc, const std::string & name, std::vector<ptr<TypeDecl>>&& forall,
 		std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns,
```
src/AST/Eval.hpp (r21a5bfb7 → r175f9f4)
```diff
 template< typename... Args >
 UntypedExpr * call( const CodeLocation & loc, const std::string & name, Args &&... args ) {
-	return new UntypedExpr { 
-		loc, new NameExpr { loc, name }, 
+	return new UntypedExpr {
+		loc, new NameExpr { loc, name },
 		std::vector< ptr< Expr > > { std::forward< Args >( args )... } };
 }
```
(Trailing whitespace removed from the two continuation lines.)
src/Validate/InitializerLength.cpp (r21a5bfb7 → r175f9f4)
```diff
-//#include "InitializerLength.hpp"
+#include "InitializerLength.hpp"

 #include "AST/Expr.hpp"
```
src/Validate/InitializerLength.hpp (r21a5bfb7 → r175f9f4)
```diff
+#pragma once
+
+namespace ast {
+	class TranslationUnit;
+}
+
 namespace Validate {
```
tests/device/cpu.cfa (r21a5bfb7 → r175f9f4)
```diff
+#include <device/cpu.hfa>
+#include <limits.hfa>
 #include <fstream.hfa>
-#include <device/cpu.hfa>
 #include <stdlib.hfa>
...
 	unsigned found_level = 0;
-	unsigned found = -1u;
+	unsigned found = MAX;
 	for(i; idxs) {
 		unsigned idx = idxs - 1 - i;
...
-	/* paranoid */ verify(found != -1u);
+	/* paranoid */ verify(found != MAX);
 	return found;
 }
```
tests/io/io-acquire.cfa (r21a5bfb7 → r175f9f4)
```diff
-// Last Modified On : Wed Oct  6 18:04:58 2021
-// Update Count     : 72
+// Last Modified On : Fri Jan 14 09:13:18 2022
+// Update Count     : 74
...
 #include <mutex_stmt.hfa>

+Duration default_preemption() { return 0; }
+
 thread T {};
 void main( T & ) {
...
 	for ( 100 ) {	// expression protection
-		mutex( sout) sout | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9;
+		mutex( sout ) sout | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9;
 	}
 	mutex( sout ) {	// statement protection
...
 	int a, b, c, d, e, f, g, h, i;
 	for ( 100 ) {	// expression protection
-		mutex( sin) sin | a | b | c | d | e | f | g | h | i;
+		mutex( sin ) sin | a | b | c | d | e | f | g | h | i;
 	}
 	mutex( sin ) {	// statement protection
```
tests/unified_locking/.expect/locks.txt (r21a5bfb7 → r175f9f4)
```diff
 Start Test 6: owner lock and condition variable 3 wait/notify all
 Done Test 6
-Start Test 7: fast lock and condition variable single wait/notify
+Start Test 7: linear backoff lock and condition variable single wait/notify
 Done Test 7
-Start Test 8: fast lock and condition variable 3 wait/notify all
+Start Test 8: linear backoff lock and condition variable 3 wait/notify all
 Done Test 8
-Start Test 9: linear backoff lock and condition variable single wait/notify
+Start Test 9: multi acquisiton lock and condition variable multiple acquire and wait/notify
 Done Test 9
-Start Test 10: linear backoff lock and condition variable 3 wait/notify all
+Start Test 10: owner lock and condition variable multiple acquire and wait/notify
 Done Test 10
-Start Test 11: multi acquisiton lock and condition variable multiple acquire and wait/notify
+Start Test 11: no lock condition variable wait/notify
 Done Test 11
-Start Test 12: owner lock and condition variable multiple acquire and wait/notify
+Start Test 12: locked condition variable wait/notify with front()
 Done Test 12
-Start Test 13: no lock condition variable wait/notify
-Done Test 13
-Start Test 14: locked condition variable wait/notify with front()
-Done Test 14
```
tests/unified_locking/locks.cfa (r21a5bfb7 → r175f9f4)
```diff
 condition_variable( owner_lock ) c_o;

-fast_lock f;
-condition_variable( fast_lock ) c_f;
-
 linear_backoff_then_block_lock l;
 condition_variable( linear_backoff_then_block_lock ) c_l;
...
 	}
 	unlock(s);
-	}
-}
-
-thread T_C_F_WS1 {};
-
-void main( T_C_F_WS1 & this ) {
-	for (unsigned int i = 0; i < num_times; i++) {
-		lock(f);
-		if(empty(c_f) && i != num_times - 1) {
-			wait(c_f,f);
-		}else{
-			notify_one(c_f);
-		}
-		unlock(f);
-	}
-}
-
-thread T_C_F_WB1 {};
-
-void main( T_C_F_WB1 & this ) {
-	for (unsigned int i = 0; i < num_times; i++) {
-		lock(f);
-		if(counter(c_f) == 3 || i == num_times - 1) {
-			notify_all(c_f);
-		}else{
-			wait(c_f,f);
-		}
-		unlock(f);
 	}
 }
...
 	printf("Done Test 6\n");

-	printf("Start Test 7: fast lock and condition variable single wait/notify\n");
-	{
-		T_C_F_WS1 t1[2];
+	printf("Start Test 7: linear backoff lock and condition variable single wait/notify\n");
+	{
+		T_C_L_WS1 t1[2];
 	}
 	printf("Done Test 7\n");

-	printf("Start Test 8: fast lock and condition variable 3 wait/notify all\n");
-	{
-		T_C_F_WB1 t1[4];
+	printf("Start Test 8: linear backoff lock and condition variable 3 wait/notify all\n");
+	{
+		T_C_L_WB1 t1[4];
 	}
 	printf("Done Test 8\n");

-	printf("Start Test 9: linear backoff lock and condition variable single wait/notify\n");
-	{
-		T_C_L_WS1 t1[2];
+	printf("Start Test 9: multi acquisiton lock and condition variable multiple acquire and wait/notify\n");
+	{
+		T_C_M_WS2 t1[2];
 	}
 	printf("Done Test 9\n");

-	printf("Start Test 10: linear backoff lock and condition variable 3 wait/notify all\n");
-	{
-		T_C_L_WB1 t1[4];
+	printf("Start Test 10: owner lock and condition variable multiple acquire and wait/notify\n");
+	{
+		T_C_O_WS2 t1[2];
 	}
 	printf("Done Test 10\n");

-	printf("Start Test 11: multi acquisiton lock and condition variable multiple acquire and wait/notify\n");
-	{
-		T_C_M_WS2 t1[2];
-	}
-	printf("Done Test 11\n");
-
-	printf("Start Test 12: owner lock and condition variable multiple acquire and wait/notify\n");
-	{
-		T_C_O_WS2 t1[2];
-	}
-	printf("Done Test 12\n");
-
-	printf("Start Test 13: no lock condition variable wait/notify\n");
+	printf("Start Test 11: no lock condition variable wait/notify\n");
 	{
 		T_C_NLW t1;
 		T_C_NLS t2;
 	}
-	printf("Done Test 13\n");
-
-	printf("Start Test 14: locked condition variable wait/notify with front()\n");
+	printf("Done Test 11\n");
+
+	printf("Start Test 12: locked condition variable wait/notify with front()\n");
 	{
 		T_C_S_WNF t1[2];
 	}
-	printf("Done Test 14\n");
+	printf("Done Test 12\n");
 }
```
tools/jenkins/setup.sh.in (r21a5bfb7 → r175f9f4)
```diff
 regex1='/([[:alpha:][:digit:]@/_.-]+)'
 regex2='(libcfa[[:alpha:][:digit:].]+) => not found'
-regex3='linux-vdso.so.1'
+regex3='linux-vdso.so.1|linux-gate.so.1'
 if [[ $line =~ $regex1 ]]; then
 	retsysdeps+=(${BASH_REMATCH[1]})
```