Changes in / [12c1eef:5235d49]
- Files:
-
- 6 added
- 12 deleted
- 40 edited
-
benchmark/readyQ/churn.cfa (modified) (1 diff)
-
benchmark/readyQ/locality.cfa (modified) (2 diffs)
-
benchmark/readyQ/transfer.cfa (modified) (6 diffs)
-
libcfa/prelude/Makefile.am (modified) (4 diffs)
-
libcfa/src/bits/locks.hfa (modified) (3 diffs)
-
libcfa/src/bits/random.hfa (modified) (4 diffs)
-
libcfa/src/common.hfa (modified) (2 diffs)
-
libcfa/src/concurrency/clib/cfathread.cfa (modified) (2 diffs)
-
libcfa/src/concurrency/coroutine.hfa (modified) (2 diffs)
-
libcfa/src/concurrency/invoke.h (modified) (3 diffs)
-
libcfa/src/concurrency/io.cfa (modified) (2 diffs)
-
libcfa/src/concurrency/kernel.cfa (modified) (4 diffs)
-
libcfa/src/concurrency/kernel.hfa (modified) (4 diffs)
-
libcfa/src/concurrency/kernel/fwd.hfa (modified) (3 diffs)
-
libcfa/src/concurrency/kernel/startup.cfa (modified) (11 diffs)
-
libcfa/src/concurrency/kernel_private.hfa (modified) (1 diff)
-
libcfa/src/concurrency/locks.hfa (modified) (2 diffs)
-
libcfa/src/concurrency/ready_queue.cfa (modified) (24 diffs)
-
libcfa/src/concurrency/thread.cfa (modified) (4 diffs)
-
libcfa/src/concurrency/thread.hfa (modified) (2 diffs)
-
libcfa/src/device/cpu.cfa (modified) (1 diff)
-
libcfa/src/device/cpu.hfa (modified) (1 diff)
-
libcfa/src/fstream.cfa (modified) (2 diffs)
-
libcfa/src/heap.cfa (modified) (28 diffs)
-
libcfa/src/iostream.cfa (modified) (24 diffs)
-
libcfa/src/parseconfig.cfa (modified) (3 diffs)
-
libcfa/src/startup.cfa (modified) (4 diffs)
-
libcfa/src/stdlib.cfa (modified) (2 diffs)
-
libcfa/src/stdlib.hfa (modified) (8 diffs)
-
src/AST/Decl.cpp (modified) (2 diffs)
-
src/AST/Decl.hpp (modified) (2 diffs)
-
src/AST/Eval.hpp (modified) (1 diff)
-
src/Validate/InitializerLength.cpp (modified) (1 diff)
-
src/Validate/InitializerLength.hpp (modified) (1 diff)
-
tests/concurrent/.expect/semaphore.txt (added)
-
tests/concurrent/.expect/spinaphore.txt (added)
-
tests/concurrent/semaphore.cfa (added)
-
tests/concurrent/spinaphore.cfa (added)
-
tests/device/cpu.cfa (modified) (3 diffs)
-
tests/io/.expect/io-acquire-in.txt (deleted)
-
tests/io/.expect/io-acquire-no-io.txt (deleted)
-
tests/io/.expect/io-acquire-out.txt (deleted)
-
tests/io/.expect/io-acquire2.txt (deleted)
-
tests/io/.in/io-acquire-in.txt (deleted)
-
tests/io/.in/io-acquire2.txt (deleted)
-
tests/io/io-acquire-in.cfa (deleted)
-
tests/io/io-acquire-no-io.cfa (deleted)
-
tests/io/io-acquire-out.cfa (deleted)
-
tests/io/io-acquire.cfa (modified) (4 diffs)
-
tests/io/io-acquire2.cfa (deleted)
-
tests/meta/.expect/dumpable.txt (deleted)
-
tests/meta/dumpable.cfa (deleted)
-
tests/unified_locking/.expect/fast.txt (added)
-
tests/unified_locking/.expect/locks.txt (modified) (1 diff)
-
tests/unified_locking/fast.cfa (added)
-
tests/unified_locking/locks.cfa (modified) (3 diffs)
-
tests/unified_locking/mutex_test.hfa (modified) (4 diffs)
-
tools/jenkins/setup.sh.in (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
benchmark/readyQ/churn.cfa
r12c1eef r5235d49 21 21 wait( sem ); 22 22 for() { 23 uint 32_t r = prng();23 uint64_t r = thread_rand(); 24 24 bench_sem * next = __atomic_exchange_n(&spots[r % spot_cnt], &sem, __ATOMIC_SEQ_CST); 25 25 if(next) post( *next ); -
benchmark/readyQ/locality.cfa
r12c1eef r5235d49 128 128 __attribute__((noinline)) void work(MyData & data, size_t cnt_, uint64_t & state) { 129 129 for (cnt_) { 130 access(data, xorshift_13_7_17(state));130 access(data, __xorshift64(state)); 131 131 } 132 132 } 133 133 134 134 void main(MyThread & this) { 135 uint64_t state = prng();135 uint64_t state = thread_rand(); 136 136 137 137 // Wait for start … … 144 144 145 145 // Wait on a random spot 146 uint64_t idx = xorshift_13_7_17(state) % this.spots.len;146 uint64_t idx = __xorshift64(state) % this.spots.len; 147 147 bool closed = put(*this.spots.ptr[idx], this, this.data, this.share); 148 148 -
benchmark/readyQ/transfer.cfa
r12c1eef r5235d49 1 1 #include "rq_bench.hfa" 2 2 #include <fstream.hfa> 3 #include <locale.h>4 3 5 4 Duration default_preemption() { … … 9 8 #define PRINT(...) 10 9 11 __ uint128_t lead_seed;10 __lehmer64_state_t lead_seed; 12 11 volatile unsigned leader; 13 12 volatile size_t lead_idx; … … 69 68 waitgroup(); 70 69 71 unsigned nleader = lehmer64( lead_seed ) % nthreads;70 unsigned nleader = __lehmer64( lead_seed ) % nthreads; 72 71 __atomic_store_n( &leader, nleader, __ATOMIC_SEQ_CST ); 73 72 … … 106 105 // ================================================== 107 106 int main(int argc, char * argv[]) { 108 uint64_t lead_seed = getpid();109 for(10) lehmer64( lead_seed );107 __lehmer64_state_t lead_seed = getpid(); 108 for(10) __lehmer64( lead_seed ); 110 109 unsigned nprocs = 2; 111 110 … … 127 126 128 127 lead_idx = 0; 129 leader = lehmer64( lead_seed ) % nthreads;128 leader = __lehmer64( lead_seed ) % nthreads; 130 129 131 130 size_t rechecks = 0; … … 168 167 } 169 168 170 setlocale( LC_NUMERIC, getenv( "LANG" ) );171 169 sout | "Duration (ms) : " | ws(3, 3, unit(eng((end - start)`dms))); 172 170 sout | "Number of processors : " | nprocs; -
libcfa/prelude/Makefile.am
r12c1eef r5235d49 11 11 ## Created On : Sun May 31 08:54:01 2015 12 12 ## Last Modified By : Peter A. Buhr 13 ## Last Modified On : Thu Jan 13 17:06:27 202214 ## Update Count : 2 1513 ## Last Modified On : Mon Feb 3 21:27:18 2020 14 ## Update Count : 208 15 15 ############################################################################### 16 16 … … 37 37 # create extra forward types/declarations to reduce inclusion of library files 38 38 extras.cf : ${srcdir}/extras.regx ${srcdir}/extras.c 39 @echo '# 2 "${@}" // needed for error messages from this file' > ${@} 40 ${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -f ${srcdir}/extras.regx >> ${@} 41 ${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -zo -f ${srcdir}/extras.regx2 | tr '\0' '\n' >> ${@} 39 ${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -f ${srcdir}/extras.regx > extras.cf 40 ${AM_V_GEN}gcc ${AM_CFLAGS} -E ${srcdir}/extras.c | grep -zo -f ${srcdir}/extras.regx2 | tr '\0' '\n' >> extras.cf 42 41 43 42 # create forward declarations for gcc builtins 44 43 gcc-builtins.cf : gcc-builtins.c ${srcdir}/prototypes.sed 45 @echo '# 2 "${@}" // needed for error messages from this file' > ${@} 46 ${AM_V_GEN}gcc -I${srcdir} -E -P $< | sed -r -f ${srcdir}/prototypes.sed >> ${@} 44 ${AM_V_GEN}gcc -I${srcdir} -E -P $< | sed -r -f ${srcdir}/prototypes.sed > $@ 47 45 48 46 gcc-builtins.c : ${srcdir}/builtins.def ${srcdir}/prototypes.awk ${srcdir}/sync-builtins.cf ${srcdir}/prototypes.c 49 ${AM_V_GEN}gcc -I${srcdir} -E ${srcdir}/prototypes.c | awk -f ${srcdir}/prototypes.awk > $ {@}47 ${AM_V_GEN}gcc -I${srcdir} -E ${srcdir}/prototypes.c | awk -f ${srcdir}/prototypes.awk > $@ 50 48 51 49 prelude.cfa : prelude-gen.cc 52 50 ${AM_V_GEN}${CXX} ${AM_CXXFLAGS} ${CXXFLAGS} ${AM_CFLAGS} ${<} -o prelude-gen -Wall -Wextra -O2 -g -std=c++14 53 @./prelude-gen > $ {@}51 @./prelude-gen > $@ 54 52 @rm ./prelude-gen 55 53 … … 60 58 # create forward declarations for cfa builtins 61 59 builtins.cf : builtins.c @LOCAL_CFACC@ 62 ${AM_V_GEN}gcc ${AM_CFLAGS} -E ${<} -o ${@} -MD -MP -MF $(DEPDIR)/builtins.Po -D__cforall60 ${AM_V_GEN}gcc ${AM_CFLAGS} -E -P ${<} -o ${@} -MD -MP -MF $(DEPDIR)/builtins.Po -D__cforall 63 61 ${AM_V_at}sed -i 's/builtins.o/builtins.cf/g' $(DEPDIR)/builtins.Po 64 62 … … 66 64 67 65 bootloader.c : ${srcdir}/bootloader.cf prelude.cfa extras.cf gcc-builtins.cf builtins.cf @CFACPP@ 68 ${AM_V_GEN}@CFACPP@ --prelude-dir=${builddir} -tpm ${srcdir}/bootloader.cf $ {@}# use src/cfa-cpp as not in lib until after install66 ${AM_V_GEN}@CFACPP@ --prelude-dir=${builddir} -tpm ${srcdir}/bootloader.cf $@ # use src/cfa-cpp as not in lib until after install 69 67 70 68 maintainer-clean-local : -
libcfa/src/bits/locks.hfa
r12c1eef r5235d49 31 31 // previous thread to acquire the lock 32 32 void* prev_thrd; 33 // keep track of number of times we had to spin, just in case the number is unexpectedly huge34 size_t spin_count;35 33 #endif 36 34 }; … … 50 48 static inline void ?{}( __spinlock_t & this ) { 51 49 this.lock = 0; 52 #ifdef __CFA_DEBUG__53 this.spin_count = 0;54 #endif55 50 } 56 51 … … 77 72 for ( unsigned int i = 1;; i += 1 ) { 78 73 if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break; 79 #ifdef __CFA_DEBUG__80 this.spin_count++;81 #endif82 74 #ifndef NOEXPBACK 83 75 // exponential spin -
libcfa/src/bits/random.hfa
r12c1eef r5235d49 1 //2 // Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo3 //4 // The contents of this file are covered under the licence agreement in the5 // file "LICENCE" distributed with Cforall.6 //7 // random.hfa --8 //9 // Author : Peter A. Buhr10 // Created On : Fri Jan 14 07:18:11 202211 // Last Modified By : Peter A. Buhr12 // Last Modified On : Fri Jan 14 07:18:58 202213 // Update Count : 114 //15 16 1 #pragma once 17 2 18 3 #include <stdint.h> 19 4 20 // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is returned 21 // (copied), and then compute and store the next random value. 22 5 //-------------------------------------------------- 23 6 #if defined(__SIZEOF_INT128__) 24 //-------------------------------------------------- 25 static inline uint64_t lehmer64( __uint128_t & state ) { 26 __uint128_t ret = state; 7 typedef __uint128_t __lehmer64_state_t; 8 static inline uint64_t __lehmer64( __lehmer64_state_t & state ) { 27 9 state *= 0xda942042e4dd58b5; 28 return ret>> 64;10 return state >> 64; 29 11 } 30 12 31 13 //-------------------------------------------------- 32 static inline uint64_t wyhash64( uint64_t & state ) { 14 typedef uint64_t __wyhash64_state_t; 15 static inline uint64_t __wyhash64( __wyhash64_state_t & state ) { 33 16 state += 0x60bee2bee120fc15; 34 17 __uint128_t tmp; … … 42 25 43 26 //-------------------------------------------------- 44 static inline uint64_t xorshift_13_7_17( uint64_t & state ) { 45 uint64_t ret = state; 46 state ^= state << 13; 47 state ^= state >> 7; 48 state ^= state << 17; 49 return ret; 27 typedef uint64_t __xorshift64_state_t; 28 static inline uint64_t __xorshift64( __xorshift64_state_t & state ) { 29 uint64_t x = state; 30 x ^= x << 13; 31 x ^= x >> 7; 32 x ^= x << 17; 33 return state = x; 50 34 } 51 52 //--------------------------------------------------53 static inline uint32_t xorshift_6_21_7( uint32_t & state ) {54 uint32_t ret = state;55 state ^= state << 6;56 state ^= state >> 21;57 state ^= state << 7;58 return ret;59 } // xorshift_6_21_760 35 61 36 //-------------------------------------------------- … … 63 38 uint32_t a, b, c, d; 64 39 uint32_t counter; 65 } xorwow__state_t;40 } __xorwow__state_t; 66 41 67 // The state array must be initialized to not be all zero in the first four words. 68 static inline uint32_t xorwow( xorwow__state_t & state ) { 69 // Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs". 70 uint32_t ret = state.a + state.counter; 42 /* The state array must be initialized to not be all zero in the first four words */ 43 static inline uint32_t __xorwow( __xorwow__state_t & state ) { 44 /* Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs" */ 71 45 uint32_t t = state.d; 72 46 … … 82 56 83 57 state.counter += 362437; 84 return ret;58 return t + state.counter; 85 59 } 86 87 //--------------------------------------------------88 static inline uint32_t LCG( uint32_t & state ) { // linear congruential generator89 uint32_t ret = state;90 state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime! No not change it!91 return ret;92 } // LCG93 94 //--------------------------------------------------95 #define M (1_l64u << 48_l64u)96 #define A (25214903917_l64u)97 #define AI (18446708753438544741_l64u)98 #define C (11_l64u)99 #define D (16_l64u)100 101 // Bi-directional LCG random-number generator102 static inline uint32_t LCGBI_fwd( uint64_t & state ) {103 state = (A * state + C) & (M - 1);104 return state >> D;105 }106 107 static inline uint32_t LCGBI_bck( uint64_t & state ) {108 unsigned int r = state >> D;109 state = AI * (state - C) & (M - 1);110 return r;111 }112 113 #undef M114 #undef A115 #undef AI116 #undef C117 #undef D -
libcfa/src/common.hfa
r12c1eef r5235d49 1 // 1 // 2 2 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo 3 3 // 4 4 // The contents of this file are covered under the licence agreement in the 5 5 // file "LICENCE" distributed with Cforall. 6 // 7 // common .hfa --8 // 6 // 7 // common -- 8 // 9 9 // Author : Peter A. Buhr 10 10 // Created On : Wed Jul 11 17:54:36 2018 … … 12 12 // Last Modified On : Wed May 5 14:02:04 2021 13 13 // Update Count : 18 14 // 14 // 15 15 16 16 #pragma once -
libcfa/src/concurrency/clib/cfathread.cfa
r12c1eef r5235d49 22 22 #include "thread.hfa" 23 23 #include "time.hfa" 24 #include "stdlib.hfa"25 24 26 25 #include "cfathread.h" … … 196 195 eevent.data.u64 = (uint64_t)active_thread(); 197 196 198 int id = prng() % poller_cnt;197 int id = thread_rand() % poller_cnt; 199 198 if(0 != epoll_ctl(poller_fds[id], EPOLL_CTL_ADD, fd, &eevent)) 200 199 { -
libcfa/src/concurrency/coroutine.hfa
r12c1eef r5235d49 10 10 // Created On : Mon Nov 28 12:27:26 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : T hu Jan 6 16:33:16 202213 // Update Count : 1 212 // Last Modified On : Tue Feb 4 12:29:26 2020 13 // Update Count : 11 14 14 // 15 15 … … 155 155 156 156 if( unlikely(dst->context.SP == 0p) ) { 157 __stack_prepare(&dst->stack, DEFAULT_STACK_SIZE);157 __stack_prepare(&dst->stack, 65000); 158 158 __cfactx_start(main, dst, cor, __cfactx_invoke_coroutine); 159 159 } -
libcfa/src/concurrency/invoke.h
r12c1eef r5235d49 10 10 // Created On : Tue Jan 17 12:27:26 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Jan 9 19:06:45 202213 // Update Count : 4 812 // Last Modified On : Thu Dec 5 16:26:03 2019 13 // Update Count : 44 14 14 // 15 15 … … 27 27 #ifndef _INVOKE_H_ 28 28 #define _INVOKE_H_ 29 30 enum { DEFAULT_STACK_SIZE = 65000 };31 29 32 30 struct __cfaehm_try_resume_node; … … 211 209 struct processor * last_proc; 212 210 213 uint32_t random_state; // fast random numbers214 215 211 #if defined( __CFA_WITH_VERIFY__ ) 216 212 void * canary; -
libcfa/src/concurrency/io.cfa
r12c1eef r5235d49 144 144 __ioarbiter_flush( ctx ); 145 145 146 if(ctx.sq.to_submit != 0 || min_comp > 0) { 147 148 __STATS__( true, io.calls.flush++; ) 149 int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8); 150 if( ret < 0 ) { 151 switch((int)errno) { 152 case EAGAIN: 153 case EINTR: 154 case EBUSY: 155 // Update statistics 156 __STATS__( false, io.calls.errors.busy ++; ) 157 return false; 158 default: 159 abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) ); 160 } 146 __STATS__( true, io.calls.flush++; ) 147 int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8); 148 if( ret < 0 ) { 149 switch((int)errno) { 150 case EAGAIN: 151 case EINTR: 152 case EBUSY: 153 // Update statistics 154 __STATS__( false, io.calls.errors.busy ++; ) 155 return false; 156 default: 157 abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) ); 161 158 } 162 163 __cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd); 164 __STATS__( true, io.calls.submitted += ret; ) 165 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 166 /* paranoid */ verify( ctx.sq.to_submit >= ret ); 167 168 ctx.sq.to_submit -= ret; 169 170 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 171 172 // Release the consumed SQEs 173 __release_sqes( ctx ); 174 175 /* paranoid */ verify( ! __preemption_enabled() ); 176 177 ctx.proc->io.pending = false; 178 } 179 159 } 160 161 __cfadbg_print_safe(io, "Kernel I/O : %u submitted to io_uring %d\n", ret, ctx.fd); 162 __STATS__( true, io.calls.submitted += ret; ) 163 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 164 /* paranoid */ verify( ctx.sq.to_submit >= ret ); 165 166 ctx.sq.to_submit -= ret; 167 168 /* paranoid */ verify( ctx.sq.to_submit <= *ctx.sq.num ); 169 170 // Release the consumed SQEs 171 __release_sqes( ctx ); 172 173 /* paranoid */ verify( ! __preemption_enabled() ); 174 175 ctx.proc->io.pending = false; 180 176 ready_schedule_lock(); 181 177 bool ret = __cfa_io_drain( proc ); … … 552 548 /* paranoid */ verify( proc == __cfaabi_tls.this_processor ); 553 549 /* paranoid */ verify( ! __preemption_enabled() ); 554 555 return true;556 550 } 557 551 #endif -
libcfa/src/concurrency/kernel.cfa
r12c1eef r5235d49 142 142 extern void __disable_interrupts_hard(); 143 143 extern void __enable_interrupts_hard(); 144 145 static inline void __disable_interrupts_checked() { 146 /* paranoid */ verify( __preemption_enabled() ); 147 disable_interrupts(); 148 /* paranoid */ verify( ! __preemption_enabled() ); 149 } 150 151 static inline void __enable_interrupts_checked( bool poll = true ) { 152 /* paranoid */ verify( ! __preemption_enabled() ); 153 enable_interrupts( poll ); 154 /* paranoid */ verify( __preemption_enabled() ); 155 } 144 156 145 157 … … 554 566 /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary ); 555 567 568 const bool local = thrd->state != Start; 556 569 if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready; 557 570 … … 736 749 737 750 // Check if there is a sleeping processor 738 // int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST); 739 int fd = 0; 740 if( __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST) != 0 ) { 741 fd = __atomic_exchange_n(&this->procs.fd, 0, __ATOMIC_RELAXED); 742 } 751 int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST); 743 752 744 753 // If no one is sleeping, we are done … … 767 776 // Unconditionnaly wake a thread 768 777 void __wake_proc(processor * this) { 769 /* paranoid */ verify( ! __preemption_enabled() );770 771 778 __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this); 772 779 773 eventfd_t val; 774 val = 1; 775 eventfd_write( this->idle_fd, val ); 776 777 /* paranoid */ verify( ! __preemption_enabled() ); 780 __disable_interrupts_checked(); 781 /* paranoid */ verify( ! __preemption_enabled() ); 782 eventfd_t val; 783 val = 1; 784 eventfd_write( this->idle_fd, val ); 785 __enable_interrupts_checked(); 778 786 } 779 787 -
libcfa/src/concurrency/kernel.hfa
r12c1eef r5235d49 67 67 unsigned target; 68 68 unsigned last; 69 signed cpu; 69 unsigned cnt; 70 unsigned long long int cutoff; 70 71 } rdq; 71 72 … … 151 152 volatile unsigned long long tv; 152 153 volatile unsigned long long ma; 153 };154 155 struct __attribute__((aligned(16))) __cache_id_t {156 volatile unsigned id;157 154 }; 158 155 … … 167 164 static inline void ^?{}(__timestamp_t & this) {} 168 165 169 struct __attribute__((aligned(128))) __ready_queue_caches_t;170 void ?{}(__ready_queue_caches_t & this);171 void ^?{}(__ready_queue_caches_t & this);172 173 166 //TODO adjust cache size to ARCHITECTURE 174 // Structure holding the re ady queue167 // Structure holding the relaxed ready queue 175 168 struct __ready_queue_t { 176 169 // Data tracking the actual lanes … … 184 177 // Array of times 185 178 __timestamp_t * volatile tscs; 186 187 __cache_id_t * volatile caches;188 179 189 180 // Array of stats -
libcfa/src/concurrency/kernel/fwd.hfa
r12c1eef r5235d49 77 77 78 78 static inline uint64_t __tls_rand() { 79 return80 79 #if defined(__SIZEOF_INT128__) 81 lehmer64( kernelTLS().rand_seed );80 return __lehmer64( kernelTLS().rand_seed ); 82 81 #else 83 xorshift_13_7_17( kernelTLS().rand_seed );82 return __xorshift64( kernelTLS().rand_seed ); 84 83 #endif 85 84 } 86 85 86 #define M (1_l64u << 48_l64u) 87 #define A (25214903917_l64u) 88 #define AI (18446708753438544741_l64u) 89 #define C (11_l64u) 90 #define D (16_l64u) 91 87 92 static inline unsigned __tls_rand_fwd() { 88 return LCGBI_fwd( kernelTLS().ready_rng.fwd_seed ); 93 94 kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1); 95 return kernelTLS().ready_rng.fwd_seed >> D; 89 96 } 90 97 91 98 static inline unsigned __tls_rand_bck() { 92 return LCGBI_bck( kernelTLS().ready_rng.bck_seed ); 93 } 99 unsigned int r = kernelTLS().ready_rng.bck_seed >> D; 100 kernelTLS().ready_rng.bck_seed = AI * (kernelTLS().ready_rng.bck_seed - C) & (M - 1); 101 return r; 102 } 103 104 #undef M 105 #undef A 106 #undef AI 107 #undef C 108 #undef D 94 109 95 110 static inline void __tls_rand_advance_bck(void) { … … 97 112 } 98 113 } 114 115 99 116 100 117 extern void disable_interrupts(); … … 125 142 } 126 143 } 144 145 extern uint64_t thread_rand(); 127 146 128 147 // Semaphore which only supports a single thread -
libcfa/src/concurrency/kernel/startup.cfa
r12c1eef r5235d49 34 34 #include "kernel_private.hfa" 35 35 #include "startup.hfa" // STARTUP_PRIORITY_XXX 36 #include "limits.hfa"37 36 #include "math.hfa" 38 37 … … 102 101 extern void __wake_proc(processor *); 103 102 extern int cfa_main_returned; // from interpose.cfa 104 extern uint32_t __global_random_seed;105 103 106 104 //----------------------------------------------------------------------------- … … 178 176 179 177 178 180 179 //============================================================================================= 181 180 // Kernel Setup logic … … 280 279 // When its coroutine terminates, it return control to the mainThread 281 280 // which is currently here 282 /* paranoid */ verify( !__atomic_load_n(&mainProcessor->do_terminate, __ATOMIC_ACQUIRE) );283 281 __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE); 284 __wake_proc( mainProcessor );285 282 __kernel_last_resume( __cfaabi_tls.this_processor ); 286 283 mainThread->self_cor.state = Halted; … … 406 403 407 404 __cfaabi_tls.this_thread->curr_cor = dst; 408 __stack_prepare( &dst->stack, DEFAULT_STACK_SIZE);405 __stack_prepare( &dst->stack, 65000 ); 409 406 __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine); 410 407 … … 490 487 preferred = ready_queue_new_preferred(); 491 488 last_proc = 0p; 492 random_state = __global_random_seed;493 489 #if defined( __CFA_WITH_VERIFY__ ) 494 490 canary = 0x0D15EA5E0D15EA5Ep; … … 515 511 this.rdq.its = 0; 516 512 this.rdq.itr = 0; 517 this.rdq.id = MAX; 518 this.rdq.target = MAX; 519 this.rdq.last = MAX; 520 this.rdq.cpu = 0; 521 // this.rdq.cutoff = 0ull; 513 this.rdq.id = -1u; 514 this.rdq.target = -1u; 515 this.rdq.last = -1u; 516 this.rdq.cutoff = 0ull; 522 517 do_terminate = false; 523 518 preemption_alarm = 0p; … … 569 564 extern size_t __page_size; 570 565 void ^?{}(processor & this) with( this ){ 571 /* paranoid */ verify( !__atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ); 572 __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this); 573 574 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED); 575 __disable_interrupts_checked(); 566 if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) { 567 __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this); 568 569 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED); 576 570 __wake_proc( &this ); 577 __enable_interrupts_checked(); 578 579 wait( terminated);580 /* paranoid */ verify( active_processor() != &this);571 572 wait( terminated ); 573 /* paranoid */ verify( active_processor() != &this); 574 } 581 575 582 576 __destroy_pthread( kernel_thread, this.stack, 0p ); … … 688 682 [this->unique_id, last_size] = ready_mutate_register(); 689 683 690 this->rdq.cpu = __kernel_getcpu();691 692 684 this->cltr->procs.total += 1u; 693 685 insert_last(this->cltr->procs.actives, *this); … … 729 721 check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute 730 722 731 size_t stacksize = DEFAULT_STACK_SIZE; 723 size_t stacksize; 724 // default stack size, normally defined by shell limit 725 check( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" ); 726 assert( stacksize >= PTHREAD_STACK_MIN ); 732 727 733 728 void * stack; … … 754 749 #endif 755 750 751 756 752 check( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" ); 753 757 754 check( pthread_create( pthread, &attr, start, arg ), "pthread_create" ); 758 755 return stack; -
libcfa/src/concurrency/kernel_private.hfa
r12c1eef r5235d49 60 60 extern bool __preemption_enabled(); 61 61 62 static inline void __disable_interrupts_checked() {63 /* paranoid */ verify( __preemption_enabled() );64 disable_interrupts();65 /* paranoid */ verify( ! __preemption_enabled() );66 }67 68 static inline void __enable_interrupts_checked( bool poll = true ) {69 /* paranoid */ verify( ! __preemption_enabled() );70 enable_interrupts( poll );71 /* paranoid */ verify( __preemption_enabled() );72 }73 74 62 //release/wake-up the following resources 75 63 void __thread_finish( thread$ * thrd ); -
libcfa/src/concurrency/locks.hfa
r12c1eef r5235d49 31 31 32 32 //----------------------------------------------------------------------------- 33 // Semaphores 34 35 // '0-nary' semaphore 36 // Similar to a counting semaphore except the value of one is never reached 37 // as a consequence, a V() that would bring the value to 1 *spins* until 38 // a P consumes it 39 struct Semaphore0nary { 40 __spinlock_t lock; // needed to protect 41 mpsc_queue(thread$) queue; 42 }; 43 44 static inline bool P(Semaphore0nary & this, thread$ * thrd) { 45 /* paranoid */ verify(!thrd`next); 46 /* paranoid */ verify(!(&(*thrd)`next)); 47 48 push(this.queue, thrd); 49 return true; 50 } 51 52 static inline bool P(Semaphore0nary & this) { 53 thread$ * thrd = active_thread(); 54 P(this, thrd); 55 park(); 56 return true; 57 } 58 59 static inline thread$ * V(Semaphore0nary & this, bool doUnpark = true) { 60 thread$ * next; 61 lock(this.lock __cfaabi_dbg_ctx2); 62 for (;;) { 63 next = pop(this.queue); 64 if (next) break; 65 Pause(); 66 } 67 unlock(this.lock); 68 69 if (doUnpark) unpark(next); 70 return next; 71 } 72 73 // Wrapper used on top of any sempahore to avoid potential locking 74 struct BinaryBenaphore { 75 volatile ssize_t counter; 76 }; 77 78 static inline { 79 void ?{}(BinaryBenaphore & this) { this.counter = 0; } 80 void ?{}(BinaryBenaphore & this, zero_t) { this.counter = 0; } 81 void ?{}(BinaryBenaphore & this, one_t ) { this.counter = 1; } 82 83 // returns true if no blocking needed 84 bool P(BinaryBenaphore & this) { 85 return __atomic_fetch_sub(&this.counter, 1, __ATOMIC_SEQ_CST) > 0; 86 } 87 88 bool tryP(BinaryBenaphore & this) { 89 ssize_t c = this.counter; 90 /* paranoid */ verify( c > MIN ); 91 return (c >= 1) && __atomic_compare_exchange_n(&this.counter, &c, c-1, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); 92 } 93 94 // returns true if notify needed 95 bool V(BinaryBenaphore & this) { 96 ssize_t c = 0; 97 for () { 98 /* paranoid */ verify( this.counter < MAX ); 99 if (__atomic_compare_exchange_n(&this.counter, &c, c+1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 100 if (c == 0) return true; 101 /* paranoid */ verify(c < 0); 102 return false; 103 } else { 104 if (c == 1) return true; 105 /* paranoid */ verify(c < 1); 106 Pause(); 107 } 108 } 109 } 110 } 111 112 // Binary Semaphore based on the BinaryBenaphore on top of the 0-nary Semaphore 113 struct ThreadBenaphore { 114 BinaryBenaphore ben; 115 Semaphore0nary sem; 116 }; 117 118 static inline void ?{}(ThreadBenaphore & this) {} 119 static inline void ?{}(ThreadBenaphore & this, zero_t) { (this.ben){ 0 }; } 120 static inline void ?{}(ThreadBenaphore & this, one_t ) { (this.ben){ 1 }; } 121 122 static inline bool P(ThreadBenaphore & this) { return P(this.ben) ? false : P(this.sem); } 123 static inline bool tryP(ThreadBenaphore & this) { return tryP(this.ben); } 124 static inline bool P(ThreadBenaphore & this, bool wait) { return wait ? P(this) : tryP(this); } 125 126 static inline thread$ * V(ThreadBenaphore & this, bool doUnpark = true) { 127 if (V(this.ben)) return 0p; 128 return V(this.sem, doUnpark); 129 } 130 131 //----------------------------------------------------------------------------- 33 132 // Semaphore 34 133 struct semaphore { … … 72 171 static inline void on_wakeup( owner_lock & this, size_t v ) { on_wakeup ( (blocking_lock &)this, v ); } 73 172 static inline void on_notify( owner_lock & this, struct thread$ * t ) { on_notify( (blocking_lock &)this, t ); } 173 174 struct fast_lock { 175 thread$ * volatile owner; 176 ThreadBenaphore sem; 177 }; 178 179 static inline void ?{}(fast_lock & this) { this.owner = 0p; } 180 181 static inline bool $try_lock(fast_lock & this, thread$ * thrd) { 182 thread$ * exp = 0p; 183 return __atomic_compare_exchange_n(&this.owner, &exp, thrd, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); 184 } 185 186 static inline void lock( fast_lock & this ) __attribute__((artificial)); 187 static inline void lock( fast_lock & this ) { 188 thread$ * thrd = active_thread(); 189 /* paranoid */verify(thrd != this.owner); 190 191 for (;;) { 192 if ($try_lock(this, thrd)) return; 193 P(this.sem); 194 } 195 } 196 197 static inline bool try_lock( fast_lock & this ) __attribute__((artificial)); 198 static inline bool try_lock ( fast_lock & this ) { 199 thread$ * thrd = active_thread(); 200 /* paranoid */ verify(thrd != this.owner); 201 return $try_lock(this, thrd); 202 } 203 204 static inline thread$ * unlock( fast_lock & this ) __attribute__((artificial)); 205 static inline thread$ * unlock( fast_lock & this ) { 206 /* paranoid */ verify(active_thread() == this.owner); 207 208 // open 'owner' before unlocking anyone 209 // so new and unlocked threads don't park incorrectly. 210 // This may require additional fencing on ARM. 211 this.owner = 0p; 212 213 return V(this.sem); 214 } 215 216 static inline size_t on_wait( fast_lock & this ) { unlock(this); return 0; } 217 static inline void on_wakeup( fast_lock & this, size_t ) { lock(this); } 218 static inline void on_notify( fast_lock &, struct thread$ * t ) { unpark(t); } 74 219 75 220 struct mcs_node { -
libcfa/src/concurrency/ready_queue.cfa
r12c1eef r5235d49 20 20 21 21 22 //#define USE_RELAXED_FIFO22 #define USE_RELAXED_FIFO 23 23 // #define USE_WORK_STEALING 24 24 // #define USE_CPU_WORK_STEALING 25 #define USE_AWARE_STEALING26 25 27 26 #include "bits/defs.hfa" … … 30 29 31 30 #include "stdlib.hfa" 32 #include "limits.hfa"33 31 #include "math.hfa" 34 32 … … 56 54 #endif 57 55 58 #if defined(USE_AWARE_STEALING) 59 #define READYQ_SHARD_FACTOR 2 60 #define SEQUENTIAL_SHARD 2 61 #elif defined(USE_CPU_WORK_STEALING) 56 #if defined(USE_CPU_WORK_STEALING) 62 57 #define READYQ_SHARD_FACTOR 2 63 58 #elif defined(USE_RELAXED_FIFO) … … 143 138 __kernel_rseq_register(); 144 139 140 __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc); 145 141 bool * handle = (bool *)&kernelTLS().sched_lock; 146 142 … … 178 174 } 179 175 176 __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p done, id %lu\n", proc, n); 177 180 178 // Return new spot. 181 179 /* paranoid */ verify(n < ready); … … 192 190 193 191 __atomic_store_n(cell, 0p, __ATOMIC_RELEASE); 192 193 __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc); 194 194 195 195 __kernel_rseq_unregister(); … … 244 244 245 245 //======================================================================= 246 // caches handling247 248 struct __attribute__((aligned(128))) __ready_queue_caches_t {249 // Count States:250 // - 0 : No one is looking after this cache251 // - 1 : No one is looking after this cache, BUT it's not empty252 // - 2+ : At least one processor is looking after this cache253 volatile unsigned count;254 };255 256 void ?{}(__ready_queue_caches_t & this) { this.count = 0; }257 void ^?{}(__ready_queue_caches_t & this) {}258 259 static inline void depart(__ready_queue_caches_t & cache) {260 /* paranoid */ verify( cache.count > 1);261 __atomic_fetch_add(&cache.count, -1, __ATOMIC_SEQ_CST);262 /* paranoid */ verify( cache.count != 0);263 /* paranoid */ verify( cache.count < 65536 ); // This verify assumes no cluster will have more than 65000 kernel threads mapped to a single cache, which could be correct but is super weird.264 }265 266 static inline void arrive(__ready_queue_caches_t & cache) {267 // for() {268 // unsigned expected = cache.count;269 // unsigned desired = 0 == expected ? 2 : expected + 1;270 // }271 }272 273 //=======================================================================274 246 // Cforall Ready Queue used for scheduling 275 247 //======================================================================= 276 unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) { 277 /* paranoid */ verifyf( currtsc < 45000000000000000, "Suspiciously large current time: %'llu (%llx)\n", currtsc, currtsc ); 278 /* paranoid */ verifyf( instsc < 45000000000000000, "Suspiciously large insert time: %'llu (%llx)\n", instsc, instsc ); 279 /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg ); 280 281 const unsigned long long new_val = currtsc > instsc ? currtsc - instsc : 0; 282 const unsigned long long total_weight = 16; 283 const unsigned long long new_weight = 4; 284 const unsigned long long old_weight = total_weight - new_weight; 285 const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight; 286 return ret; 248 unsigned long long moving_average(unsigned long long nval, unsigned long long oval) { 249 const unsigned long long tw = 16; 250 const unsigned long long nw = 4; 251 const unsigned long long ow = tw - nw; 252 return ((nw * nval) + (ow * oval)) / tw; 287 253 } 288 254 … … 305 271 } 306 272 #else 307 lanes.data = 0p; 308 lanes.tscs = 0p; 309 lanes.caches = 0p; 310 lanes.help = 0p; 311 lanes.count = 0; 273 lanes.data = 0p; 274 lanes.tscs = 0p; 275 lanes.help = 0p; 276 lanes.count = 0; 312 277 #endif 313 278 } … … 320 285 free(lanes.data); 321 286 free(lanes.tscs); 322 free(lanes.caches);323 287 free(lanes.help); 324 288 } 325 289 326 290 //----------------------------------------------------------------------- 327 #if defined(USE_AWARE_STEALING)328 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {329 processor * const proc = kernelTLS().this_processor;330 const bool external = (!proc) || (cltr != proc->cltr);331 const bool remote = hint == UNPARK_REMOTE;332 333 unsigned i;334 if( external || remote ) {335 // Figure out where thread was last time and make sure it's valid336 /* paranoid */ verify(thrd->preferred >= 0);337 if(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count) {338 /* paranoid */ verify(thrd->preferred * READYQ_SHARD_FACTOR < lanes.count);339 unsigned start = thrd->preferred * READYQ_SHARD_FACTOR;340 do {341 unsigned r = __tls_rand();342 i = start + (r % READYQ_SHARD_FACTOR);343 /* paranoid */ verify( i < lanes.count );344 // If we can't lock it retry345 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );346 } else {347 do {348 i = __tls_rand() % lanes.count;349 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );350 }351 } else {352 do {353 unsigned r = proc->rdq.its++;354 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR);355 /* paranoid */ verify( i < lanes.count );356 // If we can't lock it retry357 } while( !__atomic_try_acquire( &lanes.data[i].lock ) );358 }359 360 // Actually push it361 push(lanes.data[i], thrd);362 363 // Unlock and return364 __atomic_unlock( &lanes.data[i].lock );365 366 #if !defined(__CFA_NO_STATISTICS__)367 if(unlikely(external || remote)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);368 else __tls_stats()->ready.push.local.success++;369 #endif370 }371 372 static inline unsigned long long calc_cutoff(const unsigned long long ctsc, const processor * proc, __ready_queue_t & rdq) {373 unsigned start = proc->rdq.id;374 unsigned long long max = 0;375 for(i; READYQ_SHARD_FACTOR) {376 unsigned long long ptsc = ts(rdq.lanes.data[start + i]);377 if(ptsc != -1ull) {378 /* paranoid */ verify( start + i < rdq.lanes.count );379 unsigned long long tsc = moving_average(ctsc, ptsc, rdq.lanes.tscs[start + i].ma);380 if(tsc > max) max = tsc;381 }382 }383 return (max + 2 * max) / 2;384 }385 386 __attribute__((hot)) struct thread$ * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {387 /* paranoid */ verify( lanes.count > 0 );388 /* paranoid */ verify( kernelTLS().this_processor );389 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes.count );390 391 processor * const proc = kernelTLS().this_processor;392 unsigned this = proc->rdq.id;393 /* paranoid */ verify( this < lanes.count );394 __cfadbg_print_safe(ready_queue, "Kernel : pop from %u\n", this);395 396 // Figure out the current cpu and make sure it is valid397 const int cpu = __kernel_getcpu();398 /* paranoid */ verify(cpu >= 0);399 /* paranoid */ verify(cpu < cpu_info.hthrd_count);400 unsigned this_cache = cpu_info.llc_map[cpu].cache;401 402 // Super important: don't write the same value over and over again403 // We want to maximise our chances that his particular values stays in cache404 if(lanes.caches[this / READYQ_SHARD_FACTOR].id != this_cache)405 __atomic_store_n(&lanes.caches[this / READYQ_SHARD_FACTOR].id, this_cache, __ATOMIC_RELAXED);406 407 const unsigned long long ctsc = rdtscl();408 409 if(proc->rdq.target == MAX) {410 uint64_t chaos = __tls_rand();411 unsigned ext = chaos & 0xff;412 unsigned other = (chaos >> 8) % (lanes.count);413 414 if(ext < 3 || __atomic_load_n(&lanes.caches[other / READYQ_SHARD_FACTOR].id, __ATOMIC_RELAXED) == this_cache) {415 proc->rdq.target = other;416 }417 }418 else {419 const unsigned target = proc->rdq.target;420 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, lanes.tscs[target].tv);421 /* paranoid */ verify( lanes.tscs[target].tv != MAX );422 if(target < lanes.count) {423 const unsigned long long cutoff = calc_cutoff(ctsc, proc, cltr->ready_queue);424 const unsigned long long age = moving_average(ctsc, lanes.tscs[target].tv, lanes.tscs[target].ma);425 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no");426 if(age > cutoff) {427 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));428 if(t) return t;429 }430 }431 proc->rdq.target = MAX;432 }433 434 for(READYQ_SHARD_FACTOR) {435 unsigned i = this + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);436 if(thread$ * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;437 }438 439 // All lanes where empty return 0p440 return 0p;441 442 }443 __attribute__((hot)) struct thread$ * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {444 unsigned i = __tls_rand() % lanes.count;445 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));446 }447 __attribute__((hot)) struct thread$ * pop_search(struct cluster * cltr) {448 return search(cltr);449 }450 #endif451 291 #if defined(USE_CPU_WORK_STEALING) 452 292 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) { … … 510 350 /* paranoid */ verify( kernelTLS().this_processor ); 511 351 512 processor * const proc = kernelTLS().this_processor;513 352 const int cpu = __kernel_getcpu(); 514 353 /* paranoid */ verify(cpu >= 0); … … 521 360 /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR); 522 361 362 processor * const proc = kernelTLS().this_processor; 523 363 const int start = map.self * READYQ_SHARD_FACTOR; 524 364 const unsigned long long ctsc = rdtscl(); 525 365 526 366 // Did we already have a help target 527 if(proc->rdq.target == MAX) {367 if(proc->rdq.target == -1u) { 528 368 unsigned long long max = 0; 529 369 for(i; READYQ_SHARD_FACTOR) { 530 unsigned long long tsc = moving_average(ctsc ,ts(lanes.data[start + i]), lanes.tscs[start + i].ma);370 unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 531 371 if(tsc > max) max = tsc; 532 372 } 533 //proc->rdq.cutoff = (max + 2 * max) / 2;373 proc->rdq.cutoff = (max + 2 * max) / 2; 534 374 /* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores. 535 375 /* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores. … … 544 384 } 545 385 546 /* paranoid */ verify(proc->rdq.target != MAX);386 /* paranoid */ verify(proc->rdq.target != -1u); 547 387 } 548 388 else { 549 389 unsigned long long max = 0; 550 390 for(i; READYQ_SHARD_FACTOR) { 551 unsigned long long tsc = moving_average(ctsc ,ts(lanes.data[start + i]), lanes.tscs[start + i].ma);391 unsigned long long tsc = moving_average(ctsc - ts(lanes.data[start + i]), lanes.tscs[start + i].ma); 552 392 if(tsc > max) max = tsc; 553 393 } … … 555 395 { 556 396 unsigned target = proc->rdq.target; 557 proc->rdq.target = MAX;397 proc->rdq.target = -1u; 558 398 lanes.help[target / READYQ_SHARD_FACTOR].tri++; 559 if(moving_average(ctsc ,lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) {399 if(moving_average(ctsc - lanes.tscs[target].tv, lanes.tscs[target].ma) > cutoff) { 560 400 thread$ * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 561 401 proc->rdq.last = target; 562 402 if(t) return t; 403 else proc->rdq.target = -1u; 563 404 } 564 proc->rdq.target = MAX;405 else proc->rdq.target = -1u; 565 406 } 566 407 567 408 unsigned last = proc->rdq.last; 568 if(last != MAX && moving_average(ctsc, lanes.tscs[last].tv, lanes.tscs[last].ma) >cutoff) {409 if(last != -1u && lanes.tscs[last].tv < cutoff && ts(lanes.data[last]) < cutoff) { 569 410 thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help)); 570 411 if(t) return t; 571 412 } 572 413 else { 573 proc->rdq.last = MAX;414 proc->rdq.last = -1u; 574 415 } 575 416 } … … 587 428 processor * const proc = kernelTLS().this_processor; 588 429 unsigned last = proc->rdq.last; 589 if(last != MAX) {430 if(last != -1u) { 590 431 struct thread$ * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal)); 591 432 if(t) return t; 592 proc->rdq.last = MAX;433 proc->rdq.last = -1u; 593 434 } 594 435 … … 719 560 #else 720 561 unsigned preferred = thrd->preferred; 721 const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || preferred == MAX|| thrd->curr_cluster != cltr;562 const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr; 722 563 /* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count ); 723 564 … … 771 612 processor * proc = kernelTLS().this_processor; 772 613 773 if(proc->rdq.target == MAX) {614 if(proc->rdq.target == -1u) { 774 615 unsigned long long min = ts(lanes.data[proc->rdq.id]); 775 616 for(int i = 0; i < READYQ_SHARD_FACTOR; i++) { … … 782 623 else { 783 624 unsigned target = proc->rdq.target; 784 proc->rdq.target = MAX;625 proc->rdq.target = -1u; 785 626 const unsigned long long bias = 0; //2_500_000_000; 786 627 const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff; … … 817 658 // try to pop from a lane given by index w 818 659 static inline struct thread$ * try_pop(struct cluster * cltr, unsigned w __STATS(, __stats_readyQ_pop_t & stats)) with (cltr->ready_queue) { 819 /* paranoid */ verify( w < lanes.count );820 660 __STATS( stats.attempt++; ) 821 661 … … 841 681 // Actually pop the list 842 682 struct thread$ * thrd; 843 #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING) 844 unsigned long long tsc_before = ts(lane); 845 #endif 683 unsigned long long tsc_before = ts(lane); 846 684 unsigned long long tsv; 847 685 [thrd, tsv] = pop(lane); … … 857 695 __STATS( stats.success++; ) 858 696 859 #if defined(USE_AWARE_STEALING) || defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING) 860 if (tsv != MAX) { 861 unsigned long long now = rdtscl(); 862 unsigned long long pma = __atomic_load_n(&lanes.tscs[w].ma, __ATOMIC_RELAXED); 863 __atomic_store_n(&lanes.tscs[w].tv, tsv, __ATOMIC_RELAXED); 864 __atomic_store_n(&lanes.tscs[w].ma, moving_average(now, tsc_before, pma), __ATOMIC_RELAXED); 865 } 697 #if defined(USE_WORK_STEALING) || defined(USE_CPU_WORK_STEALING) 698 unsigned long long now = rdtscl(); 699 lanes.tscs[w].tv = tsv; 700 lanes.tscs[w].ma = moving_average(now > tsc_before ? now - tsc_before : 0, lanes.tscs[w].ma); 866 701 #endif 867 702 868 #if defined(USE_ AWARE_STEALING) || defined(USE_CPU_WORK_STEALING)703 #if defined(USE_CPU_WORK_STEALING) 869 704 thrd->preferred = w / READYQ_SHARD_FACTOR; 870 705 #else … … 965 800 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count); 966 801 it->rdq.id = value; 967 it->rdq.target = MAX;802 it->rdq.target = -1u; 968 803 value += READYQ_SHARD_FACTOR; 969 804 it = &(*it)`next; … … 978 813 979 814 static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) { 980 #if defined(USE_ AWARE_STEALING) || defined(USE_WORK_STEALING)815 #if defined(USE_WORK_STEALING) 981 816 lanes.tscs = alloc(lanes.count, lanes.tscs`realloc); 982 817 for(i; lanes.count) { 983 lanes.tscs[i].tv = rdtscl(); 984 lanes.tscs[i].ma = 0; 818 unsigned long long tsc1 = ts(lanes.data[i]); 819 unsigned long long tsc2 = rdtscl(); 820 lanes.tscs[i].tv = min(tsc1, tsc2); 985 821 } 986 822 #endif … … 1028 864 // Update original 1029 865 lanes.count = ncount; 1030 1031 lanes.caches = alloc( target, lanes.caches`realloc );1032 866 } 1033 867 … … 1106 940 fix(lanes.data[idx]); 1107 941 } 1108 1109 lanes.caches = alloc( target, lanes.caches`realloc );1110 942 } 1111 943 1112 944 fix_times(cltr); 1113 1114 945 1115 946 reassign_cltr_id(cltr); -
libcfa/src/concurrency/thread.cfa
r12c1eef r5235d49 10 10 // Created On : Tue Jan 17 12:27:26 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 13 20:11:55 202213 // Update Count : 4212 // Last Modified On : Wed Dec 4 09:17:49 2019 13 // Update Count : 9 14 14 // 15 15 … … 25 25 #include "invoke.h" 26 26 27 extern uint32_t __global_random_seed;27 uint64_t thread_rand(); 28 28 29 29 //----------------------------------------------------------------------------- 30 30 // Thread ctors and dtors 31 void ?{}( thread$ & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {31 void ?{}(thread$ & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) { 32 32 context{ 0p, 0p }; 33 33 self_cor{ name, storage, storageSize }; … … 45 45 preferred = ready_queue_new_preferred(); 46 46 last_proc = 0p; 47 random_state = __global_random_seed;48 47 #if defined( __CFA_WITH_VERIFY__ ) 49 48 canary = 0x0D15EA5E0D15EA5Ep; … … 172 171 } 173 172 174 //----------------------------------------------------------------------------- 175 #define GENERATOR LCG 176 177 void set_seed( uint32_t seed ) { 178 active_thread()->random_state = __global_random_seed = seed; 179 GENERATOR( active_thread()->random_state ); 180 } // set_seed 181 uint32_t prng( void ) { return GENERATOR( active_thread()->random_state ); } // [0,UINT_MAX] 173 uint64_t thread_rand() { 174 disable_interrupts(); 175 uint64_t ret = __tls_rand(); 176 enable_interrupts(); 177 return ret; 178 } 182 179 183 180 // Local Variables: // -
libcfa/src/concurrency/thread.hfa
r12c1eef r5235d49 10 10 // Created On : Tue Jan 17 12:27:26 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 6 16:40:16 202213 // Update Count : 712 // Last Modified On : Wed Dec 4 09:18:14 2019 13 // Update Count : 6 14 14 // 15 15 … … 65 65 void ^?{}(thread$ & this); 66 66 67 static inline void ?{}(thread$ & this) { this{ "Anonymous Thread", *mainCluster, 0p, DEFAULT_STACK_SIZE}; }67 static inline void ?{}(thread$ & this) { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; } 68 68 static inline void ?{}(thread$ & this, size_t stackSize ) { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; } 69 69 static inline void ?{}(thread$ & this, void * storage, size_t storageSize ) { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; } 70 static inline void ?{}(thread$ & this, struct cluster & cl ) { this{ "Anonymous Thread", cl, 0p, DEFAULT_STACK_SIZE}; }70 static inline void ?{}(thread$ & this, struct cluster & cl ) { this{ "Anonymous Thread", cl, 0p, 65000 }; } 71 71 static inline void ?{}(thread$ & this, struct cluster & cl, size_t stackSize ) { this{ "Anonymous Thread", cl, 0p, stackSize }; } 72 72 static inline void ?{}(thread$ & this, struct cluster & cl, void * storage, size_t storageSize ) { this{ "Anonymous Thread", cl, storage, storageSize }; } 73 static inline void ?{}(thread$ & this, const char * const name) { this{ name, *mainCluster, 0p, DEFAULT_STACK_SIZE}; }74 static inline void ?{}(thread$ & this, const char * const name, struct cluster & cl ) { this{ name, cl, 0p, DEFAULT_STACK_SIZE}; }73 static inline void ?{}(thread$ & this, const char * const name) { this{ name, *mainCluster, 0p, 65000 }; } 74 static inline void ?{}(thread$ & this, const char * const name, struct cluster & cl ) { this{ name, cl, 0p, 65000 }; } 75 75 static inline void ?{}(thread$ & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; } 76 76 -
libcfa/src/device/cpu.cfa
r12c1eef r5235d49 427 427 unsigned c = pairings[i].cpu; 428 428 unsigned llc_id = pairings[i].id; 429 unsigned width = maps[llc_id].raw->width; 429 430 unsigned start = maps[llc_id].start; 430 entries[c].count = maps[llc_id].raw->width; 431 unsigned self = start + (maps[llc_id].count++); 432 entries[c].count = width; 431 433 entries[c].start = start; 432 entries[c].self = start + (maps[llc_id].count++); 433 entries[c].cache = llc_id; 434 entries[c].self = self; 434 435 } 435 436 -
libcfa/src/device/cpu.hfa
r12c1eef r5235d49 16 16 #include <stddef.h> 17 17 18 // Map from cpu entry to a structure detailling cpus with common topologies19 // Note that the cpu-groups are contiguous so the indexing is different from20 // the cpu indexing21 18 struct cpu_map_entry_t { 22 // Where this particular cpu is in the group23 19 unsigned self; 24 25 // Starting index of the cpus with the same topology26 20 unsigned start; 27 28 // Number of cpus with the same topology29 21 unsigned count; 30 31 // Index of the cache this entry describes32 unsigned cache;33 22 }; 34 23 -
libcfa/src/fstream.cfa
r12c1eef r5235d49 10 10 // Created On : Wed May 27 17:56:53 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 10 08:45:05 202213 // Update Count : 51 312 // Last Modified On : Sun Oct 10 11:23:05 2021 13 // Update Count : 512 14 14 // 15 15 … … 52 52 inline void setPrt$( ofstream & os, bool state ) { os.prt$ = state; } 53 53 54 inline void lock( ofstream & os ) with( os ) { lock( os.lock$ ); }54 inline void lock( ofstream & os ) with( os ) { lock( os.lock$ ); } 55 55 inline void unlock( ofstream & os ) { unlock( os.lock$ ); } 56 56 -
libcfa/src/heap.cfa
r12c1eef r5235d49 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Jan 2 23:29:41 202213 // Update Count : 10 5812 // Last Modified On : Mon Aug 9 19:03:02 2021 13 // Update Count : 1040 14 14 // 15 15 … … 263 263 #ifdef __STATISTICS__ 264 264 // Heap statistics counters. 265 static unsigned int malloc_ calls, malloc_0_calls;266 static unsigned long long int malloc_storage _request, malloc_storage_alloc;267 static unsigned int aalloc_ calls, aalloc_0_calls;268 static unsigned long long int aalloc_storage _request, aalloc_storage_alloc;269 static unsigned int calloc_ calls, calloc_0_calls;270 static unsigned long long int calloc_storage _request, calloc_storage_alloc;271 static unsigned int memalign_ calls, memalign_0_calls;272 static unsigned long long int memalign_storage _request, memalign_storage_alloc;273 static unsigned int amemalign_ calls, amemalign_0_calls;274 static unsigned long long int amemalign_storage _request, amemalign_storage_alloc;275 static unsigned int cmemalign_ calls, cmemalign_0_calls;276 static unsigned long long int cmemalign_storage _request, cmemalign_storage_alloc;277 static unsigned int resize_ calls, resize_0_calls;278 static unsigned long long int resize_storage _request, resize_storage_alloc;279 static unsigned int realloc_ calls, realloc_0_calls;280 static unsigned long long int realloc_storage _request, realloc_storage_alloc;281 static unsigned int free_ calls, free_null_calls;282 static unsigned long long int free_storage _request, free_storage_alloc;265 static unsigned int malloc_zero_calls, malloc_calls; 266 static unsigned long long int malloc_storage; 267 static unsigned int aalloc_zero_calls, aalloc_calls; 268 static unsigned long long int aalloc_storage; 269 static unsigned int calloc_zero_calls, calloc_calls; 270 static unsigned long long int calloc_storage; 271 static unsigned int memalign_zero_calls, memalign_calls; 272 static unsigned long long int memalign_storage; 273 static unsigned int amemalign_zero_calls, amemalign_calls; 274 static unsigned long long int amemalign_storage; 275 static unsigned int cmemalign_zero_calls, cmemalign_calls; 276 static unsigned long long int cmemalign_storage; 277 static unsigned int resize_zero_calls, resize_calls; 278 static unsigned long long int resize_storage; 279 static unsigned int realloc_zero_calls, realloc_calls; 280 static unsigned long long int realloc_storage; 281 static unsigned int free_zero_calls, free_calls; 282 static unsigned long long int free_storage; 283 283 static unsigned int mmap_calls; 284 static unsigned long long int mmap_storage _request, mmap_storage_alloc;284 static unsigned long long int mmap_storage; 285 285 static unsigned int munmap_calls; 286 static unsigned long long int munmap_storage _request, munmap_storage_alloc;286 static unsigned long long int munmap_storage; 287 287 static unsigned int sbrk_calls; 288 288 static unsigned long long int sbrk_storage; … … 294 294 char helpText[1024]; 295 295 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), 296 "\nHeap statistics: (storage request / allocation + header)\n"297 " malloc >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"298 " aalloc >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"299 " calloc >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"300 " memalign >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"301 " amemalign >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"302 " cmemalign >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"303 " resize >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"304 " realloc >0 calls %'u; 0 calls %'u; storage %'llu /%'llu bytes\n"305 " free !null calls %'u; null calls %'u; storage %'llu /%'llu bytes\n"306 " sbrkcalls %'u; storage %'llu bytes\n"307 " m map calls %'u; storage %'llu /%'llu bytes\n"308 " munmap calls %'u; storage %'llu /%'llu bytes\n",309 malloc_ calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,310 aalloc_ calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,311 calloc_ calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,312 memalign_ calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,313 amemalign_ calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,314 cmemalign_ calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,315 resize_ calls, resize_0_calls, resize_storage_request, resize_storage_alloc,316 realloc_ calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,317 free_ calls, free_null_calls, free_storage_request, free_storage_alloc,318 sbrk_calls, sbrk_storage,319 m map_calls, mmap_storage_request, mmap_storage_alloc,320 munmap_calls, munmap_storage_request, munmap_storage_alloc296 "\nHeap statistics:\n" 297 " malloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 298 " aalloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 299 " calloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 300 " memalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 301 " amemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 302 " cmemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 303 " resize 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 304 " realloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 305 " free 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 306 " mmap calls %'u; storage %'llu bytes\n" 307 " munmap calls %'u; storage %'llu bytes\n" 308 " sbrk calls %'u; storage %'llu bytes\n", 309 malloc_zero_calls, malloc_calls, malloc_storage, 310 aalloc_zero_calls, aalloc_calls, aalloc_storage, 311 calloc_zero_calls, calloc_calls, calloc_storage, 312 memalign_zero_calls, memalign_calls, memalign_storage, 313 amemalign_zero_calls, amemalign_calls, amemalign_storage, 314 cmemalign_zero_calls, cmemalign_calls, cmemalign_storage, 315 resize_zero_calls, resize_calls, resize_storage, 316 realloc_zero_calls, realloc_calls, realloc_storage, 317 free_zero_calls, free_calls, free_storage, 318 mmap_calls, mmap_storage, 319 munmap_calls, munmap_storage, 320 sbrk_calls, sbrk_storage 321 321 ); 322 322 } // printStats … … 329 329 "<sizes>\n" 330 330 "</sizes>\n" 331 "<total type=\"malloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 332 "<total type=\"aalloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 333 "<total type=\"calloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 334 "<total type=\"memalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 335 "<total type=\"amemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 336 "<total type=\"cmemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 337 "<total type=\"resize\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 338 "<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 339 "<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" 331 "<total type=\"malloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 332 "<total type=\"aalloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 333 "<total type=\"calloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 334 "<total type=\"memalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 335 "<total type=\"amemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 336 "<total type=\"cmemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 337 "<total type=\"resize\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 338 "<total type=\"realloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 339 "<total type=\"free\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 340 "<total type=\"mmap\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 341 "<total type=\"munmap\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 340 342 "<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 341 "<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n"342 "<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"343 343 "</malloc>", 344 malloc_ calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,345 aalloc_ calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,346 calloc_ calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,347 memalign_ calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,348 amemalign_ calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,349 cmemalign_ calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,350 resize_ calls, resize_0_calls, resize_storage_request, resize_storage_alloc,351 realloc_ calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,352 free_ calls, free_null_calls, free_storage_request, free_storage_alloc,353 sbrk_calls, sbrk_storage,354 m map_calls, mmap_storage_request, mmap_storage_alloc,355 munmap_calls, munmap_storage_request, munmap_storage_alloc344 malloc_zero_calls, malloc_calls, malloc_storage, 345 aalloc_zero_calls, aalloc_calls, aalloc_storage, 346 calloc_zero_calls, calloc_calls, calloc_storage, 347 memalign_zero_calls, memalign_calls, memalign_storage, 348 amemalign_zero_calls, amemalign_calls, amemalign_storage, 349 cmemalign_zero_calls, cmemalign_calls, cmemalign_storage, 350 resize_zero_calls, resize_calls, resize_storage, 351 realloc_zero_calls, realloc_calls, realloc_storage, 352 free_zero_calls, free_calls, free_storage, 353 mmap_calls, mmap_storage, 354 munmap_calls, munmap_storage, 355 sbrk_calls, sbrk_storage 356 356 ); 357 357 __cfaabi_bits_write( fileno( stream ), helpText, len ); // ensures all bytes written or exit … … 577 577 #ifdef __STATISTICS__ 578 578 __atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST ); 579 __atomic_add_fetch( &mmap_storage_request, size, __ATOMIC_SEQ_CST ); 580 __atomic_add_fetch( &mmap_storage_alloc, tsize, __ATOMIC_SEQ_CST ); 579 __atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST ); 581 580 #endif // __STATISTICS__ 582 581 … … 627 626 #ifdef __STATISTICS__ 628 627 __atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST ); 629 __atomic_add_fetch( &munmap_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST ); 630 __atomic_add_fetch( &munmap_storage_alloc, size, __ATOMIC_SEQ_CST ); 628 __atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST ); 631 629 #endif // __STATISTICS__ 632 630 if ( munmap( header, size ) == -1 ) { … … 644 642 #ifdef __STATISTICS__ 645 643 __atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST ); 646 __atomic_add_fetch( &free_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST ); 647 __atomic_add_fetch( &free_storage_alloc, size, __ATOMIC_SEQ_CST ); 644 __atomic_add_fetch( &free_storage, size, __ATOMIC_SEQ_CST ); 648 645 #endif // __STATISTICS__ 649 646 … … 822 819 if ( likely( size > 0 ) ) { 823 820 __atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST ); 824 __atomic_add_fetch( &malloc_storage _request, size, __ATOMIC_SEQ_CST );821 __atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST ); 825 822 } else { 826 __atomic_add_fetch( &malloc_ 0_calls, 1, __ATOMIC_SEQ_CST );823 __atomic_add_fetch( &malloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 827 824 } // if 828 825 #endif // __STATISTICS__ … … 838 835 if ( likely( size > 0 ) ) { 839 836 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST ); 840 __atomic_add_fetch( &aalloc_storage _request, size, __ATOMIC_SEQ_CST );837 __atomic_add_fetch( &aalloc_storage, size, __ATOMIC_SEQ_CST ); 841 838 } else { 842 __atomic_add_fetch( &aalloc_ 0_calls, 1, __ATOMIC_SEQ_CST );839 __atomic_add_fetch( &aalloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 843 840 } // if 844 841 #endif // __STATISTICS__ … … 853 850 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 854 851 #ifdef __STATISTICS__ 855 __atomic_add_fetch( &calloc_ 0_calls, 1, __ATOMIC_SEQ_CST );852 __atomic_add_fetch( &calloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 856 853 #endif // __STATISTICS__ 857 854 return 0p; … … 859 856 #ifdef __STATISTICS__ 860 857 __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST ); 861 __atomic_add_fetch( &calloc_storage _request, dim * elemSize, __ATOMIC_SEQ_CST );858 __atomic_add_fetch( &calloc_storage, dim * elemSize, __ATOMIC_SEQ_CST ); 862 859 #endif // __STATISTICS__ 863 860 … … 894 891 if ( unlikely( size == 0 ) ) { // special cases 895 892 #ifdef __STATISTICS__ 896 __atomic_add_fetch( &resize_ 0_calls, 1, __ATOMIC_SEQ_CST );893 __atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST ); 897 894 #endif // __STATISTICS__ 898 895 free( oaddr ); … … 905 902 if ( unlikely( oaddr == 0p ) ) { 906 903 #ifdef __STATISTICS__ 907 __atomic_add_fetch( &resize_storage _request, size, __ATOMIC_SEQ_CST );904 __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST ); 908 905 #endif // __STATISTICS__ 909 906 return mallocNoStats( size ); … … 924 921 925 922 #ifdef __STATISTICS__ 926 __atomic_add_fetch( &resize_storage _request, size, __ATOMIC_SEQ_CST );923 __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST ); 927 924 #endif // __STATISTICS__ 928 925 … … 939 936 if ( unlikely( size == 0 ) ) { // special cases 940 937 #ifdef __STATISTICS__ 941 __atomic_add_fetch( &realloc_ 0_calls, 1, __ATOMIC_SEQ_CST );938 __atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 942 939 #endif // __STATISTICS__ 943 940 free( oaddr ); … … 950 947 if ( unlikely( oaddr == 0p ) ) { 951 948 #ifdef __STATISTICS__ 952 __atomic_add_fetch( &realloc_storage _request, size, __ATOMIC_SEQ_CST );949 __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST ); 953 950 #endif // __STATISTICS__ 954 951 return mallocNoStats( size ); … … 972 969 973 970 #ifdef __STATISTICS__ 974 __atomic_add_fetch( &realloc_storage _request, size, __ATOMIC_SEQ_CST );971 __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST ); 975 972 #endif // __STATISTICS__ 976 973 … … 1003 1000 if ( likely( size > 0 ) ) { 1004 1001 __atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST ); 1005 __atomic_add_fetch( &memalign_storage _request, size, __ATOMIC_SEQ_CST );1002 __atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST ); 1006 1003 } else { 1007 __atomic_add_fetch( &memalign_ 0_calls, 1, __ATOMIC_SEQ_CST );1004 __atomic_add_fetch( &memalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1008 1005 } // if 1009 1006 #endif // __STATISTICS__ … … 1019 1016 if ( likely( size > 0 ) ) { 1020 1017 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1021 __atomic_add_fetch( &cmemalign_storage _request, size, __ATOMIC_SEQ_CST );1018 __atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST ); 1022 1019 } else { 1023 __atomic_add_fetch( &cmemalign_ 0_calls, 1, __ATOMIC_SEQ_CST );1020 __atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1024 1021 } // if 1025 1022 #endif // __STATISTICS__ … … 1034 1031 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 1035 1032 #ifdef __STATISTICS__ 1036 __atomic_add_fetch( &cmemalign_ 0_calls, 1, __ATOMIC_SEQ_CST );1033 __atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1037 1034 #endif // __STATISTICS__ 1038 1035 return 0p; … … 1040 1037 #ifdef __STATISTICS__ 1041 1038 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1042 __atomic_add_fetch( &cmemalign_storage _request, dim * elemSize, __ATOMIC_SEQ_CST );1039 __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST ); 1043 1040 #endif // __STATISTICS__ 1044 1041 … … 1104 1101 if ( unlikely( addr == 0p ) ) { // special case 1105 1102 #ifdef __STATISTICS__ 1106 __atomic_add_fetch( &free_ null_calls, 1, __ATOMIC_SEQ_CST );1103 __atomic_add_fetch( &free_zero_calls, 1, __ATOMIC_SEQ_CST ); 1107 1104 #endif // __STATISTICS__ 1108 1105 … … 1283 1280 if ( unlikely( size == 0 ) ) { // special cases 1284 1281 #ifdef __STATISTICS__ 1285 __atomic_add_fetch( &resize_ 0_calls, 1, __ATOMIC_SEQ_CST );1282 __atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST ); 1286 1283 #endif // __STATISTICS__ 1287 1284 free( oaddr ); … … 1297 1294 #ifdef __STATISTICS__ 1298 1295 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 1299 __atomic_add_fetch( &resize_storage _request, size, __ATOMIC_SEQ_CST );1296 __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST ); 1300 1297 #endif // __STATISTICS__ 1301 1298 return memalignNoStats( nalign, size ); … … 1332 1329 1333 1330 #ifdef __STATISTICS__ 1334 __atomic_add_fetch( &resize_storage _request, size, __ATOMIC_SEQ_CST );1331 __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST ); 1335 1332 #endif // __STATISTICS__ 1336 1333 … … 1345 1342 if ( unlikely( size == 0 ) ) { // special cases 1346 1343 #ifdef __STATISTICS__ 1347 __atomic_add_fetch( &realloc_ 0_calls, 1, __ATOMIC_SEQ_CST );1344 __atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 1348 1345 #endif // __STATISTICS__ 1349 1346 free( oaddr ); … … 1359 1356 #ifdef __STATISTICS__ 1360 1357 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST ); 1361 __atomic_add_fetch( &realloc_storage _request, size, __ATOMIC_SEQ_CST );1358 __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST ); 1362 1359 #endif // __STATISTICS__ 1363 1360 return memalignNoStats( nalign, size ); … … 1383 1380 #ifdef __STATISTICS__ 1384 1381 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST ); 1385 __atomic_add_fetch( &realloc_storage _request, size, __ATOMIC_SEQ_CST );1382 __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST ); 1386 1383 #endif // __STATISTICS__ 1387 1384 -
libcfa/src/iostream.cfa
r12c1eef r5235d49 10 10 // Created On : Wed May 27 17:56:53 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Jan 19 08:15:53 202213 // Update Count : 13 5212 // Last Modified On : Sun Oct 10 09:28:17 2021 13 // Update Count : 1345 14 14 // 15 15 … … 57 57 ostype & ?|?( ostype & os, signed char sc ) { 58 58 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 59 fmt( os, "% 'hhd", sc );59 fmt( os, "%hhd", sc ); 60 60 return os; 61 61 } // ?|? … … 66 66 ostype & ?|?( ostype & os, unsigned char usc ) { 67 67 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 68 fmt( os, "% 'hhu", usc );68 fmt( os, "%hhu", usc ); 69 69 return os; 70 70 } // ?|? … … 75 75 ostype & ?|?( ostype & os, short int si ) { 76 76 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 77 fmt( os, "% 'hd", si );77 fmt( os, "%hd", si ); 78 78 return os; 79 79 } // ?|? … … 84 84 ostype & ?|?( ostype & os, unsigned short int usi ) { 85 85 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 86 fmt( os, "% 'hu", usi );86 fmt( os, "%hu", usi ); 87 87 return os; 88 88 } // ?|? … … 93 93 ostype & ?|?( ostype & os, int i ) { 94 94 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 95 fmt( os, "% 'd", i );95 fmt( os, "%d", i ); 96 96 return os; 97 97 } // ?|? … … 102 102 ostype & ?|?( ostype & os, unsigned int ui ) { 103 103 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 104 fmt( os, "% 'u", ui );104 fmt( os, "%u", ui ); 105 105 return os; 106 106 } // ?|? … … 111 111 ostype & ?|?( ostype & os, long int li ) { 112 112 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 113 fmt( os, "% 'ld", li );113 fmt( os, "%ld", li ); 114 114 return os; 115 115 } // ?|? … … 120 120 ostype & ?|?( ostype & os, unsigned long int uli ) { 121 121 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 122 fmt( os, "% 'lu", uli );122 fmt( os, "%lu", uli ); 123 123 return os; 124 124 } // ?|? … … 129 129 ostype & ?|?( ostype & os, long long int lli ) { 130 130 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 131 fmt( os, "% 'lld", lli );131 fmt( os, "%lld", lli ); 132 132 return os; 133 133 } // ?|? … … 138 138 ostype & ?|?( ostype & os, unsigned long long int ulli ) { 139 139 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 140 fmt( os, "% 'llu", ulli );140 fmt( os, "%llu", ulli ); 141 141 return os; 142 142 } // ?|? … … 205 205 ostype & ?|?( ostype & os, float f ) { 206 206 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 207 PrintWithDP( os, "% 'g", f );207 PrintWithDP( os, "%g", f ); 208 208 return os; 209 209 } // ?|? … … 214 214 ostype & ?|?( ostype & os, double d ) { 215 215 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 216 PrintWithDP( os, "% '.*lg", d, DBL_DIG );216 PrintWithDP( os, "%.*lg", d, DBL_DIG ); 217 217 return os; 218 218 } // ?|? … … 223 223 ostype & ?|?( ostype & os, long double ld ) { 224 224 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 225 PrintWithDP( os, "% '.*Lg", ld, LDBL_DIG );225 PrintWithDP( os, "%.*Lg", ld, LDBL_DIG ); 226 226 return os; 227 227 } // ?|? … … 233 233 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 234 234 // os | crealf( fc ) | nonl; 235 PrintWithDP( os, "% 'g", crealf( fc ) );236 PrintWithDP( os, "% '+g", cimagf( fc ) );235 PrintWithDP( os, "%g", crealf( fc ) ); 236 PrintWithDP( os, "%+g", cimagf( fc ) ); 237 237 fmt( os, "i" ); 238 238 return os; … … 245 245 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 246 246 // os | creal( dc ) | nonl; 247 PrintWithDP( os, "% '.*lg", creal( dc ), DBL_DIG );248 PrintWithDP( os, "% '+.*lg", cimag( dc ), DBL_DIG );247 PrintWithDP( os, "%.*lg", creal( dc ), DBL_DIG ); 248 PrintWithDP( os, "%+.*lg", cimag( dc ), DBL_DIG ); 249 249 fmt( os, "i" ); 250 250 return os; … … 257 257 if ( sepPrt$( os ) ) fmt( os, "%s", sepGetCur$( os ) ); 258 258 // os | creall( ldc ) || nonl; 259 PrintWithDP( os, "% '.*Lg", creall( ldc ), LDBL_DIG );260 PrintWithDP( os, "% '+.*Lg", cimagl( ldc ), LDBL_DIG );259 PrintWithDP( os, "%.*Lg", creall( ldc ), LDBL_DIG ); 260 PrintWithDP( os, "%+.*Lg", cimagl( ldc ), LDBL_DIG ); 261 261 fmt( os, "i" ); 262 262 return os; … … 282 282 }; // mask 283 283 284 if ( s == 0p ) { fmt( os, "%s", "0p" ); return os; } // null pointer285 284 if ( s[0] == '\0' ) { sepOff( os ); return os; } // null string => no separator 286 285 … … 497 496 if ( ! f.flags.pc ) memcpy( &fmtstr, IFMTNP, sizeof(IFMTNP) ); \ 498 497 else memcpy( &fmtstr, IFMTP, sizeof(IFMTP) ); \ 499 int star = 5; /* position before first '*' */ \498 int star = 4; /* position before first '*' */ \ 500 499 \ 501 500 /* Insert flags into spaces before '*', from right to left. */ \ … … 504 503 if ( f.flags.sign ) { fmtstr[star] = '+'; star -= 1; } \ 505 504 if ( f.flags.pad0 && ! f.flags.pc ) { fmtstr[star] = '0'; star -= 1; } \ 506 fmtstr[star] = '\''; star -= 1; /* locale */ \507 505 fmtstr[star] = '%'; \ 508 506 \ … … 523 521 } // distribution 524 522 525 IntegralFMTImpl( signed char, " *hh ", "*.*hh " )526 IntegralFMTImpl( unsigned char, " *hh ", "*.*hh " )527 IntegralFMTImpl( signed short int, " *h ", "*.*h " )528 IntegralFMTImpl( unsigned short int, " *h ", "*.*h " )529 IntegralFMTImpl( signed int, " * ", "*.* " )530 IntegralFMTImpl( unsigned int, " * ", "*.* " )531 IntegralFMTImpl( signed long int, " *l ", "*.*l " )532 IntegralFMTImpl( unsigned long int, " *l ", "*.*l " )533 IntegralFMTImpl( signed long long int, " *ll ", "*.*ll " )534 IntegralFMTImpl( unsigned long long int, " *ll ", "*.*ll " )523 IntegralFMTImpl( signed char, " *hh ", " *.*hh " ) 524 IntegralFMTImpl( unsigned char, " *hh ", " *.*hh " ) 525 IntegralFMTImpl( signed short int, " *h ", " *.*h " ) 526 IntegralFMTImpl( unsigned short int, " *h ", " *.*h " ) 527 IntegralFMTImpl( signed int, " * ", " *.* " ) 528 IntegralFMTImpl( unsigned int, " * ", " *.* " ) 529 IntegralFMTImpl( signed long int, " *l ", " *.*l " ) 530 IntegralFMTImpl( unsigned long int, " *l ", " *.*l " ) 531 IntegralFMTImpl( signed long long int, " *ll ", " *.*ll " ) 532 IntegralFMTImpl( unsigned long long int, " *ll ", " *.*ll " ) 535 533 536 534 … … 694 692 if ( ! f.flags.pc ) memcpy( &fmtstr, DFMTNP, sizeof(DFMTNP) ); \ 695 693 else memcpy( &fmtstr, DFMTP, sizeof(DFMTP) ); \ 696 int star = 5; /* position before first '*' */ \694 int star = 4; /* position before first '*' */ \ 697 695 \ 698 696 /* Insert flags into spaces before '*', from right to left. */ \ … … 700 698 if ( f.flags.sign ) { fmtstr[star] = '+'; star -= 1; } \ 701 699 if ( f.flags.pad0 ) { fmtstr[star] = '0'; star -= 1; } \ 702 fmtstr[star] = '\''; star -= 1; /* locale */ \703 700 fmtstr[star] = '%'; \ 704 701 \ … … 718 715 } // distribution 719 716 720 FloatingPointFMTImpl( double, " * ", "*.* " )721 FloatingPointFMTImpl( long double, " *L ", "*.*L " )717 FloatingPointFMTImpl( double, " * ", " *.* " ) 718 FloatingPointFMTImpl( long double, " *L ", " *.*L " ) 722 719 723 720 // *********************************** character *********************************** -
libcfa/src/parseconfig.cfa
r12c1eef r5235d49 1 2 3 #pragma GCC diagnostic push4 //#pragma GCC diagnostic ignored "-Wunused-parameter"5 //#pragma GCC diagnostic ignored "-Wunused-function"6 //#pragma GCC diagnostic ignored "-Wuninitialized"7 //#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"8 9 1 #include <fstream.hfa> 10 2 #include <parseargs.hfa> … … 27 19 // TODO: use string interface when it's ready (and implement exception msg protocol) 28 20 [ void ] msg( * Missing_Config_Entries ex ) { 29 serr | "The config file is missing " | ex->num_missing | "entr" | sepOff | (ex->num_missing == 1 ? "y." : "ies."); 21 serr | nlOff; 22 serr | "The config file is missing " | ex->num_missing; 23 serr | nlOn; 24 if ( ex->num_missing == 1 ) { 25 serr | " entry."; 26 } else { 27 serr | " entries."; 28 } 30 29 } // msg 31 30 … … 224 223 return value < zero_val; 225 224 } 226 #pragma GCC diagnostic pop227 225 228 226 -
libcfa/src/startup.cfa
r12c1eef r5235d49 10 10 // Created On : Tue Jul 24 16:21:57 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 17 16:41:54 202213 // Update Count : 5512 // Last Modified On : Sat Jan 9 23:18:23 2021 13 // Update Count : 34 14 14 // 15 15 … … 17 17 #include <locale.h> // setlocale 18 18 #include <stdlib.h> // getenv 19 #include "bits/defs.hfa" // rdtscl20 19 #include "startup.hfa" 21 22 extern uint32_t __global_random_seed; // sequential/concurrent23 extern uint32_t __global_random_state; // sequential24 20 25 21 extern "C" { … … 27 23 void __cfaabi_appready_startup( void ) { 28 24 tzset(); // initialize time global variables 25 setlocale( LC_NUMERIC, getenv("LANG") ); 29 26 #ifdef __CFA_DEBUG__ 30 27 extern void heapAppStart(); … … 51 48 void __cfaabi_core_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_CORE ) )); 52 49 void __cfaabi_core_startup( void ) { 53 __global_random_state = __global_random_seed = rdtscl();54 50 __cfaabi_interpose_startup(); 55 51 __cfaabi_device_startup(); -
libcfa/src/stdlib.cfa
r12c1eef r5235d49 10 10 // Created On : Thu Jan 28 17:10:29 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 13 21:38:30 202213 // Update Count : 5 9312 // Last Modified On : Wed Dec 29 15:32:44 2021 13 // Update Count : 512 14 14 // 15 15 16 16 #include "stdlib.hfa" 17 #include "bits/random.hfa"18 #include "concurrency/invoke.h" // random_state19 17 20 18 //--------------------------------------- … … 223 221 //--------------------------------------- 224 222 223 static uint32_t seed = 0; // current seed 224 static thread_local uint32_t state; // random state 225 226 void set_seed( uint32_t seed_ ) { state = seed = seed_; } 227 uint32_t get_seed() { return seed; } 228 225 229 #define GENERATOR LCG 226 230 227 uint32_t __global_random_seed; // sequential/concurrent 228 uint32_t __global_random_state; // sequential only 229 230 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; GENERATOR( state ); } // set seed 231 inline uint32_t MarsagliaXor( uint32_t & state ) { 232 if ( unlikely( seed == 0 ) ) set_seed( rdtscl() ); 233 else if ( unlikely( state == 0 ) ) state = seed; 234 state ^= state << 6; 235 state ^= state >> 21; 236 state ^= state << 7; 237 return state; 238 } // MarsagliaXor 239 240 inline uint32_t LCG( uint32_t & state ) { // linear congruential generator 241 if ( unlikely( seed == 0 ) ) set_seed( rdtscl() ); 242 else if ( unlikely( state == 0 ) ) state = seed; 243 return state = 36973 * (state & 65535) + (state >> 16); 244 } // LCG 245 231 246 uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); } 232 247 233 void set_seed( uint32_t seed ) { __global_random_seed = seed; GENERATOR( __global_random_state ); } 234 uint32_t get_seed() { return __global_random_seed; } 235 uint32_t prng( void ) { return GENERATOR( __global_random_state ); } // [0,UINT_MAX] 248 uint32_t prng( void ) { return GENERATOR( state ); } 236 249 237 250 //--------------------------------------- -
libcfa/src/stdlib.hfa
r12c1eef r5235d49 10 10 // Created On : Thu Jan 28 17:12:35 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 13 21:34:46 202213 // Update Count : 63612 // Last Modified On : Wed Dec 29 15:30:58 2021 13 // Update Count : 591 14 14 // 15 15 … … 21 21 #include <stdlib.h> // *alloc, strto*, ato* 22 22 #include <heap.hfa> 23 24 23 25 24 // Reduce includes by explicitly defining these routines. … … 44 43 //--------------------------------------- 45 44 45 // Macro because of returns 46 #define ARRAY_ALLOC$( allocation, alignment, dim ) \ 47 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)allocation( dim, (size_t)sizeof(T) ); /* C allocation */ \ 48 else return (T *)alignment( _Alignof(T), dim, sizeof(T) ) 49 46 50 static inline forall( T & | sized(T) ) { 47 51 // CFA safe equivalents, i.e., implicit size specification 48 52 49 53 T * malloc( void ) { 50 if ( _Alignof(T) <= libAlign() ) return (T *) malloc(sizeof(T) ); // C allocation54 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)malloc( (size_t)sizeof(T) ); // C allocation 51 55 else return (T *)memalign( _Alignof(T), sizeof(T) ); 52 56 } // malloc 53 57 54 58 T * aalloc( size_t dim ) { 55 if ( _Alignof(T) <= libAlign() ) return (T *)aalloc( dim, sizeof(T) ); // C allocation 56 else return (T *)amemalign( _Alignof(T), dim, sizeof(T) ); 59 ARRAY_ALLOC$( aalloc, amemalign, dim ); 57 60 } // aalloc 58 61 59 62 T * calloc( size_t dim ) { 60 if ( _Alignof(T) <= libAlign() ) return (T *)calloc( dim, sizeof(T) ); // C allocation 61 else return (T *)cmemalign( _Alignof(T), dim, sizeof(T) ); 63 ARRAY_ALLOC$( calloc, cmemalign, dim ); 62 64 } // calloc 63 65 64 66 T * resize( T * ptr, size_t size ) { // CFA resize, eliminate return-type cast 65 if ( _Alignof(T) <= libAlign() ) return (T *) resize( (void *)ptr, size ); // CFA resize66 else return (T *) resize( (void *)ptr, _Alignof(T), size ); // CFA resize67 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)resize( (void *)ptr, size ); // CFA resize 68 else return (T *)(void *)resize( (void *)ptr, _Alignof(T), size ); // CFA resize 67 69 } // resize 68 70 69 71 T * realloc( T * ptr, size_t size ) { // CFA realloc, eliminate return-type cast 70 if ( _Alignof(T) <= libAlign() ) return (T *) realloc( (void *)ptr, size ); // C realloc71 else return (T *) realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc72 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)realloc( (void *)ptr, size ); // C realloc 73 else return (T *)(void *)realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc 72 74 } // realloc 73 75 … … 208 210 209 211 forall( TT... | { T * alloc_internal$( void *, T *, size_t, size_t, S_fill(T), TT ); } ) { 212 210 213 T * alloc_internal$( void * , T * Realloc, size_t Align, size_t Dim, S_fill(T) Fill, T_resize Resize, TT rest) { 211 214 return alloc_internal$( Resize, (T*)0p, Align, Dim, Fill, rest); … … 231 234 return alloc_internal$( (void*)0p, (T*)0p, (_Alignof(T) > libAlign() ? _Alignof(T) : libAlign()), dim, (S_fill(T)){'0'}, all); 232 235 } 236 233 237 } // distribution TT 234 238 } // distribution T … … 384 388 //--------------------------------------- 385 389 386 // Sequential Pseudo Random-Number Generator : generate repeatable sequence of values that appear random.387 //388 // Declaration :389 // PRNG sprng = { 1009 } - set starting seed versus random seed390 //391 // Interface :392 // set_seed( sprng, 1009 ) - set starting seed for ALL kernel threads versus random seed393 // get_seed( sprng ) - read seed394 // prng( sprng ) - generate random value in range [0,UINT_MAX]395 // prng( sprng, u ) - generate random value in range [0,u)396 // prng( sprng, l, u ) - generate random value in range [l,u]397 // calls( sprng ) - number of generated random value so far398 //399 // Examples : generate random number between 5-21400 // prng( sprng ) % 17 + 5; values 0-16 + 5 = 5-21401 // prng( sprng, 16 + 1 ) + 5;402 // prng( sprng, 5, 21 );403 // calls( sprng );404 405 390 struct PRNG { 406 391 uint32_t callcnt; // call count … … 409 394 }; // PRNG 410 395 411 void set_seed( PRNG & prng, uint32_t seed_ ); 412 uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 396 extern uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 413 397 static inline { 398 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; } // set seed 414 399 void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); } // random seed 415 400 void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); } // fixed seed … … 420 405 } // distribution 421 406 422 // Concurrent Pseudo Random-Number Generator : generate repeatable sequence of values that appear random. 423 // 424 // Interface : 425 // set_seed( 1009 ) - fixed seed for all kernel threads versus random seed 426 // get_seed() - read seed 427 // prng() - generate random value in range [0,UINT_MAX] 428 // prng( u ) - generate random value in range [0,u) 429 // prng( l, u ) - generate random value in range [l,u] 430 // 431 // Examples : generate random number between 5-21 432 // prng() % 17 + 5; values 0-16 + 5 = 5-21 433 // prng( 16 + 1 ) + 5; 434 // prng( 5, 21 ); 435 436 void set_seed( uint32_t seed_ ) OPTIONAL_THREAD; 437 uint32_t get_seed() __attribute__(( warn_unused_result )); 438 uint32_t prng( void ) __attribute__(( warn_unused_result )) OPTIONAL_THREAD; // [0,UINT_MAX] 407 extern void set_seed( uint32_t seed ); // set per thread seed 408 extern uint32_t get_seed(); // get seed 409 extern uint32_t prng( void ) __attribute__(( warn_unused_result )); // [0,UINT_MAX] 439 410 static inline { 440 uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result )) { return prng() % u; } // [0,u) 441 uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( u - l + 1 ) + l; } // [l,u] 411 uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result )); 412 uint32_t prng( uint32_t u ) { return prng() % u; } // [0,u) 413 uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )); 414 uint32_t prng( uint32_t l, uint32_t u ) { return prng( u - l + 1 ) + l; } // [l,u] 442 415 } // distribution 443 416 -
src/AST/Decl.cpp
r12c1eef r5235d49 26 26 #include "Node.hpp" // for readonly 27 27 #include "Type.hpp" // for readonly 28 #include "Expr.hpp"29 28 30 29 namespace ast { … … 66 65 for (auto & tp : this->type_params) { 67 66 ftype->forall.emplace_back(new TypeInstType(tp->name, tp)); 68 for (auto & ap: tp->assertions) {69 ftype->assertions.emplace_back(new VariableExpr(loc, ap));70 }71 67 } 72 68 this->type = ftype; -
src/AST/Decl.hpp
r12c1eef r5235d49 34 34 // Must be included in *all* AST classes; should be #undef'd at the end of the file 35 35 #define MUTATE_FRIEND \ 36 template<typename node_t> friend node_t * mutate(const node_t * node); \36 template<typename node_t> friend node_t * mutate(const node_t * node); \ 37 37 template<typename node_t> friend node_t * shallowCopy(const node_t * node); 38 38 … … 135 135 std::vector< ptr<Expr> > withExprs; 136 136 137 137 138 FunctionDecl( const CodeLocation & loc, const std::string & name, std::vector<ptr<TypeDecl>>&& forall, 138 139 std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns, -
src/AST/Eval.hpp
r12c1eef r5235d49 24 24 template< typename... Args > 25 25 UntypedExpr * call( const CodeLocation & loc, const std::string & name, Args &&... args ) { 26 return new UntypedExpr { 27 loc, new NameExpr { loc, name }, 26 return new UntypedExpr { 27 loc, new NameExpr { loc, name }, 28 28 std::vector< ptr< Expr > > { std::forward< Args >( args )... } }; 29 29 } -
src/Validate/InitializerLength.cpp
r12c1eef r5235d49 14 14 // 15 15 16 #include "InitializerLength.hpp"16 //#include "InitializerLength.hpp" 17 17 18 18 #include "AST/Expr.hpp" -
src/Validate/InitializerLength.hpp
r12c1eef r5235d49 14 14 // 15 15 16 #pragma once17 18 namespace ast {19 class TranslationUnit;20 }21 22 16 namespace Validate { 23 17 -
tests/device/cpu.cfa
r12c1eef r5235d49 15 15 16 16 17 #include <fstream.hfa> 17 18 #include <device/cpu.hfa> 18 #include <limits.hfa>19 #include <fstream.hfa>20 19 #include <stdlib.hfa> 21 20 … … 119 118 120 119 unsigned found_level = 0; 121 unsigned found = MAX;120 unsigned found = -1u; 122 121 for(i; idxs) { 123 122 unsigned idx = idxs - 1 - i; … … 137 136 } 138 137 139 /* paranoid */ verify(found != MAX);138 /* paranoid */ verify(found != -1u); 140 139 return found; 141 140 } -
tests/io/io-acquire.cfa
r12c1eef r5235d49 10 10 // Created On : Mon Mar 1 18:40:09 2021 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Jan 14 09:13:18 202213 // Update Count : 7 412 // Last Modified On : Wed Oct 6 18:04:58 2021 13 // Update Count : 72 14 14 // 15 15 … … 18 18 #include <mutex_stmt.hfa> 19 19 20 Duration default_preemption() { return 0; }21 22 20 thread T {}; 23 21 void main( T & ) { … … 25 23 26 24 for ( 100 ) { // expression protection 27 mutex( sout) sout | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9;25 mutex(sout) sout | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9; 28 26 } 29 27 mutex( sout ) { // statement protection … … 53 51 int a, b, c, d, e, f, g, h, i; 54 52 for ( 100 ) { // expression protection 55 mutex( sin) sin | a | b | c | d | e | f | g | h | i;53 mutex(sin) sin | a | b | c | d | e | f | g | h | i; 56 54 } 57 55 mutex( sin ) { // statement protection -
tests/unified_locking/.expect/locks.txt
r12c1eef r5235d49 11 11 Start Test 6: owner lock and condition variable 3 wait/notify all 12 12 Done Test 6 13 Start Test 7: linear backofflock and condition variable single wait/notify13 Start Test 7: fast lock and condition variable single wait/notify 14 14 Done Test 7 15 Start Test 8: linear backofflock and condition variable 3 wait/notify all15 Start Test 8: fast lock and condition variable 3 wait/notify all 16 16 Done Test 8 17 Start Test 9: multi acquisiton lock and condition variable multiple acquire andwait/notify17 Start Test 9: linear backoff lock and condition variable single wait/notify 18 18 Done Test 9 19 Start Test 10: owner lock and condition variable multiple acquire and wait/notify19 Start Test 10: linear backoff lock and condition variable 3 wait/notify all 20 20 Done Test 10 21 Start Test 11: no lock condition variablewait/notify21 Start Test 11: multi acquisiton lock and condition variable multiple acquire and wait/notify 22 22 Done Test 11 23 Start Test 12: locked condition variable wait/notify with front()23 Start Test 12: owner lock and condition variable multiple acquire and wait/notify 24 24 Done Test 12 25 Start Test 13: no lock condition variable wait/notify 26 Done Test 13 27 Start Test 14: locked condition variable wait/notify with front() 28 Done Test 14 -
tests/unified_locking/locks.cfa
r12c1eef r5235d49 15 15 condition_variable( owner_lock ) c_o; 16 16 17 fast_lock f; 18 condition_variable( fast_lock ) c_f; 19 17 20 linear_backoff_then_block_lock l; 18 21 condition_variable( linear_backoff_then_block_lock ) c_l; … … 71 74 } 72 75 unlock(s); 76 } 77 } 78 79 thread T_C_F_WS1 {}; 80 81 void main( T_C_F_WS1 & this ) { 82 for (unsigned int i = 0; i < num_times; i++) { 83 lock(f); 84 if(empty(c_f) && i != num_times - 1) { 85 wait(c_f,f); 86 }else{ 87 notify_one(c_f); 88 } 89 unlock(f); 90 } 91 } 92 93 thread T_C_F_WB1 {}; 94 95 void main( T_C_F_WB1 & this ) { 96 for (unsigned int i = 0; i < num_times; i++) { 97 lock(f); 98 if(counter(c_f) == 3 || i == num_times - 1) { 99 notify_all(c_f); 100 }else{ 101 wait(c_f,f); 102 } 103 unlock(f); 73 104 } 74 105 } … … 286 317 printf("Done Test 6\n"); 287 318 288 printf("Start Test 7: linear backoff lock and condition variable single wait/notify\n"); 319 printf("Start Test 7: fast lock and condition variable single wait/notify\n"); 320 { 321 T_C_F_WS1 t1[2]; 322 } 323 printf("Done Test 7\n"); 324 325 printf("Start Test 8: fast lock and condition variable 3 wait/notify all\n"); 326 { 327 T_C_F_WB1 t1[4]; 328 } 329 printf("Done Test 8\n"); 330 331 printf("Start Test 9: linear backoff lock and condition variable single wait/notify\n"); 289 332 { 290 333 T_C_L_WS1 t1[2]; 291 334 } 292 printf("Done Test 7\n");293 294 printf("Start Test 8: linear backoff lock and condition variable 3 wait/notify all\n");335 printf("Done Test 9\n"); 336 337 printf("Start Test 10: linear backoff lock and condition variable 3 wait/notify all\n"); 295 338 { 296 339 T_C_L_WB1 t1[4]; 297 340 } 298 printf("Done Test 8\n");299 300 printf("Start Test 9: multi acquisiton lock and condition variable multiple acquire and wait/notify\n");341 printf("Done Test 10\n"); 342 343 printf("Start Test 11: multi acquisiton lock and condition variable multiple acquire and wait/notify\n"); 301 344 { 302 345 T_C_M_WS2 t1[2]; 303 346 } 304 printf("Done Test 9\n");305 306 printf("Start Test 1 0: owner lock and condition variable multiple acquire and wait/notify\n");347 printf("Done Test 11\n"); 348 349 printf("Start Test 12: owner lock and condition variable multiple acquire and wait/notify\n"); 307 350 { 308 351 T_C_O_WS2 t1[2]; 309 352 } 310 printf("Done Test 1 0\n");311 312 printf("Start Test 1 1: no lock condition variable wait/notify\n");353 printf("Done Test 12\n"); 354 355 printf("Start Test 13: no lock condition variable wait/notify\n"); 313 356 { 314 357 T_C_NLW t1; 315 358 T_C_NLS t2; 316 359 } 317 printf("Done Test 1 1\n");318 319 printf("Start Test 1 2: locked condition variable wait/notify with front()\n");360 printf("Done Test 13\n"); 361 362 printf("Start Test 14: locked condition variable wait/notify with front()\n"); 320 363 { 321 364 T_C_S_WNF t1[2]; 322 365 } 323 printf("Done Test 1 2\n");324 } 366 printf("Done Test 14\n"); 367 } -
tests/unified_locking/mutex_test.hfa
r12c1eef r5235d49 10 10 thread$ * id; 11 11 uint32_t sum; 12 uint32_t cnt;13 12 }; 14 13 … … 28 27 { 29 28 uint32_t tsum = mo.sum; 30 uint32_t cnt = mo.cnt;31 29 mo.id = me; 32 30 yield(random(5)); 33 31 value = ((uint32_t)random()) ^ ((uint32_t)me); 34 32 if(mo.id != me) sout | "Intruder!"; 35 mo.cnt = cnt + 1;36 33 mo.sum = tsum + value; 37 34 } … … 57 54 uint32_t sum = -32; 58 55 mo.sum = -32; 59 mo.cnt = 0;60 56 processor p[2]; 61 57 sout | "Starting"; … … 67 63 } 68 64 sout | "Done!"; 69 if(mo.cnt != (13 * num_times)) sout | "Invalid cs count!" | mo.cnt | "vs "| (13 * num_times) | "(13 *" | num_times | ')';70 65 if(sum == mo.sum) sout | "Match!"; 71 66 else sout | "No Match!" | sum | "vs" | mo.sum; -
tools/jenkins/setup.sh.in
r12c1eef r5235d49 48 48 regex1='/([[:alpha:][:digit:]@/_.-]+)' 49 49 regex2='(libcfa[[:alpha:][:digit:].]+) => not found' 50 regex3='linux-vdso.so.1 |linux-gate.so.1'50 regex3='linux-vdso.so.1' 51 51 if [[ $line =~ $regex1 ]]; then 52 52 retsysdeps+=(${BASH_REMATCH[1]})
Note:
See TracChangeset
for help on using the changeset viewer.