Changeset d99a716 for libcfa/src
- Timestamp:
- Jan 4, 2023, 1:44:19 PM (2 years ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 339e30a, a14926b
- Parents:
- 0348fd8 (diff), 66a89e7 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Location:
- libcfa/src
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/bits/random.hfa
r0348fd8 rd99a716 10 10 // Created On : Fri Jan 14 07:18:11 2022 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Dec 11 18:43:58202213 // Update Count : 17 112 // Last Modified On : Thu Dec 22 20:54:22 2022 13 // Update Count : 178 14 14 // 15 15 16 16 #pragma once 17 17 18 #include <stdint.h> 18 #include <stdint.h> // uintXX_t 19 19 20 20 #define GLUE2( x, y ) x##y … … 24 24 #ifdef __x86_64__ // 64-bit architecture 25 25 // 64-bit generators 26 #define LEHMER6426 //#define LEHMER64 27 27 //#define XORSHIFT_12_25_27 28 //#define XOSHIRO256PP28 #define XOSHIRO256PP 29 29 //#define KISS_64 30 30 31 31 // 32-bit generators 32 #define XORSHIFT_6_21_733 //#define XOSHIRO128PP32 //#define XORSHIFT_6_21_7 33 #define XOSHIRO128PP 34 34 #else // 32-bit architecture 35 35 // 64-bit generators 36 #define XORSHIFT_13_7_17 36 //#define XORSHIFT_13_7_17 37 #define XOSHIRO256PP 37 38 38 39 // 32-bit generators 39 #define XORSHIFT_6_21_7 40 //#define XORSHIFT_6_21_7 41 #define XOSHIRO128PP 40 42 #endif // __x86_64__ 41 43 … … 47 49 #define PRNG_NAME_64 xoshiro256pp 48 50 #define PRNG_STATE_64_T GLUE(PRNG_NAME_64,_t) 49 typedef struct PRNG_STATE_64_T { uint64_t s [4]; } PRNG_STATE_64_T;51 typedef struct PRNG_STATE_64_T { uint64_t s0, s1, s2, s3; } PRNG_STATE_64_T; 50 52 #endif // XOSHIRO256PP 51 53 … … 53 55 #define PRNG_NAME_32 xoshiro128pp 54 56 #define PRNG_STATE_32_T GLUE(PRNG_NAME_32,_t) 55 typedef struct PRNG_STATE_32_T { uint32_t s [4]; } PRNG_STATE_32_T;57 typedef struct PRNG_STATE_32_T { uint32_t s0, s1, s2, s3; } PRNG_STATE_32_T; 56 58 #endif // XOSHIRO128PP 57 59 … … 110 112 111 113 // ALL PRNG ALGORITHMS ARE OPTIMIZED SO THAT THE PRNG LOGIC CAN HAPPEN IN PARALLEL WITH THE USE OF THE RESULT. 112 // Therefore, the set_seed routine primes the PRNG by calling it with the state so the seed is not return as the 113 // first random value. 114 // Specifically, the current random state is copied for returning, before computing the next value. 
As a consequence, 115 // the set_seed routine primes the PRNG by calling it with the state so the seed is not return as the first random 116 // value. 117 114 118 115 119 #ifdef __cforall // don't include in C code (invoke.h) … … 126 130 127 131 #ifndef XOSHIRO256PP 128 typedef struct xoshiro256pp_t { uint64_t s [4]; } xoshiro256pp_t;132 typedef struct xoshiro256pp_t { uint64_t s0, s1, s2, s3; } xoshiro256pp_t; 129 133 #endif // ! XOSHIRO256PP 130 134 131 135 static inline uint64_t xoshiro256pp( xoshiro256pp_t & rs ) with(rs) { 132 inline uint64_t rotl( const uint64_t x, int k) {136 inline uint64_t rotl( const uint64_t x, int k ) { 133 137 return (x << k) | (x >> (64 - k)); 134 138 } // rotl 135 139 136 const uint64_t result = rotl( s [0] + s[3], 23 ) + s[0];137 const uint64_t t = s [1]<< 17;138 139 s [2] ^= s[0];140 s [3] ^= s[1];141 s [1] ^= s[2];142 s [0] ^= s[3];143 s [2]^= t;144 s [3] = rotl( s[3], 45 );140 const uint64_t result = rotl( s0 + s3, 23 ) + s0; 141 const uint64_t t = s1 << 17; 142 143 s2 ^= s0; 144 s3 ^= s1; 145 s1 ^= s2; 146 s0 ^= s3; 147 s2 ^= t; 148 s3 = rotl( s3, 45 ); 145 149 return result; 146 150 } // xoshiro256pp 147 151 148 static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, 149 state = (xoshiro256pp_t){ {seed, seed, seed, seed}};152 static inline void xoshiro256pp_set_seed( xoshiro256pp_t & state, uint64_t seed ) { 153 state = (xoshiro256pp_t){ seed, seed, seed, seed }; 150 154 xoshiro256pp( state ); 151 155 } // xoshiro256pp_set_seed … … 161 165 162 166 #ifndef XOSHIRO128PP 163 typedef struct xoshiro128pp_t { uint32_t s [4]; } xoshiro128pp_t;167 typedef struct xoshiro128pp_t { uint32_t s0, s1, s2, s3; } xoshiro128pp_t; 164 168 #endif // ! 
XOSHIRO128PP 165 169 … … 169 173 } // rotl 170 174 171 const uint32_t result = rotl( s [0] + s[3], 7 ) + s[0];172 const uint32_t t = s [1]<< 9;173 174 s [2] ^= s[0];175 s [3] ^= s[1];176 s [1] ^= s[2];177 s [0] ^= s[3];178 s [2]^= t;179 s [3] = rotl( s[3], 11 );175 const uint32_t result = rotl( s0 + s3, 7 ) + s0; 176 const uint32_t t = s1 << 9; 177 178 s2 ^= s0; 179 s3 ^= s1; 180 s1 ^= s2; 181 s0 ^= s3; 182 s2 ^= t; 183 s3 = rotl( s3, 11 ); 180 184 return result; 181 185 } // xoshiro128pp 182 186 183 187 static inline void xoshiro128pp_set_seed( xoshiro128pp_t & state, uint32_t seed ) { 184 state = (xoshiro128pp_t){ {seed, seed, seed, seed}};188 state = (xoshiro128pp_t){ seed, seed, seed, seed }; 185 189 xoshiro128pp( state ); // prime 186 190 } // xoshiro128pp_set_seed 187 191 188 192 #ifdef __SIZEOF_INT128__ 189 // Pipelined to allow out-of-order overlap with reduced dependencies. Critically, the current random state is190 // returned (copied), and then compute and store the next random value.191 193 //-------------------------------------------------- 192 194 static inline uint64_t lehmer64( __uint128_t & state ) { 193 195 __uint128_t ret = state; 194 state *= 0x da942042e4dd58b5;196 state *= 0x_da94_2042_e4dd_58b5; 195 197 return ret >> 64; 196 198 } // lehmer64 197 199 198 200 static inline void lehmer64_set_seed( __uint128_t & state, uint64_t seed ) { 201 // The seed needs to be coprime with the 2^64 modulus to get the largest period, so no factors of 2 in the seed. 199 202 state = seed; 200 lehmer64( state ); 203 lehmer64( state ); // prime 201 204 } // lehmer64_set_seed 202 205 … … 272 275 #endif // ! 
KISS_64 273 276 274 static inline uint64_t kiss_64( kiss_64_t & state ) with(state) {275 kiss_64_t ret = state;277 static inline uint64_t kiss_64( kiss_64_t & rs ) with(rs) { 278 kiss_64_t ret = rs; 276 279 z = 36969 * (z & 65535) + (z >> 16); 277 280 w = 18000 * (w & 65535) + (w >> 16); 278 jsr ^= (jsr << 17);279 281 jsr ^= (jsr << 13); 282 jsr ^= (jsr >> 17); 280 283 jsr ^= (jsr << 5); 281 284 jcong = 69069 * jcong + 1234567; … … 283 286 } // kiss_64 284 287 285 static inline void kiss_64_set_seed( kiss_64_t & state, uint64_t seed ) with(state) {288 static inline void kiss_64_set_seed( kiss_64_t & rs, uint64_t seed ) with(rs) { 286 289 z = 1; w = 1; jsr = 4; jcong = seed; 287 kiss_64( state );// prime290 kiss_64( rs ); // prime 288 291 } // kiss_64_set_seed 289 292 … … 294 297 #endif // ! XORWOW 295 298 296 static inline uint32_t xorwow( xorwow_t & state ) with(state) {299 static inline uint32_t xorwow( xorwow_t & rs ) with(rs) { 297 300 // Algorithm "xorwow" from p. 5 of Marsaglia, "Xorshift RNGs". 
298 301 uint32_t ret = a + counter; … … 312 315 } // xorwow 313 316 314 static inline void xorwow_set_seed( xorwow_t & state, uint32_t seed ) {315 state= (xorwow_t){ seed, seed, seed, seed, 0 };316 xorwow( state );// prime317 static inline void xorwow_set_seed( xorwow_t & rs, uint32_t seed ) { 318 rs = (xorwow_t){ seed, seed, seed, seed, 0 }; 319 xorwow( rs ); // prime 317 320 } // xorwow_set_seed 318 321 … … 326 329 327 330 // Bi-directional LCG random-number generator 328 static inline uint32_t LCGBI_fwd( uint64_t & state) {329 state = (A * state+ C) & (M - 1);330 return state>> D;331 static inline uint32_t LCGBI_fwd( uint64_t & rs ) { 332 rs = (A * rs + C) & (M - 1); 333 return rs >> D; 331 334 } // LCGBI_fwd 332 335 333 static inline uint32_t LCGBI_bck( uint64_t & state) {334 unsigned int r = state>> D;335 state = AI * (state- C) & (M - 1);336 static inline uint32_t LCGBI_bck( uint64_t & rs ) { 337 unsigned int r = rs >> D; 338 rs = AI * (rs - C) & (M - 1); 336 339 return r; 337 340 } // LCGBI_bck -
libcfa/src/heap.cfa
r0348fd8 rd99a716 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Oct 30 20:56:20202213 // Update Count : 15 8412 // Last Modified On : Wed Dec 28 12:37:38 2022 13 // Update Count : 1597 14 14 // 15 15 … … 17 17 #include <string.h> // memset, memcpy 18 18 #include <limits.h> // ULONG_MAX 19 #include <stdlib.h> // EXIT_FAILURE20 19 #include <errno.h> // errno, ENOMEM, EINVAL 21 #include <unistd.h> // STDERR_FILENO, sbrk, sysconf 22 #include <malloc.h> // memalign, malloc_usable_size 20 #include <unistd.h> // STDERR_FILENO, sbrk, sysconf, write 23 21 #include <sys/mman.h> // mmap, munmap 24 22 extern "C" { … … 26 24 } // extern "C" 27 25 26 #include "heap.hfa" 28 27 #include "bits/align.hfa" // libAlign 29 28 #include "bits/defs.hfa" // likely, unlikely … … 140 139 #endif 141 140 142 typedef volatile uintptr_t SpinLock_t CALIGN; // aligned addressable word-size141 typedef volatile uintptr_t SpinLock_t; 143 142 144 143 static inline __attribute__((always_inline)) void lock( volatile SpinLock_t & slock ) { … … 147 146 148 147 for ( unsigned int i = 1;; i += 1 ) { 149 if ( slock == 0 && __atomic_test_and_set( &slock, __ATOMIC_ SEQ_CST) == 0 ) break; // Fence148 if ( slock == 0 && __atomic_test_and_set( &slock, __ATOMIC_ACQUIRE ) == 0 ) break; // Fence 150 149 for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin 151 150 spin += spin; // powers of 2 … … 156 155 157 156 static inline __attribute__((always_inline)) void unlock( volatile SpinLock_t & slock ) { 158 __atomic_clear( &slock, __ATOMIC_ SEQ_CST); // Fence157 __atomic_clear( &slock, __ATOMIC_RELEASE ); // Fence 159 158 } // spin_unlock 160 159 … … 261 260 static_assert( libAlign() >= sizeof( Storage ), "minimum alignment < sizeof( Storage )" ); 262 261 263 struct __attribute__(( aligned (8) ))FreeHeader {264 size_t blockSize __attribute__(( aligned(8) ));// size of allocations on this list262 struct CALIGN FreeHeader 
{ 263 size_t blockSize CALIGN; // size of allocations on this list 265 264 #ifdef OWNERSHIP 266 265 #ifdef RETURNSPIN … … 284 283 285 284 #ifdef __CFA_DEBUG__ 286 int64_t allocUnfreed; // running total of allocations minus frees; can be negative285 ptrdiff_t allocUnfreed; // running total of allocations minus frees; can be negative 287 286 #endif // __CFA_DEBUG__ 288 287 … … 369 368 // Thread-local storage is allocated lazily when the storage is accessed. 370 369 static __thread size_t PAD1 CALIGN TLSMODEL __attribute__(( unused )); // protect false sharing 371 static __thread Heap * volatileheapManager CALIGN TLSMODEL;370 static __thread Heap * heapManager CALIGN TLSMODEL; 372 371 static __thread size_t PAD2 CALIGN TLSMODEL __attribute__(( unused )); // protect further false sharing 373 372 … … 443 442 // 12K ~= 120K byte superblock. Where 128-heap superblock handles a medium sized multi-processor server. 444 443 size_t remaining = heapManagersStorageEnd - heapManagersStorage; // remaining free heaps in superblock 445 if ( ! heapManagersStorage || remaining != 0 ) {444 if ( ! heapManagersStorage || remaining == 0 ) { 446 445 // Each block of heaps is a multiple of the number of cores on the computer. 447 446 int HeapDim = get_nprocs(); // get_nprocs_conf does not work … … 562 561 // allocUnfreed is set to 0 when a heap is created and it accumulates any unfreed storage during its multiple thread 563 562 // usages. At the end, add up each heap allocUnfreed value across all heaps to get the total unfreed storage. 
564 int64_t allocUnfreed = 0;563 ptrdiff_t allocUnfreed = 0; 565 564 for ( Heap * heap = heapMaster.heapManagersList; heap; heap = heap->nextHeapManager ) { 566 565 allocUnfreed += heap->allocUnfreed; … … 572 571 char helpText[512]; 573 572 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), 574 "CFA warning (UNIX pid:%ld) : program terminating with % ju(0x%jx) bytes of storage allocated but not freed.\n"573 "CFA warning (UNIX pid:%ld) : program terminating with %td(%#tx) bytes of storage allocated but not freed.\n" 575 574 "Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n", 576 575 (long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid … … 950 949 block = freeHead->freeList; // remove node from stack 951 950 if ( unlikely( block == 0p ) ) { // no free block ? 952 // Freelist for this size is empty, so check return list (OWNERSHIP), carve it out of the heap,if there951 // Freelist for this size is empty, so check return list (OWNERSHIP), or carve it out of the heap if there 953 952 // is enough left, or get some more heap storage and carve it off. 954 953 #ifdef OWNERSHIP … … 1115 1114 while ( ! __atomic_compare_exchange_n( &freeHead->returnList, &header->kind.real.next, (Heap.Storage *)header, 1116 1115 false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ); 1116 1117 #ifdef __STATISTICS__ 1118 stats.return_pushes += 1; 1119 stats.return_storage_request += rsize; 1120 stats.return_storage_alloc += size; 1121 #endif // __STATISTICS__ 1117 1122 #endif // RETURNSPIN 1118 1123 } // if … … 1125 1130 freeHead->freeList = (Heap.Storage *)header; 1126 1131 #endif // ! OWNERSHIP 1127 1128 #ifdef __U_STATISTICS__1129 stats.return_pushes += 1;1130 stats.return_storage_request += rsize;1131 stats.return_storage_alloc += size;1132 #endif // __U_STATISTICS__1133 1132 1134 1133 // OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED. 
… … 1180 1179 1181 1180 #ifdef __STATISTICS__ 1182 static void incCalls( intptr_t statName ) libcfa_nopreempt {1181 static void incCalls( size_t statName ) libcfa_nopreempt { 1183 1182 heapManager->stats.counters[statName].calls += 1; 1184 1183 } // incCalls 1185 1184 1186 static void incZeroCalls( intptr_t statName ) libcfa_nopreempt {1185 static void incZeroCalls( size_t statName ) libcfa_nopreempt { 1187 1186 heapManager->stats.counters[statName].calls_0 += 1; 1188 1187 } // incZeroCalls … … 1456 1455 // 0p, no operation is performed. 1457 1456 void free( void * addr ) libcfa_public { 1458 // verify( heapManager );1459 1460 1457 if ( unlikely( addr == 0p ) ) { // special case 1461 1458 #ifdef __STATISTICS__
Note: See TracChangeset for help on using the changeset viewer.