Changeset b7d6a36 for doc/theses/thierry_delisle_PhD/code/utils.hpp
- Timestamp:
- Feb 20, 2020, 4:15:51 PM (6 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- 6a490b2
- Parents:
- dca5802 (diff), 2cbfe92 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/thierry_delisle_PhD/code/utils.hpp
rdca5802 rb7d6a36 10 10 #include <unistd.h> 11 11 #include <sys/sysinfo.h> 12 13 #include <x86intrin.h> 12 14 13 15 // Barrier from … … 56 58 } 57 59 58 void affinity(int tid) {60 static inline void affinity(int tid) { 59 61 static int cpus = get_nprocs(); 60 62 … … 70 72 71 73 static const constexpr std::size_t cache_line_size = 64; 72 void check_cache_line_size() {74 static inline void check_cache_line_size() { 73 75 std::cout << "Checking cache line size" << std::endl; 74 76 const std::string cache_file = "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"; … … 103 105 return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count(); 104 106 } 107 108 static inline unsigned rand_bit(unsigned rnum, size_t mask) { 109 unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0; 110 #if !defined(__BMI2__) 111 uint64_t v = mask; // Input value to find position with rank r. 112 unsigned int r = bit + 1;// Input: bit's desired rank [1-64]. 113 unsigned int s; // Output: Resulting position of bit with rank r [1-64] 114 uint64_t a, b, c, d; // Intermediate temporaries for bit count. 115 unsigned int t; // Bit count temporary. 116 117 // Do a normal parallel bit count for a 64-bit integer, 118 // but store all intermediate steps. 119 a = v - ((v >> 1) & ~0UL/3); 120 b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5); 121 c = (b + (b >> 4)) & ~0UL/0x11; 122 d = (c + (c >> 8)) & ~0UL/0x101; 123 124 125 t = (d >> 32) + (d >> 48); 126 // Now do branchless select! 
127 s = 64; 128 s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8)); 129 t = (d >> (s - 16)) & 0xff; 130 s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8)); 131 t = (c >> (s - 8)) & 0xf; 132 s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8)); 133 t = (b >> (s - 4)) & 0x7; 134 s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8)); 135 t = (a >> (s - 2)) & 0x3; 136 s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8)); 137 t = (v >> (s - 1)) & 0x1; 138 s -= ((t - r) & 256) >> 8; 139 return s - 1; 140 #else 141 uint64_t picked = _pdep_u64(1ul << bit, mask); 142 return picked ? __builtin_ctzl(picked) : 0; 143 #endif 144 }
Note: See TracChangeset for help on using the changeset viewer.