Changeset 807a632 for doc/theses/thierry_delisle_PhD/code/utils.hpp
 Timestamp:
 Feb 10, 2020, 11:17:31 AM (3 years ago)
 Branches:
 armeh, enum, forallpointerdecay, jacob/cs343translation, jenkinssandbox, master, newast, newastuniqueexpr, pthreademulation, qualifiedEnum
 Children:
 3b56166
 Parents:
 487198c
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

doc/theses/thierry_delisle_PhD/code/utils.hpp
r487198c r807a632 58 58 } 59 59 60 void affinity(int tid) {60 static inline void affinity(int tid) { 61 61 static int cpus = get_nprocs(); 62 62 … … 72 72 73 73 static const constexpr std::size_t cache_line_size = 64; 74 void check_cache_line_size() {74 static inline void check_cache_line_size() { 75 75 std::cout << "Checking cache line size" << std::endl; 76 76 const std::string cache_file = "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"; … … 106 106 } 107 107 108 // from geeksforgeeks 109 const constexpr unsigned num_to_bits[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; 110 111 /* Recursively get nibble of a given number 112 and map them in the array */ 113 __attribute__((noinline)) unsigned countSetBits(size_t num) { 114 unsigned nibble = 0; 115 if (0 == num) 116 return num_to_bits[0]; 117 118 // Find last nibble 119 nibble = num & 0xf; 120 121 // Use prestored values to find count 122 // in last nibble plus recursively add 123 // remaining nibbles. 124 return num_to_bits[nibble] + countSetBits(num >> 4); 125 } 126 127 __attribute__((noinline)) unsigned nthSetBit(size_t mask, unsigned bit) { 108 static inline unsigned rand_bit(unsigned rnum, size_t mask) { 109 unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0; 110 #if !defined(__BMI2__) 128 111 uint64_t v = mask; // Input value to find position with rank r. 129 unsigned int r = bit ;// Input: bit's desired rank [164].112 unsigned int r = bit + 1;// Input: bit's desired rank [164]. 130 113 unsigned int s; // Output: Resulting position of bit with rank r [164] 131 114 uint64_t a, b, c, d; // Intermediate temporaries for bit count. … … 134 117 // Do a normal parallel bit count for a 64bit integer, 135 118 // but store all intermediate steps. 136 // a = (v & 0x5555...) + ((v >> 1) & 0x5555...);137 119 a = v  ((v >> 1) & ~0UL/3); 138 // b = (a & 0x3333...) + ((a >> 2) & 0x3333...);139 120 b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5); 140 // c = (b & 0x0f0f...) + ((b >> 4) & 0x0f0f...);141 121 c = (b + (b >> 4)) & ~0UL/0x11; 142 // d = (c & 0x00ff...) + ((c >> 8) & 0x00ff...);143 122 d = (c + (c >> 8)) & ~0UL/0x101; 144 123 … … 147 126 // Now do branchless select! 148 127 s = 64; 149 // if (r > t) {s = 32; r = t;}150 128 s = ((t  r) & 256) >> 3; r = (t & ((t  r) >> 8)); 151 129 t = (d >> (s  16)) & 0xff; 152 // if (r > t) {s = 16; r = t;}153 130 s = ((t  r) & 256) >> 4; r = (t & ((t  r) >> 8)); 154 131 t = (c >> (s  8)) & 0xf; 155 // if (r > t) {s = 8; r = t;}156 132 s = ((t  r) & 256) >> 5; r = (t & ((t  r) >> 8)); 157 133 t = (b >> (s  4)) & 0x7; 158 // if (r > t) {s = 4; r = t;}159 134 s = ((t  r) & 256) >> 6; r = (t & ((t  r) >> 8)); 160 135 t = (a >> (s  2)) & 0x3; 161 // if (r > t) {s = 2; r = t;}162 136 s = ((t  r) & 256) >> 7; r = (t & ((t  r) >> 8)); 163 137 t = (v >> (s  1)) & 0x1; 164 // if (r > t) s;165 138 s = ((t  r) & 256) >> 8; 166 s = 65  s; 167 return s; 139 return s  1; 140 #else 141 uint64_t picked = _pdep_u64(1ul << bit, mask); 142 return picked ? __builtin_ctzl(picked) : 0; 143 #endif 168 144 } 169 170 // __attribute__((noinline)) unsigned int nth_bit_set(uint64_t value, unsigned int n)171 // {172 // uint64_t mask = 0x00000000FFFFFFFFul;173 // unsigned int size = 32u;174 // unsigned int base = 0u;175 176 // if(value == 0) return 0;177 // assertf(n < (unsigned)__builtin_popcountl(value), "%u < %d (%zu)", n, __builtin_popcountl(value), value);178 // n++;179 // while (size > 0) {180 // const unsigned int count = __builtin_popcountl(value & mask);181 // if (n > count) {182 // base += size;183 // size >>= 1;184 // mask = mask << size;185 // } else {186 // size >>= 1;187 // mask >>= size;188 // }189 // }190 191 // return base;192 // }193 194 195 static inline uint64_t rotl64 (uint64_t n, unsigned int c) {196 const unsigned int mask = (8*sizeof(n)  1); // assumes width is a power of 2.197 198 c &= mask;199 return (n<<c)  (n>>( (c)&mask ));200 }201 202 static inline uint64_t rotr64 (uint64_t n, unsigned int c)203 {204 const unsigned int mask = (8*sizeof(n)  1);205 206 // assert ( (c<=mask) &&"rotate by type width or more");207 c &= mask;208 return (n>>c)  (n<<( (c)&mask ));209 }
Note: See TracChangeset
for help on using the changeset viewer.