Changeset 9421f3d8 for doc/theses/thierry_delisle_PhD/code/utils.hpp
- Timestamp:
- Oct 30, 2019, 11:26:07 AM (5 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- df75fe97
- Parents:
- b067d9b
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/thierry_delisle_PhD/code/utils.hpp
rb067d9b r9421f3d8 10 10 #include <unistd.h> 11 11 #include <sys/sysinfo.h> 12 13 #include <x86intrin.h> 12 14 13 15 // Barrier from … … 103 105 return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count(); 104 106 } 107 108 // from geeksforgeeks 109 const constexpr unsigned num_to_bits[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; 110 111 /* Recursively get nibble of a given number 112 and map them in the array */ 113 __attribute__((noinline)) unsigned countSetBits(size_t num) { 114 unsigned nibble = 0; 115 if (0 == num) 116 return num_to_bits[0]; 117 118 // Find last nibble 119 nibble = num & 0xf; 120 121 // Use pre-stored values to find count 122 // in last nibble plus recursively add 123 // remaining nibbles. 124 return num_to_bits[nibble] + countSetBits(num >> 4); 125 } 126 127 __attribute__((noinline)) unsigned nthSetBit(size_t mask, unsigned bit) { 128 uint64_t v = mask; // Input value to find position with rank r. 129 unsigned int r = bit;// Input: bit's desired rank [1-64]. 130 unsigned int s; // Output: Resulting position of bit with rank r [1-64] 131 uint64_t a, b, c, d; // Intermediate temporaries for bit count. 132 unsigned int t; // Bit count temporary. 133 134 // Do a normal parallel bit count for a 64-bit integer, 135 // but store all intermediate steps. 136 // a = (v & 0x5555...) + ((v >> 1) & 0x5555...); 137 a = v - ((v >> 1) & ~0UL/3); 138 // b = (a & 0x3333...) + ((a >> 2) & 0x3333...); 139 b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5); 140 // c = (b & 0x0f0f...) + ((b >> 4) & 0x0f0f...); 141 c = (b + (b >> 4)) & ~0UL/0x11; 142 // d = (c & 0x00ff...) + ((c >> 8) & 0x00ff...); 143 d = (c + (c >> 8)) & ~0UL/0x101; 144 145 146 t = (d >> 32) + (d >> 48); 147 // Now do branchless select! 148 s = 64; 149 // if (r > t) {s -= 32; r -= t;} 150 s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8)); 151 t = (d >> (s - 16)) & 0xff; 152 // if (r > t) {s -= 16; r -= t;} 153 s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8)); 154 t = (c >> (s - 8)) & 0xf; 155 // if (r > t) {s -= 8; r -= t;} 156 s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8)); 157 t = (b >> (s - 4)) & 0x7; 158 // if (r > t) {s -= 4; r -= t;} 159 s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8)); 160 t = (a >> (s - 2)) & 0x3; 161 // if (r > t) {s -= 2; r -= t;} 162 s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8)); 163 t = (v >> (s - 1)) & 0x1; 164 // if (r > t) s--; 165 s -= ((t - r) & 256) >> 8; 166 s = 65 - s; 167 return s; 168 } 169 170 // __attribute__((noinline)) unsigned int nth_bit_set(uint64_t value, unsigned int n) 171 // { 172 // uint64_t mask = 0x00000000FFFFFFFFul; 173 // unsigned int size = 32u; 174 // unsigned int base = 0u; 175 176 // if(value == 0) return 0; 177 // assertf(n < (unsigned)__builtin_popcountl(value), "%u < %d (%zu)", n, __builtin_popcountl(value), value); 178 // n++; 179 // while (size > 0) { 180 // const unsigned int count = __builtin_popcountl(value & mask); 181 // if (n > count) { 182 // base += size; 183 // size >>= 1; 184 // mask |= mask << size; 185 // } else { 186 // size >>= 1; 187 // mask >>= size; 188 // } 189 // } 190 191 // return base; 192 // } 193 194 195 static inline uint64_t rotl64 (uint64_t n, unsigned int c) { 196 const unsigned int mask = (8*sizeof(n) - 1); // assumes width is a power of 2. 197 198 c &= mask; 199 return (n<<c) | (n>>( (-c)&mask )); 200 } 201 202 static inline uint64_t rotr64 (uint64_t n, unsigned int c) 203 { 204 const unsigned int mask = (8*sizeof(n) - 1); 205 206 // assert ( (c<=mask) &&"rotate by type width or more"); 207 c &= mask; 208 return (n>>c) | (n<<( (-c)&mask )); 209 }
Note: See TracChangeset
for help on using the changeset viewer.