Changeset 658f3179
- Timestamp:
- Jan 8, 2025, 2:00:36 PM (12 days ago)
- Branches:
- master
- Children:
- f32448e
- Parents:
- d84f2ae
- Location:
- libcfa/src
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/math.cfa
rd84f2ae r658f3179 19 19 20 20 #pragma GCC visibility push(default) 21 22 unsigned long long log2_u32_32( unsigned long long val ) { 23 enum { 24 TABLE_BITS = 6, 25 TABLE_SIZE = (1 << TABLE_BITS) + 2, 26 }; 27 // for(i; TABLE_SIZE) { 28 // table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32))); 29 // } 30 static const unsigned long long table[] = { 31 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f, 32 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4, 33 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98, 34 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a, 35 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c, 36 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4, 37 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c, 38 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1, 39 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4, 40 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0, 41 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9, 42 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695, 43 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935, 44 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5, 45 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170, 46 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b, 47 0x0100000000, 0x0102dfca16, 48 }; 49 _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate"); 50 // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1 51 // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f) 52 // 53 // getting i is easy to do using builtin_clz (count leading zero) 54 // 55 // we want to calculate log2(1+f) independently to have a many bits of precision as possible. 56 // val = (2 ** i)*(1 + f) = 2 ** i + f * 2 ** i 57 // isolating f we get 58 // val - 2 ** i = f * 2 ** i 59 // (val - 2 ** i) / 2 ** i = f 60 // 61 // we want to interpolate from the table to get the values 62 // and compromise by doing quadratic interpolation (rather than higher degree interpolation) 63 // 64 // for the interpolation we want to shift everything the fist sample point 65 // so our parabola becomes x = 0 66 // this further simplifies the equations 67 // 68 // the consequence is that we need f in 2 forms: 69 // - finding the index of x0 70 // - finding the distance between f and x0 71 // 72 // since sample points are equidistant we can significantly simplify the equations 73 74 // get i 75 const unsigned long long bits = sizeof(val) * __CHAR_BIT__; 76 const unsigned long long lz = __builtin_clzl(val); 77 const unsigned long long i = bits - 1 - lz; 78 79 // get the fractinal part as a u32.32 80 const unsigned long long frac = (val << (lz + 1)) >> 32; 81 82 // get high order bits for the index into the table 83 const unsigned long long idx0 = frac >> (32 - TABLE_BITS); 84 85 // get the x offset, i.e., the difference between the first sample point and the actual fractional part 86 const long long udx = frac - (idx0 << (32 - TABLE_BITS)); 87 /* paranoid */ verify((idx0 + 2) < TABLE_SIZE); 88 89 const long long y0 = table[idx0 + 0]; 90 const long long y1 = table[idx0 + 1]; 91 const long long y2 = table[idx0 + 2]; 92 93 // from there we can quadraticly interpolate to get the data, using the lagrange polynomial 94 // normally it would look like: 95 // double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2)); 96 // double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2)); 97 // double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1)); 98 // but since the spacing between sample points is fixed, we can simplify itand extract common expressions 99 const long long f1 = (y1 - y0); 100 const long long f2 = (y2 - y0); 101 const long long a = f2 - (f1 * 2l); 102 const long long b = (f1 * 2l) - a; 103 104 // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps) 105 long long sum = ((a*udx) >> (32 - TABLE_BITS)) + b; 106 sum = (sum*udx) >> (32 - TABLE_BITS + 1); 107 sum = y0 + sum; 108 109 return (i << 32) + (sum); 110 } // log2_u32_32 21 111 22 112 // Implementation of power functions (from the prelude): -
libcfa/src/math.hfa
rd84f2ae r658f3179 148 148 } // distribution 149 149 150 static inline unsigned long long log2_u32_32( unsigned long long val ) { 151 enum { 152 TABLE_BITS = 6, 153 TABLE_SIZE = (1 << TABLE_BITS) + 2, 154 }; 155 // for(i; TABLE_SIZE) { 156 // table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32))); 157 // } 158 static const unsigned long long table[] = { 159 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f, 160 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4, 161 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98, 162 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a, 163 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c, 164 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4, 165 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c, 166 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1, 167 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4, 168 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0, 169 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9, 170 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695, 171 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935, 172 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5, 173 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170, 174 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b, 175 0x0100000000, 0x0102dfca16, 176 }; 177 _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate"); 178 // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1 179 // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f) 180 // 181 // getting i is easy to do using builtin_clz (count leading zero) 182 // 183 // we want to calculate log2(1+f) independently to have a many bits of precision as possible. 184 // val = (2 ** i)*(1 + f) = 2 ** i + f * 2 ** i 185 // isolating f we get 186 // val - 2 ** i = f * 2 ** i 187 // (val - 2 ** i) / 2 ** i = f 188 // 189 // we want to interpolate from the table to get the values 190 // and compromise by doing quadratic interpolation (rather than higher degree interpolation) 191 // 192 // for the interpolation we want to shift everything the fist sample point 193 // so our parabola becomes x = 0 194 // this further simplifies the equations 195 // 196 // the consequence is that we need f in 2 forms: 197 // - finding the index of x0 198 // - finding the distance between f and x0 199 // 200 // since sample points are equidistant we can significantly simplify the equations 201 202 // get i 203 const unsigned long long bits = sizeof(val) * __CHAR_BIT__; 204 const unsigned long long lz = __builtin_clzl(val); 205 const unsigned long long i = bits - 1 - lz; 206 207 // get the fractinal part as a u32.32 208 const unsigned long long frac = (val << (lz + 1)) >> 32; 209 210 // get high order bits for the index into the table 211 const unsigned long long idx0 = frac >> (32 - TABLE_BITS); 212 213 // get the x offset, i.e., the difference between the first sample point and the actual fractional part 214 const long long udx = frac - (idx0 << (32 - TABLE_BITS)); 215 /* paranoid */ verify((idx0 + 2) < TABLE_SIZE); 216 217 const long long y0 = table[idx0 + 0]; 218 const long long y1 = table[idx0 + 1]; 219 const long long y2 = table[idx0 + 2]; 220 221 // from there we can quadraticly interpolate to get the data, using the lagrange polynomial 222 // normally it would look like: 223 // double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2)); 224 // double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2)); 225 // double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1)); 226 // but since the spacing between sample points is fixed, we can simplify it and extract common expressions 227 const long long f1 = (y1 - y0); 228 const long long f2 = (y2 - y0); 229 const long long a = f2 - (f1 * 2l); 230 const long long b = (f1 * 2l) - a; 231 232 // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps) 233 long long sum = ((a*udx) >> (32 - TABLE_BITS)) + b; 234 sum = (sum*udx) >> (32 - TABLE_BITS + 1); 235 sum = y0 + sum; 236 237 return (i << 32) + (sum); 238 } // log2_u32_32 150 unsigned long long log2_u32_32( unsigned long long val ); 239 151 240 152 //---------------------- Trigonometric ----------------------
Note: See TracChangeset
for help on using the changeset viewer.