Changeset df6cc9d for libcfa/src/math.hfa
- Timestamp:
- Oct 19, 2022, 4:43:26 PM (3 years ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 1a45263
- Parents:
- 9cd5bd2 (diff), 135143ba (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/math.hfa
r9cd5bd2 rdf6cc9d 10 10 // Created On : Mon Apr 18 23:37:04 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Apr 15 11:47:56 202113 // Update Count : 13 212 // Last Modified On : Sat Oct 8 08:40:42 2022 13 // Update Count : 136 14 14 // 15 15 … … 22 22 23 23 #include "common.hfa" 24 #include "bits/debug.hfa" 24 25 25 26 //---------------------- General ---------------------- 26 27 27 static inline {28 static inline __attribute__((always_inline)) { 28 29 float ?%?( float x, float y ) { return fmodf( x, y ); } 29 30 float fmod( float x, float y ) { return fmodf( x, y ); } … … 63 64 //---------------------- Exponential ---------------------- 64 65 65 static inline {66 static inline __attribute__((always_inline)) { 66 67 float exp( float x ) { return expf( x ); } 67 68 // extern "C" { double exp( double ); } … … 92 93 //---------------------- Logarithm ---------------------- 93 94 94 static inline {95 static inline __attribute__((always_inline)) { 95 96 float log( float x ) { return logf( x ); } 96 97 // extern "C" { double log( double ); } … … 147 148 } // distribution 148 149 150 static inline unsigned long long log2_u32_32( unsigned long long val ) { 151 enum { 152 TABLE_BITS = 6, 153 TABLE_SIZE = (1 << TABLE_BITS) + 2, 154 }; 155 // for(i; TABLE_SIZE) { 156 // table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32))); 157 // } 158 static const unsigned long long table[] = { 159 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f, 160 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4, 161 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98, 162 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a, 163 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c, 164 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4, 165 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c, 166 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1, 167 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4, 168 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0, 169 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9, 170 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695, 171 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935, 172 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5, 173 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170, 174 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b, 175 0x0100000000, 0x0102dfca16, 176 }; 177 _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate"); 178 // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1 179 // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f) 180 // 181 // getting i is easy to do using builtin_clz (count leading zero) 182 // 183 // we want to calculate log2(1+f) independently to have a many bits of precision as possible. 184 // val = (2 ** i)*(1 + f) = 2 ** i + f * 2 ** i 185 // isolating f we get 186 // val - 2 ** i = f * 2 ** i 187 // (val - 2 ** i) / 2 ** i = f 188 // 189 // we want to interpolate from the table to get the values 190 // and compromise by doing quadratic interpolation (rather than higher degree interpolation) 191 // 192 // for the interpolation we want to shift everything the fist sample point 193 // so our parabola becomes x = 0 194 // this further simplifies the equations 195 // 196 // the consequence is that we need f in 2 forms: 197 // - finding the index of x0 198 // - finding the distance between f and x0 199 // 200 // since sample points are equidistant we can significantly simplify the equations 201 202 // get i 203 const unsigned long long bits = sizeof(val) * __CHAR_BIT__; 204 const unsigned long long lz = __builtin_clzl(val); 205 const unsigned long long i = bits - 1 - lz; 206 207 // get the fractinal part as a u32.32 208 const unsigned long long frac = (val << (lz + 1)) >> 32; 209 210 // get high order bits for the index into the table 211 const unsigned long long idx0 = frac >> (32 - TABLE_BITS); 212 213 // get the x offset, i.e., the difference between the first sample point and the actual fractional part 214 const long long udx = frac - (idx0 << (32 - TABLE_BITS)); 215 /* paranoid */ verify((idx0 + 2) < TABLE_SIZE); 216 217 const long long y0 = table[idx0 + 0]; 218 const long long y1 = table[idx0 + 1]; 219 const long long y2 = table[idx0 + 2]; 220 221 // from there we can quadraticly interpolate to get the data, using the lagrange polynomial 222 // normally it would look like: 223 // double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2)); 224 // double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2)); 225 // double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1)); 226 // but since the spacing between sample points is fixed, we can simplify it and extract common expressions 227 const long long f1 = (y1 - y0); 228 const long long f2 = (y2 - y0); 229 const long long a = f2 - (f1 * 2l); 230 const long long b = (f1 * 2l) - a; 231 232 // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps) 233 long long sum = ((a*udx) >> (32 - TABLE_BITS)) + b; 234 sum = (sum*udx) >> (32 - TABLE_BITS + 1); 235 sum = y0 + sum; 236 237 return (i << 32) + (sum); 238 } 239 149 240 //---------------------- Trigonometric ---------------------- 150 241 151 static inline {242 static inline __attribute__((always_inline)) { 152 243 float sin( float x ) { return sinf( x ); } 153 244 // extern "C" { double sin( double ); } … … 204 295 //---------------------- Hyperbolic ---------------------- 205 296 206 static inline {297 static inline __attribute__((always_inline)) { 207 298 float sinh( float x ) { return sinhf( x ); } 208 299 // extern "C" { double sinh( double ); } … … 250 341 //---------------------- Error / Gamma ---------------------- 251 342 252 static inline {343 static inline __attribute__((always_inline)) { 253 344 float erf( float x ) { return erff( x ); } 254 345 // extern "C" { double erf( double ); } … … 279 370 //---------------------- Nearest Integer ---------------------- 280 371 281 static inline{372 inline __attribute__((always_inline)) static { 282 373 signed char floor( signed char n, signed char align ) { return n / align * align; } 283 374 unsigned char floor( unsigned char n, unsigned char align ) { return n / align * align; } … … 307 398 // forall( T | { T ?+?( T, T ); T ?-?( T, T ); T ?%?( T, T ); } ) 308 399 // T ceiling_div( T n, T align ) { verify( is_pow2( align ) );return (n + (align - 1)) / align; } 309 400 310 401 // gcc notices the div/mod pair and saves both so only one div. 311 402 signed char ceiling( signed char n, signed char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); } … … 376 467 //---------------------- Manipulation ---------------------- 377 468 378 static inline {469 static inline __attribute__((always_inline)) { 379 470 float copysign( float x, float y ) { return copysignf( x, y ); } 380 471 // extern "C" { double copysign( double, double ); } … … 418 509 //--------------------------------------- 419 510 420 static inline {511 static inline __attribute__((always_inline)) { 421 512 forall( T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T );T ?*?( T, T ); } ) 422 513 T lerp( T x, T y, T a ) { return x * ((T){1} - a) + y * a; }
Note:
See TracChangeset
for help on using the changeset viewer.