Changeset 0deeaad for libcfa/src
- Timestamp:
- Oct 5, 2022, 5:20:54 PM (2 years ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 815943f, d0a00a5a
- Parents:
- 5f6b2c2
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/math.hfa
r5f6b2c2 r0deeaad 22 22 23 23 #include "common.hfa" 24 #include "bits/debug.hfa" 24 25 25 26 //---------------------- General ---------------------- … … 307 308 // forall( T | { T ?+?( T, T ); T ?-?( T, T ); T ?%?( T, T ); } ) 308 309 // T ceiling_div( T n, T align ) { verify( is_pow2( align ) );return (n + (align - 1)) / align; } 309 310 310 311 // gcc notices the div/mod pair and saves both so only one div. 311 312 signed char ceiling( signed char n, signed char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); } … … 429 430 } // distribution 430 431 432 static inline unsigned long long log2_u32_32(unsigned long long val) { 433 enum { 434 TABLE_BITS = 6, 435 TABLE_SIZE = (1 << TABLE_BITS) + 2, 436 }; 437 // for(i; TABLE_SIZE) { 438 // table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32))); 439 // } 440 static const unsigned long long table[] = { 441 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f, 442 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4, 443 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98, 444 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a, 445 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c, 446 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4, 447 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c, 448 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1, 449 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4, 450 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0, 451 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9, 452 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695, 453 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935, 454 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5, 455 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170, 456 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b, 457 0x0100000000, 0x0102dfca16, 458 }; 459 _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate"); 460 // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1 461 // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f) 462 // 463 // getting i is easy to do using builtin_clz (count leading zero) 464 // 465 // we want to calculate log2(1+f) independently to have a many bits of precision as possible. 466 // val = (2 ** i)*(1 + f) = 2 ** i + f * 2 ** i 467 // isolating f we get 468 // val - 2 ** i = f * 2 ** i 469 // (val - 2 ** i) / 2 ** i = f 470 // 471 // we want to interpolate from the table to get the values 472 // and compromise by doing quadratic interpolation (rather than higher degree interpolation) 473 // 474 // for the interpolation we want to shift everything the fist sample point 475 // so our parabola becomes x = 0 476 // this further simplifies the equations 477 // 478 // the consequence is that we need f in 2 forms: 479 // - finding the index of x0 480 // - finding the distance between f and x0 481 // 482 // since sample points are equidistant we can significantly simplify the equations 483 484 // get i 485 const unsigned long long bits = sizeof(val) * __CHAR_BIT__; 486 const unsigned long long lz = __builtin_clzl(val); 487 const unsigned long long i = bits - 1 - lz; 488 489 // get the fractinal part as a u32.32 490 const unsigned long long frac = (val << (lz + 1)) >> 32; 491 492 // get high order bits for the index into the table 493 const unsigned long long idx0 = frac >> (32 - TABLE_BITS); 494 495 // get the x offset, i.e., the difference between the first sample point and the actual fractional part 496 const long long udx = frac - (idx0 << (32 - TABLE_BITS)); 497 /* paranoid */ verify((idx0 + 2) < TABLE_SIZE); 498 499 const long long y0 = table[idx0 + 0]; 500 const long long y1 = table[idx0 + 1]; 501 const long long y2 = table[idx0 + 2]; 502 503 // from there we can quadraticly interpolate to get the data, using the lagrange polynomial 504 // normally it would look like: 505 // double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2)); 506 // double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2)); 507 // double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1)); 508 // but since the spacing between sample points is fixed, we can simplify it and extract common expressions 509 const long long f1 = (y1 - y0); 510 const long long f2 = (y2 - y0); 511 const long long a = f2 - (f1 * 2l); 512 const long long b = (f1 * 2l) - a; 513 514 // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps) 515 long long sum = ((a*udx) >> (32 - TABLE_BITS)) + b; 516 sum = (sum*udx) >> (32 - TABLE_BITS + 1); 517 sum = y0 + sum; 518 519 return (i << 32) + (sum); 520 } 521 431 522 // Local Variables: // 432 523 // mode: c //
Note: See TracChangeset
for help on using the changeset viewer.