Changeset 0deeaad for libcfa/src


Ignore:
Timestamp:
Oct 5, 2022, 5:20:54 PM (2 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, ast-experimental, master
Children:
815943f, d0a00a5a
Parents:
5f6b2c2
Message:

Added fixed point log2 calculation, which is not that useful but kind of cool.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/math.hfa

    r5f6b2c2 r0deeaad  
    2222
    2323#include "common.hfa"
     24#include "bits/debug.hfa"
    2425
    2526//---------------------- General ----------------------
     
    307308        // forall( T | { T ?+?( T, T ); T ?-?( T, T ); T ?%?( T, T ); } )
    308309        // T ceiling_div( T n, T align ) { verify( is_pow2( align ) );return (n + (align - 1)) / align; }
    309        
     310
    310311        // gcc notices the div/mod pair and saves both so only one div.
    311312        signed char ceiling( signed char n, signed char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     
    429430} // distribution
    430431
     432static inline unsigned long long log2_u32_32(unsigned long long val) {
     433        enum {
     434                TABLE_BITS = 6,
     435                TABLE_SIZE = (1 << TABLE_BITS) + 2,
     436        };
     437        // for(i; TABLE_SIZE) {
     438        //      table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
     439        // }
     440        static const unsigned long long table[] = {
     441                0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
     442                0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
     443                0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
     444                0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
     445                0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
     446                0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
     447                0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
     448                0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
     449                0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
     450                0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
     451                0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
     452                0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
     453                0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
     454                0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
     455                0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
     456                0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
     457                0x0100000000, 0x0102dfca16,
     458        };
     459        _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
     460        // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
     461        // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
     462        //
     463        // getting i is easy to do using builtin_clz (count leading zero)
     464        //
     465        // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
     466        //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
     467        // isolating f we get
     468        //     val - 2 ** i = f * 2 ** i
     469        //     (val - 2 ** i) / 2 ** i = f
     470        //
     471        // we want to interpolate from the table to get the values
     472        // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
     473        //
     474        // for the interpolation we want to shift everything the fist sample point
     475        // so our parabola becomes x = 0
     476        // this further simplifies the equations
     477        //
     478        // the consequence is that we need f in 2 forms:
     479        //  - finding the index of x0
     480        //  - finding the distance between f and x0
     481        //
     482        // since sample points are equidistant we can significantly simplify the equations
     483
     484        // get i
     485        const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
     486        const unsigned long long lz = __builtin_clzl(val);
     487        const unsigned long long i = bits - 1 - lz;
     488
     489        // get the fractinal part as a u32.32
     490        const unsigned long long frac = (val << (lz + 1)) >> 32;
     491
     492        // get high order bits for the index into the table
     493        const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
     494
     495        // get the x offset, i.e., the difference between the first sample point and the actual fractional part
     496        const long long udx = frac - (idx0 << (32 - TABLE_BITS));
     497        /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
     498
     499        const long long y0 = table[idx0 + 0];
     500        const long long y1 = table[idx0 + 1];
     501        const long long y2 = table[idx0 + 2];
     502
     503        // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
     504        // normally it would look like:
     505        //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
     506        //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
     507        //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
     508        // but since the spacing between sample points is fixed, we can simplify it and extract common expressions
     509        const long long f1 = (y1 - y0);
     510        const long long f2 = (y2 - y0);
     511        const long long a = f2 - (f1 * 2l);
     512        const long long b = (f1 * 2l) - a;
     513
     514        // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
     515        long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
     516        sum = (sum*udx) >> (32 - TABLE_BITS + 1);
     517        sum = y0 + sum;
     518
     519        return (i << 32) + (sum);
     520}
     521
    431522// Local Variables: //
    432523// mode: c //
Note: See TracChangeset for help on using the changeset viewer.