Changeset f32448e


Ignore:
Timestamp:
Jan 8, 2025, 2:10:07 PM (12 days ago)
Author:
Andrew Beach <ajbeach@…>
Branches:
master
Children:
550446f
Parents:
658f3179
Message:

Fixed white-space. Woops.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/math.cfa

    r658f3179 rf32448e  
    2121
    2222unsigned long long log2_u32_32( unsigned long long val ) {
    23     enum {
    24         TABLE_BITS = 6,
    25         TABLE_SIZE = (1 << TABLE_BITS) + 2,
    26     };
    27     // for(i; TABLE_SIZE) {
    28     //  table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
    29     // }
    30     static const unsigned long long table[] = {
    31         0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
    32         0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
    33         0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
    34         0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
    35         0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
    36         0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
    37         0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
    38         0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
    39         0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
    40         0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
    41         0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
    42         0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
    43         0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
    44         0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
    45         0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
    46         0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
    47         0x0100000000, 0x0102dfca16,
    48     };
    49     _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
    50     // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
    51     // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
    52     //
    53     // getting i is easy to do using builtin_clz (count leading zero)
    54     //
    55     // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
    56     //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
    57     // isolating f we get
    58     //     val - 2 ** i = f * 2 ** i
    59     //     (val - 2 ** i) / 2 ** i = f
    60     //
    61     // we want to interpolate from the table to get the values
    62     // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
    63     //
    64     // for the interpolation we want to shift everything the fist sample point
    65     // so our parabola becomes x = 0
    66     // this further simplifies the equations
    67     //
    68     // the consequence is that we need f in 2 forms:
    69     //  - finding the index of x0
    70     //  - finding the distance between f and x0
    71     //
    72     // since sample points are equidistant we can significantly simplify the equations
     23        enum {
     24                TABLE_BITS = 6,
     25                TABLE_SIZE = (1 << TABLE_BITS) + 2,
     26        };
     27        // for(i; TABLE_SIZE) {
     28        //  table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
     29        // }
     30        static const unsigned long long table[] = {
     31                0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
     32                0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
     33                0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
     34                0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
     35                0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
     36                0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
     37                0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
     38                0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
     39                0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
     40                0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
     41                0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
     42                0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
     43                0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
     44                0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
     45                0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
     46                0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
     47                0x0100000000, 0x0102dfca16,
     48        };
     49        _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
     50        // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
     51        // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
     52        //
     53        // getting i is easy to do using builtin_clz (count leading zero)
     54        //
     55        // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
     56        //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
     57        // isolating f we get
     58        //     val - 2 ** i = f * 2 ** i
     59        //     (val - 2 ** i) / 2 ** i = f
     60        //
     61        // we want to interpolate from the table to get the values
     62        // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
     63        //
     64        // for the interpolation we want to shift everything the fist sample point
     65        // so our parabola becomes x = 0
     66        // this further simplifies the equations
     67        //
     68        // the consequence is that we need f in 2 forms:
     69        //  - finding the index of x0
     70        //  - finding the distance between f and x0
     71        //
     72        // since sample points are equidistant we can significantly simplify the equations
    7373
    74     // get i
    75     const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
    76     const unsigned long long lz = __builtin_clzl(val);
    77     const unsigned long long i = bits - 1 - lz;
     74        // get i
     75        const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
     76        const unsigned long long lz = __builtin_clzl(val);
     77        const unsigned long long i = bits - 1 - lz;
    7878
    79     // get the fractinal part as a u32.32
    80     const unsigned long long frac = (val << (lz + 1)) >> 32;
     79        // get the fractinal part as a u32.32
     80        const unsigned long long frac = (val << (lz + 1)) >> 32;
    8181
    82     // get high order bits for the index into the table
    83     const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
     82        // get high order bits for the index into the table
     83        const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
    8484
    85     // get the x offset, i.e., the difference between the first sample point and the actual fractional part
    86     const long long udx = frac - (idx0 << (32 - TABLE_BITS));
    87     /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
     85        // get the x offset, i.e., the difference between the first sample point and the actual fractional part
     86        const long long udx = frac - (idx0 << (32 - TABLE_BITS));
     87        /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
    8888
    89     const long long y0 = table[idx0 + 0];
    90     const long long y1 = table[idx0 + 1];
    91     const long long y2 = table[idx0 + 2];
     89        const long long y0 = table[idx0 + 0];
     90        const long long y1 = table[idx0 + 1];
     91        const long long y2 = table[idx0 + 2];
    9292
    93     // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
    94     // normally it would look like:
    95     //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
    96     //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
    97     //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
    98     // but since the spacing between sample points is fixed, we can simplify itand extract common expressions
    99     const long long f1 = (y1 - y0);
    100     const long long f2 = (y2 - y0);
    101     const long long a = f2 - (f1 * 2l);
    102     const long long b = (f1 * 2l) - a;
     93        // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
     94        // normally it would look like:
     95        //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
     96        //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
     97        //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
     98        // but since the spacing between sample points is fixed, we can simplify itand extract common expressions
     99        const long long f1 = (y1 - y0);
     100        const long long f2 = (y2 - y0);
     101        const long long a = f2 - (f1 * 2l);
     102        const long long b = (f1 * 2l) - a;
    103103
    104     // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
    105     long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
    106     sum = (sum*udx) >> (32 - TABLE_BITS + 1);
    107     sum = y0 + sum;
     104        // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
     105        long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
     106        sum = (sum*udx) >> (32 - TABLE_BITS + 1);
     107        sum = y0 + sum;
    108108
    109     return (i << 32) + (sum);
     109        return (i << 32) + (sum);
    110110} // log2_u32_32
    111111
Note: See TracChangeset for help on using the changeset viewer.