Ignore:
Timestamp:
Jan 11, 2025, 5:48:46 PM (8 months ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
master
Children:
f886608
Parents:
7d65715f (diff), 32a119e9 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/math.hfa

    r7d65715f rd60a4c2  
    148148} // distribution
    149149
    150 static inline unsigned long long log2_u32_32( unsigned long long val ) {
    151         enum {
    152                 TABLE_BITS = 6,
    153                 TABLE_SIZE = (1 << TABLE_BITS) + 2,
    154         };
    155         // for(i; TABLE_SIZE) {
    156         //      table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
    157         // }
    158         static const unsigned long long table[] = {
    159                 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
    160                 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
    161                 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
    162                 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
    163                 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
    164                 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
    165                 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
    166                 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
    167                 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
    168                 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
    169                 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
    170                 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
    171                 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
    172                 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
    173                 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
    174                 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
    175                 0x0100000000, 0x0102dfca16,
    176         };
    177         _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
    178         // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
    179         // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
    180         //
    181         // getting i is easy to do using builtin_clz (count leading zero)
    182         //
    183         // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
    184         //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
    185         // isolating f we get
    186         //     val - 2 ** i = f * 2 ** i
    187         //     (val - 2 ** i) / 2 ** i = f
    188         //
    189         // we want to interpolate from the table to get the values
    190         // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
    191         //
    192         // for the interpolation we want to shift everything the fist sample point
    193         // so our parabola becomes x = 0
    194         // this further simplifies the equations
    195         //
    196         // the consequence is that we need f in 2 forms:
    197         //  - finding the index of x0
    198         //  - finding the distance between f and x0
    199         //
    200         // since sample points are equidistant we can significantly simplify the equations
    201 
    202         // get i
    203         const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
    204         const unsigned long long lz = __builtin_clzl(val);
    205         const unsigned long long i = bits - 1 - lz;
    206 
    207         // get the fractinal part as a u32.32
    208         const unsigned long long frac = (val << (lz + 1)) >> 32;
    209 
    210         // get high order bits for the index into the table
    211         const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
    212 
    213         // get the x offset, i.e., the difference between the first sample point and the actual fractional part
    214         const long long udx = frac - (idx0 << (32 - TABLE_BITS));
    215         /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
    216 
    217         const long long y0 = table[idx0 + 0];
    218         const long long y1 = table[idx0 + 1];
    219         const long long y2 = table[idx0 + 2];
    220 
    221         // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
    222         // normally it would look like:
    223         //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
    224         //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
    225         //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
    226         // but since the spacing between sample points is fixed, we can simplify it and extract common expressions
    227         const long long f1 = (y1 - y0);
    228         const long long f2 = (y2 - y0);
    229         const long long a = f2 - (f1 * 2l);
    230         const long long b = (f1 * 2l) - a;
    231 
    232         // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
    233         long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
    234         sum = (sum*udx) >> (32 - TABLE_BITS + 1);
    235         sum = y0 + sum;
    236 
    237         return (i << 32) + (sum);
    238 } // log2_u32_32
     150unsigned long long log2_u32_32( unsigned long long val );
    239151
    240152//---------------------- Trigonometric ----------------------
Note: See TracChangeset for help on using the changeset viewer.