Changeset 658f3179


Ignore:
Timestamp:
Jan 8, 2025, 2:00:36 PM (12 days ago)
Author:
Andrew Beach <ajbeach@…>
Branches:
master
Children:
f32448e
Parents:
d84f2ae
Message:

Moved massive function log2_u32_32 out of header.

Location:
libcfa/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/math.cfa

    rd84f2ae r658f3179  
    1919
    2020#pragma GCC visibility push(default)
     21
     22unsigned long long log2_u32_32( unsigned long long val ) {
     23    enum {
     24        TABLE_BITS = 6,
     25        TABLE_SIZE = (1 << TABLE_BITS) + 2,
     26    };
     27    // for(i; TABLE_SIZE) {
     28    //  table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
     29    // }
     30    static const unsigned long long table[] = {
     31        0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
     32        0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
     33        0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
     34        0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
     35        0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
     36        0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
     37        0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
     38        0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
     39        0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
     40        0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
     41        0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
     42        0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
     43        0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
     44        0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
     45        0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
     46        0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
     47        0x0100000000, 0x0102dfca16,
     48    };
     49    _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
     50    // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
     51    // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
     52    //
     53    // getting i is easy to do using builtin_clz (count leading zero)
     54    //
     55    // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
     56    //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
     57    // isolating f we get
     58    //     val - 2 ** i = f * 2 ** i
     59    //     (val - 2 ** i) / 2 ** i = f
     60    //
     61    // we want to interpolate from the table to get the values
     62    // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
     63    //
     64    // for the interpolation we want to shift everything the fist sample point
     65    // so our parabola becomes x = 0
     66    // this further simplifies the equations
     67    //
     68    // the consequence is that we need f in 2 forms:
     69    //  - finding the index of x0
     70    //  - finding the distance between f and x0
     71    //
     72    // since sample points are equidistant we can significantly simplify the equations
     73
     74    // get i
     75    const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
     76    const unsigned long long lz = __builtin_clzl(val);
     77    const unsigned long long i = bits - 1 - lz;
     78
     79    // get the fractinal part as a u32.32
     80    const unsigned long long frac = (val << (lz + 1)) >> 32;
     81
     82    // get high order bits for the index into the table
     83    const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
     84
     85    // get the x offset, i.e., the difference between the first sample point and the actual fractional part
     86    const long long udx = frac - (idx0 << (32 - TABLE_BITS));
     87    /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
     88
     89    const long long y0 = table[idx0 + 0];
     90    const long long y1 = table[idx0 + 1];
     91    const long long y2 = table[idx0 + 2];
     92
     93    // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
     94    // normally it would look like:
     95    //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
     96    //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
     97    //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
     98    // but since the spacing between sample points is fixed, we can simplify itand extract common expressions
     99    const long long f1 = (y1 - y0);
     100    const long long f2 = (y2 - y0);
     101    const long long a = f2 - (f1 * 2l);
     102    const long long b = (f1 * 2l) - a;
     103
     104    // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
     105    long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
     106    sum = (sum*udx) >> (32 - TABLE_BITS + 1);
     107    sum = y0 + sum;
     108
     109    return (i << 32) + (sum);
     110} // log2_u32_32
    21111
    22112// Implementation of power functions (from the prelude):
  • libcfa/src/math.hfa

    rd84f2ae r658f3179  
    148148} // distribution
    149149
    150 static inline unsigned long long log2_u32_32( unsigned long long val ) {
    151         enum {
    152                 TABLE_BITS = 6,
    153                 TABLE_SIZE = (1 << TABLE_BITS) + 2,
    154         };
    155         // for(i; TABLE_SIZE) {
    156         //      table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
    157         // }
    158         static const unsigned long long table[] = {
    159                 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
    160                 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
    161                 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
    162                 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
    163                 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
    164                 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
    165                 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
    166                 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
    167                 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
    168                 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
    169                 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
    170                 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
    171                 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
    172                 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
    173                 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
    174                 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
    175                 0x0100000000, 0x0102dfca16,
    176         };
    177         _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
    178         // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
    179         // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
    180         //
    181         // getting i is easy to do using builtin_clz (count leading zero)
    182         //
    183         // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
    184         //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
    185         // isolating f we get
    186         //     val - 2 ** i = f * 2 ** i
    187         //     (val - 2 ** i) / 2 ** i = f
    188         //
    189         // we want to interpolate from the table to get the values
    190         // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
    191         //
    192         // for the interpolation we want to shift everything the fist sample point
    193         // so our parabola becomes x = 0
    194         // this further simplifies the equations
    195         //
    196         // the consequence is that we need f in 2 forms:
    197         //  - finding the index of x0
    198         //  - finding the distance between f and x0
    199         //
    200         // since sample points are equidistant we can significantly simplify the equations
    201 
    202         // get i
    203         const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
    204         const unsigned long long lz = __builtin_clzl(val);
    205         const unsigned long long i = bits - 1 - lz;
    206 
    207         // get the fractinal part as a u32.32
    208         const unsigned long long frac = (val << (lz + 1)) >> 32;
    209 
    210         // get high order bits for the index into the table
    211         const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
    212 
    213         // get the x offset, i.e., the difference between the first sample point and the actual fractional part
    214         const long long udx = frac - (idx0 << (32 - TABLE_BITS));
    215         /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
    216 
    217         const long long y0 = table[idx0 + 0];
    218         const long long y1 = table[idx0 + 1];
    219         const long long y2 = table[idx0 + 2];
    220 
    221         // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
    222         // normally it would look like:
    223         //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
    224         //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
    225         //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
    226         // but since the spacing between sample points is fixed, we can simplify it and extract common expressions
    227         const long long f1 = (y1 - y0);
    228         const long long f2 = (y2 - y0);
    229         const long long a = f2 - (f1 * 2l);
    230         const long long b = (f1 * 2l) - a;
    231 
    232         // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
    233         long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
    234         sum = (sum*udx) >> (32 - TABLE_BITS + 1);
    235         sum = y0 + sum;
    236 
    237         return (i << 32) + (sum);
    238 } // log2_u32_32
     150unsigned long long log2_u32_32( unsigned long long val );
    239151
    240152//---------------------- Trigonometric ----------------------
Note: See TracChangeset for help on using the changeset viewer.