Ignore:
Timestamp:
Oct 19, 2022, 4:43:26 PM (3 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, ast-experimental, master
Children:
1a45263
Parents:
9cd5bd2 (diff), 135143ba (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into pthread-emulation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/math.hfa

    r9cd5bd2 rdf6cc9d  
    1010// Created On       : Mon Apr 18 23:37:04 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Apr 15 11:47:56 2021
    13 // Update Count     : 132
     12// Last Modified On : Sat Oct  8 08:40:42 2022
     13// Update Count     : 136
    1414//
    1515
     
    2222
    2323#include "common.hfa"
     24#include "bits/debug.hfa"
    2425
    2526//---------------------- General ----------------------
    2627
    27 static inline {
     28static inline __attribute__((always_inline)) {
    2829        float ?%?( float x, float y ) { return fmodf( x, y ); }
    2930        float fmod( float x, float y ) { return fmodf( x, y ); }
     
    6364//---------------------- Exponential ----------------------
    6465
    65 static inline {
     66static inline __attribute__((always_inline)) {
    6667        float exp( float x ) { return expf( x ); }
    6768        // extern "C" { double exp( double ); }
     
    9293//---------------------- Logarithm ----------------------
    9394
    94 static inline {
     95static inline __attribute__((always_inline)) {
    9596        float log( float x ) { return logf( x ); }
    9697        // extern "C" { double log( double ); }
     
    147148} // distribution
    148149
     150static inline unsigned long long log2_u32_32( unsigned long long val ) {
     151        enum {
     152                TABLE_BITS = 6,
     153                TABLE_SIZE = (1 << TABLE_BITS) + 2,
     154        };
     155        // for(i; TABLE_SIZE) {
     156        //      table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
     157        // }
     158        static const unsigned long long table[] = {
     159                0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
     160                0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
     161                0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
     162                0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
     163                0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
     164                0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
     165                0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
     166                0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
     167                0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
     168                0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
     169                0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
     170                0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
     171                0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
     172                0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
     173                0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
     174                0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
     175                0x0100000000, 0x0102dfca16,
     176        };
     177        _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
     178        // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
     179        // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
     180        //
     181        // getting i is easy to do using builtin_clz (count leading zero)
     182        //
     183        // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
     184        //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
     185        // isolating f we get
     186        //     val - 2 ** i = f * 2 ** i
     187        //     (val - 2 ** i) / 2 ** i = f
     188        //
     189        // we want to interpolate from the table to get the values
     190        // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
     191        //
     192        // for the interpolation we want to shift everything the fist sample point
     193        // so our parabola becomes x = 0
     194        // this further simplifies the equations
     195        //
     196        // the consequence is that we need f in 2 forms:
     197        //  - finding the index of x0
     198        //  - finding the distance between f and x0
     199        //
     200        // since sample points are equidistant we can significantly simplify the equations
     201
     202        // get i
     203        const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
     204        const unsigned long long lz = __builtin_clzl(val);
     205        const unsigned long long i = bits - 1 - lz;
     206
     207        // get the fractinal part as a u32.32
     208        const unsigned long long frac = (val << (lz + 1)) >> 32;
     209
     210        // get high order bits for the index into the table
     211        const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
     212
     213        // get the x offset, i.e., the difference between the first sample point and the actual fractional part
     214        const long long udx = frac - (idx0 << (32 - TABLE_BITS));
     215        /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
     216
     217        const long long y0 = table[idx0 + 0];
     218        const long long y1 = table[idx0 + 1];
     219        const long long y2 = table[idx0 + 2];
     220
     221        // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
     222        // normally it would look like:
     223        //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
     224        //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
     225        //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
     226        // but since the spacing between sample points is fixed, we can simplify it and extract common expressions
     227        const long long f1 = (y1 - y0);
     228        const long long f2 = (y2 - y0);
     229        const long long a = f2 - (f1 * 2l);
     230        const long long b = (f1 * 2l) - a;
     231
     232        // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
     233        long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
     234        sum = (sum*udx) >> (32 - TABLE_BITS + 1);
     235        sum = y0 + sum;
     236
     237        return (i << 32) + (sum);
     238}
     239
    149240//---------------------- Trigonometric ----------------------
    150241
    151 static inline {
     242static inline __attribute__((always_inline)) {
    152243        float sin( float x ) { return sinf( x ); }
    153244        // extern "C" { double sin( double ); }
     
    204295//---------------------- Hyperbolic ----------------------
    205296
    206 static inline {
     297static inline __attribute__((always_inline)) {
    207298        float sinh( float x ) { return sinhf( x ); }
    208299        // extern "C" { double sinh( double ); }
     
    250341//---------------------- Error / Gamma ----------------------
    251342
    252 static inline {
     343static inline __attribute__((always_inline)) {
    253344        float erf( float x ) { return erff( x ); }
    254345        // extern "C" { double erf( double ); }
     
    279370//---------------------- Nearest Integer ----------------------
    280371
    281 static inline {
     372inline __attribute__((always_inline)) static {
    282373        signed char floor( signed char n, signed char align ) { return n / align * align; }
    283374        unsigned char floor( unsigned char n, unsigned char align ) { return n / align * align; }
     
    307398        // forall( T | { T ?+?( T, T ); T ?-?( T, T ); T ?%?( T, T ); } )
    308399        // T ceiling_div( T n, T align ) { verify( is_pow2( align ) );return (n + (align - 1)) / align; }
    309        
     400
    310401        // gcc notices the div/mod pair and saves both so only one div.
    311402        signed char ceiling( signed char n, signed char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     
    376467//---------------------- Manipulation ----------------------
    377468
    378 static inline {
     469static inline __attribute__((always_inline)) {
    379470        float copysign( float x, float y ) { return copysignf( x, y ); }
    380471        // extern "C" { double copysign( double, double ); }
     
    418509//---------------------------------------
    419510
    420 static inline {
     511static inline __attribute__((always_inline)) {
    421512        forall( T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T );T ?*?( T, T ); } )
    422513        T lerp( T x, T y, T a ) { return x * ((T){1} - a) + y * a; }
Note: See TracChangeset for help on using the changeset viewer.