| [108b2c7] | 1 | // | 
|---|
|  | 2 | // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo | 
|---|
|  | 3 | // | 
|---|
|  | 4 | // The contents of this file are covered under the licence agreement in the | 
|---|
|  | 5 | // file "LICENCE" distributed with Cforall. | 
|---|
|  | 6 | // | 
|---|
|  | 7 | // math.cpp -- | 
|---|
|  | 8 | // | 
|---|
|  | 9 | // Author           : Andrew Beach | 
|---|
|  | 10 | // Created On       : Mon Nov 25 16:20:00 2024 | 
|---|
|  | 11 | // Last Modified By : Andrew Beach | 
|---|
|  | 12 | // Created On       : Mon Nov 27 15:11:00 2024 | 
|---|
|  | 13 | // Update Count     : 0 | 
|---|
|  | 14 | // | 
|---|
|  | 15 |  | 
|---|
|  | 16 | #include "math.hfa" | 
|---|
|  | 17 |  | 
|---|
|  | 18 | #include <limits.h> | 
|---|
|  | 19 |  | 
|---|
|  | 20 | #pragma GCC visibility push(default) | 
|---|
|  | 21 |  | 
|---|
| [658f3179] | 22 | unsigned long long log2_u32_32( unsigned long long val ) { | 
|---|
| [f32448e] | 23 | enum { | 
|---|
|  | 24 | TABLE_BITS = 6, | 
|---|
|  | 25 | TABLE_SIZE = (1 << TABLE_BITS) + 2, | 
|---|
|  | 26 | }; | 
|---|
|  | 27 | // for(i; TABLE_SIZE) { | 
|---|
|  | 28 | //  table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32))); | 
|---|
|  | 29 | // } | 
|---|
|  | 30 | static const unsigned long long table[] = { | 
|---|
|  | 31 | 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f, | 
|---|
|  | 32 | 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4, | 
|---|
|  | 33 | 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98, | 
|---|
|  | 34 | 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a, | 
|---|
|  | 35 | 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c, | 
|---|
|  | 36 | 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4, | 
|---|
|  | 37 | 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c, | 
|---|
|  | 38 | 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1, | 
|---|
|  | 39 | 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4, | 
|---|
|  | 40 | 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0, | 
|---|
|  | 41 | 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9, | 
|---|
|  | 42 | 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695, | 
|---|
|  | 43 | 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935, | 
|---|
|  | 44 | 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5, | 
|---|
|  | 45 | 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170, | 
|---|
|  | 46 | 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b, | 
|---|
|  | 47 | 0x0100000000, 0x0102dfca16, | 
|---|
|  | 48 | }; | 
|---|
|  | 49 | _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate"); | 
|---|
|  | 50 | // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1 | 
|---|
|  | 51 | // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f) | 
|---|
|  | 52 | // | 
|---|
|  | 53 | // getting i is easy to do using builtin_clz (count leading zero) | 
|---|
|  | 54 | // | 
|---|
|  | 55 | // we want to calculate log2(1+f) independently to have a many bits of precision as possible. | 
|---|
|  | 56 | //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i | 
|---|
|  | 57 | // isolating f we get | 
|---|
|  | 58 | //     val - 2 ** i = f * 2 ** i | 
|---|
|  | 59 | //     (val - 2 ** i) / 2 ** i = f | 
|---|
|  | 60 | // | 
|---|
|  | 61 | // we want to interpolate from the table to get the values | 
|---|
|  | 62 | // and compromise by doing quadratic interpolation (rather than higher degree interpolation) | 
|---|
|  | 63 | // | 
|---|
|  | 64 | // for the interpolation we want to shift everything the fist sample point | 
|---|
|  | 65 | // so our parabola becomes x = 0 | 
|---|
|  | 66 | // this further simplifies the equations | 
|---|
|  | 67 | // | 
|---|
|  | 68 | // the consequence is that we need f in 2 forms: | 
|---|
|  | 69 | //  - finding the index of x0 | 
|---|
|  | 70 | //  - finding the distance between f and x0 | 
|---|
|  | 71 | // | 
|---|
|  | 72 | // since sample points are equidistant we can significantly simplify the equations | 
|---|
| [658f3179] | 73 |  | 
|---|
| [f32448e] | 74 | // get i | 
|---|
|  | 75 | const unsigned long long bits = sizeof(val) * __CHAR_BIT__; | 
|---|
|  | 76 | const unsigned long long lz = __builtin_clzl(val); | 
|---|
|  | 77 | const unsigned long long i = bits - 1 - lz; | 
|---|
| [658f3179] | 78 |  | 
|---|
| [f32448e] | 79 | // get the fractinal part as a u32.32 | 
|---|
|  | 80 | const unsigned long long frac = (val << (lz + 1)) >> 32; | 
|---|
| [658f3179] | 81 |  | 
|---|
| [f32448e] | 82 | // get high order bits for the index into the table | 
|---|
|  | 83 | const unsigned long long idx0 = frac >> (32 - TABLE_BITS); | 
|---|
| [658f3179] | 84 |  | 
|---|
| [f32448e] | 85 | // get the x offset, i.e., the difference between the first sample point and the actual fractional part | 
|---|
|  | 86 | const long long udx = frac - (idx0 << (32 - TABLE_BITS)); | 
|---|
|  | 87 | /* paranoid */ verify((idx0 + 2) < TABLE_SIZE); | 
|---|
| [658f3179] | 88 |  | 
|---|
| [f32448e] | 89 | const long long y0 = table[idx0 + 0]; | 
|---|
|  | 90 | const long long y1 = table[idx0 + 1]; | 
|---|
|  | 91 | const long long y2 = table[idx0 + 2]; | 
|---|
| [658f3179] | 92 |  | 
|---|
| [f32448e] | 93 | // from there we can quadraticly interpolate to get the data, using the lagrange polynomial | 
|---|
|  | 94 | // normally it would look like: | 
|---|
|  | 95 | //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2)); | 
|---|
|  | 96 | //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2)); | 
|---|
|  | 97 | //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1)); | 
|---|
|  | 98 | // but since the spacing between sample points is fixed, we can simplify itand extract common expressions | 
|---|
|  | 99 | const long long f1 = (y1 - y0); | 
|---|
|  | 100 | const long long f2 = (y2 - y0); | 
|---|
|  | 101 | const long long a = f2 - (f1 * 2l); | 
|---|
|  | 102 | const long long b = (f1 * 2l) - a; | 
|---|
| [658f3179] | 103 |  | 
|---|
| [f32448e] | 104 | // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps) | 
|---|
|  | 105 | long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b; | 
|---|
|  | 106 | sum = (sum*udx) >> (32 - TABLE_BITS + 1); | 
|---|
|  | 107 | sum = y0 + sum; | 
|---|
| [658f3179] | 108 |  | 
|---|
| [f32448e] | 109 | return (i << 32) + (sum); | 
|---|
| [658f3179] | 110 | } // log2_u32_32 | 
|---|
|  | 111 |  | 
|---|
| [108b2c7] | 112 | // Implementation of power functions (from the prelude): | 
|---|
|  | 113 |  | 
|---|
|  | 114 | #define __CFA_EXP__() \ | 
|---|
|  | 115 | if ( y == 0 ) return 1;                             /* convention */ \ | 
|---|
|  | 116 | __CFA_EXP_INT__(                                    /* special cases for integral types */ \ | 
|---|
|  | 117 | if ( x == 1 ) return 1;                         /* base case */ \ | 
|---|
|  | 118 | if ( x == 2 ) return x << (y - 1);              /* positive shifting */ \ | 
|---|
|  | 119 | if ( y >= sizeof(y) * CHAR_BIT ) return 0;      /* immediate overflow, negative exponent > 2^size-1 */ \ | 
|---|
|  | 120 | ) \ | 
|---|
|  | 121 | typeof(x) op = 1;                                   /* accumulate odd product */ \ | 
|---|
|  | 122 | typeof(x) w = x; /* FIX-ME: possible bug in the box pass changing value argument through parameter */ \ | 
|---|
|  | 123 | for ( ; y > 1; y >>= 1 ) {                          /* squaring exponentiation, O(log2 y) */ \ | 
|---|
|  | 124 | if ( (y & 1) == 1 ) op = op * w;                /* odd ? */ \ | 
|---|
|  | 125 | w = w * w; \ | 
|---|
|  | 126 | } \ | 
|---|
|  | 127 | return w * op | 
|---|
|  | 128 | #define __CFA_EXP_INT__(...) __VA_ARGS__ | 
|---|
|  | 129 |  | 
|---|
|  | 130 | int ?\?( int x, unsigned int y ) { __CFA_EXP__(); } | 
|---|
|  | 131 | long int ?\?( long int x, unsigned long int y ) { __CFA_EXP__(); } | 
|---|
|  | 132 | long long int ?\?( long long int x, unsigned long long int y ) { __CFA_EXP__(); } | 
|---|
|  | 133 | unsigned int ?\?( unsigned int x, unsigned int y ) { __CFA_EXP__(); } | 
|---|
|  | 134 | unsigned long int ?\?( unsigned long int x, unsigned long int y ) { __CFA_EXP__(); } | 
|---|
|  | 135 | unsigned long long int ?\?( unsigned long long int x, unsigned long long int y ) { __CFA_EXP__(); } | 
|---|
|  | 136 |  | 
|---|
|  | 137 | #undef __CFA_EXP_INT__ | 
|---|
|  | 138 | #define __CFA_EXP_INT__(...) | 
|---|
|  | 139 |  | 
|---|
|  | 140 | forall( OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } ) { | 
|---|
|  | 141 | OT ?\?( OT x, unsigned int y ) { __CFA_EXP__(); } | 
|---|
|  | 142 | OT ?\?( OT x, unsigned long int y ) { __CFA_EXP__(); } | 
|---|
|  | 143 | OT ?\?( OT x, unsigned long long int y ) { __CFA_EXP__(); } | 
|---|
|  | 144 | } // distribution | 
|---|
|  | 145 |  | 
|---|
|  | 146 | #undef __CFA_EXP_INT__ | 
|---|
|  | 147 | #undef __CFA_EXP__ | 
|---|