1 | //
|
---|
2 | // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
|
---|
3 | //
|
---|
4 | // The contents of this file are covered under the licence agreement in the
|
---|
5 | // file "LICENCE" distributed with Cforall.
|
---|
6 | //
|
---|
7 | // math.cpp --
|
---|
8 | //
|
---|
9 | // Author : Andrew Beach
|
---|
10 | // Created On : Mon Nov 25 16:20:00 2024
|
---|
11 | // Last Modified By : Andrew Beach
|
---|
12 | // Created On : Mon Nov 27 15:11:00 2024
|
---|
13 | // Update Count : 0
|
---|
14 | //
|
---|
15 |
|
---|
16 | #include "math.hfa"
|
---|
17 |
|
---|
18 | #include <limits.h>
|
---|
19 |
|
---|
20 | #pragma GCC visibility push(default)
|
---|
21 |
|
---|
22 | unsigned long long log2_u32_32( unsigned long long val ) {
|
---|
23 | enum {
|
---|
24 | TABLE_BITS = 6,
|
---|
25 | TABLE_SIZE = (1 << TABLE_BITS) + 2,
|
---|
26 | };
|
---|
27 | // for(i; TABLE_SIZE) {
|
---|
28 | // table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32)));
|
---|
29 | // }
|
---|
30 | static const unsigned long long table[] = {
|
---|
31 | 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
|
---|
32 | 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
|
---|
33 | 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
|
---|
34 | 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
|
---|
35 | 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
|
---|
36 | 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
|
---|
37 | 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
|
---|
38 | 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
|
---|
39 | 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
|
---|
40 | 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
|
---|
41 | 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
|
---|
42 | 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
|
---|
43 | 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
|
---|
44 | 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
|
---|
45 | 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
|
---|
46 | 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
|
---|
47 | 0x0100000000, 0x0102dfca16,
|
---|
48 | };
|
---|
49 | _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
|
---|
50 | // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
|
---|
51 | // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
|
---|
52 | //
|
---|
53 | // getting i is easy to do using builtin_clz (count leading zero)
|
---|
54 | //
|
---|
55 | // we want to calculate log2(1+f) independently to have a many bits of precision as possible.
|
---|
56 | // val = (2 ** i)*(1 + f) = 2 ** i + f * 2 ** i
|
---|
57 | // isolating f we get
|
---|
58 | // val - 2 ** i = f * 2 ** i
|
---|
59 | // (val - 2 ** i) / 2 ** i = f
|
---|
60 | //
|
---|
61 | // we want to interpolate from the table to get the values
|
---|
62 | // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
|
---|
63 | //
|
---|
64 | // for the interpolation we want to shift everything the fist sample point
|
---|
65 | // so our parabola becomes x = 0
|
---|
66 | // this further simplifies the equations
|
---|
67 | //
|
---|
68 | // the consequence is that we need f in 2 forms:
|
---|
69 | // - finding the index of x0
|
---|
70 | // - finding the distance between f and x0
|
---|
71 | //
|
---|
72 | // since sample points are equidistant we can significantly simplify the equations
|
---|
73 |
|
---|
74 | // get i
|
---|
75 | const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
|
---|
76 | const unsigned long long lz = __builtin_clzl(val);
|
---|
77 | const unsigned long long i = bits - 1 - lz;
|
---|
78 |
|
---|
79 | // get the fractinal part as a u32.32
|
---|
80 | const unsigned long long frac = (val << (lz + 1)) >> 32;
|
---|
81 |
|
---|
82 | // get high order bits for the index into the table
|
---|
83 | const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
|
---|
84 |
|
---|
85 | // get the x offset, i.e., the difference between the first sample point and the actual fractional part
|
---|
86 | const long long udx = frac - (idx0 << (32 - TABLE_BITS));
|
---|
87 | /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
|
---|
88 |
|
---|
89 | const long long y0 = table[idx0 + 0];
|
---|
90 | const long long y1 = table[idx0 + 1];
|
---|
91 | const long long y2 = table[idx0 + 2];
|
---|
92 |
|
---|
93 | // from there we can quadraticly interpolate to get the data, using the lagrange polynomial
|
---|
94 | // normally it would look like:
|
---|
95 | // double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
|
---|
96 | // double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
|
---|
97 | // double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
|
---|
98 | // but since the spacing between sample points is fixed, we can simplify itand extract common expressions
|
---|
99 | const long long f1 = (y1 - y0);
|
---|
100 | const long long f2 = (y2 - y0);
|
---|
101 | const long long a = f2 - (f1 * 2l);
|
---|
102 | const long long b = (f1 * 2l) - a;
|
---|
103 |
|
---|
104 | // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps)
|
---|
105 | long long sum = ((a*udx) >> (32 - TABLE_BITS)) + b;
|
---|
106 | sum = (sum*udx) >> (32 - TABLE_BITS + 1);
|
---|
107 | sum = y0 + sum;
|
---|
108 |
|
---|
109 | return (i << 32) + (sum);
|
---|
110 | } // log2_u32_32
|
---|
111 |
|
---|
112 | // Implementation of power functions (from the prelude):
|
---|
113 |
|
---|
114 | #define __CFA_EXP__() \
|
---|
115 | if ( y == 0 ) return 1; /* convention */ \
|
---|
116 | __CFA_EXP_INT__( /* special cases for integral types */ \
|
---|
117 | if ( x == 1 ) return 1; /* base case */ \
|
---|
118 | if ( x == 2 ) return x << (y - 1); /* positive shifting */ \
|
---|
119 | if ( y >= sizeof(y) * CHAR_BIT ) return 0; /* immediate overflow, negative exponent > 2^size-1 */ \
|
---|
120 | ) \
|
---|
121 | typeof(x) op = 1; /* accumulate odd product */ \
|
---|
122 | typeof(x) w = x; /* FIX-ME: possible bug in the box pass changing value argument through parameter */ \
|
---|
123 | for ( ; y > 1; y >>= 1 ) { /* squaring exponentiation, O(log2 y) */ \
|
---|
124 | if ( (y & 1) == 1 ) op = op * w; /* odd ? */ \
|
---|
125 | w = w * w; \
|
---|
126 | } \
|
---|
127 | return w * op
|
---|
128 | #define __CFA_EXP_INT__(...) __VA_ARGS__
|
---|
129 |
|
---|
130 | int ?\?( int x, unsigned int y ) { __CFA_EXP__(); }
|
---|
131 | long int ?\?( long int x, unsigned long int y ) { __CFA_EXP__(); }
|
---|
132 | long long int ?\?( long long int x, unsigned long long int y ) { __CFA_EXP__(); }
|
---|
133 | unsigned int ?\?( unsigned int x, unsigned int y ) { __CFA_EXP__(); }
|
---|
134 | unsigned long int ?\?( unsigned long int x, unsigned long int y ) { __CFA_EXP__(); }
|
---|
135 | unsigned long long int ?\?( unsigned long long int x, unsigned long long int y ) { __CFA_EXP__(); }
|
---|
136 |
|
---|
137 | #undef __CFA_EXP_INT__
|
---|
138 | #define __CFA_EXP_INT__(...)
|
---|
139 |
|
---|
140 | forall( OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } ) {
|
---|
141 | OT ?\?( OT x, unsigned int y ) { __CFA_EXP__(); }
|
---|
142 | OT ?\?( OT x, unsigned long int y ) { __CFA_EXP__(); }
|
---|
143 | OT ?\?( OT x, unsigned long long int y ) { __CFA_EXP__(); }
|
---|
144 | } // distribution
|
---|
145 |
|
---|
146 | #undef __CFA_EXP_INT__
|
---|
147 | #undef __CFA_EXP__
|
---|