Context Navigation

← Previous Change
Next Change →

Changeset d60a4c2 for libcfa

Timestamp:

Jan 11, 2025, 5:48:46 PM (12 months ago)

Author:

Peter A. Buhr <pabuhr@…>

Branches:

Children:

Parents:

7d65715f (diff), 32a119e9 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location:

Files:

: 6 edited

bits/queue.hfa (modified) (1 diff)
collections/array.hfa (modified) (1 diff)
concurrency/io/call.cfa.in (modified) (14 diffs)
concurrency/mutex_stmt.hfa (modified) (1 diff)
math.cfa (modified) (1 diff)
math.hfa (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

libcfa/src/bits/queue.hfa

-              r7d65715f
+              rd60a4c2
+                }
                 T * succ( Queue(T) & q, T * n ) with( q ) {             // pre: *n in *q
                         #ifdef __CFA_DEBUG__
+                T * succ( Queue(T) & q, T * n ) {                                       // pre: *n in *q
+                  #ifdef __CFA_DEBUG__
                         if ( ! listed( n ) ) abort( "(Queue &)%p.succ( %p ) : Node is not on a list.", &q, n );
+                        #endif // __CFA_DEBUG__
+                  #else
+                        (void) q;
+                  #endif // __CFA_DEBUG__
                         return (Next( n ) == n) ? 0p : Next( n );
                 } // post: n == tail() & succ(n) == 0 | n != tail() & *succ(n) in *q

libcfa/src/collections/array.hfa

-              r7d65715f
+              rd60a4c2
 #ifdef __CFA_DEBUG__
-// FIXME: `len` printing format %ld is a workaround for #269; once fixed, it should be %zd
 #define subcheck( arr, sub, len ) \
         if ( (sub) < 0 || (sub) >= (len) ) \
                 abort( "Subscript %ld exceeds dimension range [0,%ld) for array %p.\n", \
+                abort( "Subscript %ld exceeds dimension range [0,%zu) for array %p.\n", \
                            (sub), (len), (arr) )
 #define subchecku( arr, sub, len ) \
         if ( (sub) >= (len) ) \
                 abort( "Subscript %ld exceeds dimension range [0,%ld) for array %p.\n", \
+                abort( "Subscript %ld exceeds dimension range [0,%zu) for array %p.\n", \
                            (sub), (len), (arr) )
 #else

libcfa/src/concurrency/io/call.cfa.in

-              r7d65715f
+              rd60a4c2
         Call('READV', 'ssize_t preadv2(int fd, const struct iovec * iov, int iovcnt, off_t offset, int flags)', {
                 'fd'  : 'fd',
                 'addr': '(typeof(sqe->addr))iov',
+                'addr': '(uintptr_t)iov',
                 'len' : 'iovcnt',
                 'off' : 'offset',
 …
         Call('WRITEV', 'ssize_t pwritev2(int fd, const struct iovec * iov, int iovcnt, off_t offset, int flags)', {
                 'fd'  : 'fd',
                 'addr': '(typeof(sqe->addr))iov',
+                'addr': '(uintptr_t)iov',
                 'len' : 'iovcnt',
                 'off' : 'offset',
 …
         Call('SENDMSG', 'ssize_t sendmsg(int sockfd, const struct msghdr * msg, int flags)', {
                 'fd': 'sockfd',
                 'addr': '(typeof(sqe->addr))(struct msghdr *)msg',
+                'addr': '(uintptr_t)(struct msghdr *)msg',
                 'len': '1',
                 'msg_flags': 'flags'
 …
         Call('RECVMSG', 'ssize_t recvmsg(int sockfd, struct msghdr * msg, int flags)', {
                 'fd': 'sockfd',
                 'addr': '(typeof(sqe->addr))(struct msghdr *)msg',
+                'addr': '(uintptr_t)(struct msghdr *)msg',
                 'len': '1',
                 'msg_flags': 'flags'
 …
         Call('SEND', 'ssize_t send(int sockfd, const void * buf, size_t len, int flags)', {
                 'fd': 'sockfd',
                 'addr': '(typeof(sqe->addr))buf',
+                'addr': '(uintptr_t)buf',
                 'len': 'len',
                 'msg_flags': 'flags'
 …
         Call('RECV', 'ssize_t recv(int sockfd, void * buf, size_t len, int flags)', {
                 'fd': 'sockfd',
                 'addr': '(typeof(sqe->addr))buf',
+                'addr': '(uintptr_t)buf',
                 'len': 'len',
                 'msg_flags': 'flags'
 …
         Call('ACCEPT', 'int accept4(int sockfd, __SOCKADDR_ARG addr, socklen_t * restrict addrlen, int flags)', {
                 'fd': 'sockfd',
                 'addr': '(typeof(sqe->addr))&addr',
+                'addr': '(uintptr_t)&addr',
                 'addr2': '(typeof(sqe->addr2))addrlen',
                 'accept_flags': 'flags'
 …
         Call('CONNECT', 'int connect(int sockfd, __CONST_SOCKADDR_ARG addr, socklen_t addrlen)', {
                 'fd': 'sockfd',
                 'addr': '(typeof(sqe->addr))&addr',
+                'addr': '(uintptr_t)&addr',
                 'off': 'addrlen'
         }),
 …
         # CFA_HAVE_IORING_OP_MADVISE
         Call('MADVISE', 'int madvise(void * addr, size_t length, int advice)', {
                 'addr': '(typeof(sqe->addr))addr',
+                'addr': '(uintptr_t)addr',
                 'len': 'length',
                 'fadvise_advice': 'advice'
 …
         Call('OPENAT', 'int openat(int dirfd, const char * pathname, int flags, mode_t mode)', {
                 'fd': 'dirfd',
                 'addr': '(typeof(sqe->addr))pathname',
+                'addr': '(uintptr_t)pathname',
                 'open_flags': 'flags;',
                 'len': 'mode'
 …
         Call('OPENAT2', 'int openat2(int dirfd, const char * pathname, struct open_how * how, size_t size)', {
                 'fd': 'dirfd',
                 'addr': '(typeof(sqe->addr))pathname',
+                'addr': '(uintptr_t)pathname',
                 'off': '(typeof(sqe->off))how',
                 'len': 'sizeof(*how)'
 …
         Call('STATX', 'int statx(int dirfd, const char * pathname, int flags, unsigned int mask, struct statx * statxbuf)', {
                 'fd': 'dirfd',
                 'addr': '(typeof(sqe->addr))pathname',
+                'addr': '(uintptr_t)pathname',
                 'statx_flags': 'flags',
                 'len': 'mask',
 …
         Call('READ', 'ssize_t read(int fd, void * buf, size_t count)', {
                 'fd': 'fd',
                 'addr': '(typeof(sqe->addr))buf',
+                'addr': '(uintptr_t)buf',
                 'len': 'count'
         }),
 …
         Call('WRITE', 'ssize_t write(int fd, void * buf, size_t count)', {
                 'fd': 'fd',
                 'addr': '(typeof(sqe->addr))buf',
+                'addr': '(uintptr_t)buf',
                 'len': 'count'
         }),

libcfa/src/concurrency/mutex_stmt.hfa

-              r7d65715f
+              rd60a4c2
 forall(L & | is_lock(L)) {
     static inline void * __get_mutexstmt_lock_ptr( L & this ) { return &this; }
     static inline L __get_mutexstmt_lock_type( L & this ) {}
     static inline L __get_mutexstmt_lock_type( L * this ) {}
+    static inline L __get_mutexstmt_lock_type( L & ) {}
+    static inline L __get_mutexstmt_lock_type( L * ) {}
+}

libcfa/src/math.cfa

-              r7d65715f
+              rd60a4c2
 #pragma GCC visibility push(default)
+unsigned long long log2_u32_32( unsigned long long val ) {
+        enum {
+                TABLE_BITS = 6,
+                TABLE_SIZE = (1 << TABLE_BITS) + 2,
+        };
+        // for(i; TABLE_SIZE) {
+        //  table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32));
+        // }
+        static const unsigned long long table[] = {
+                0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
+                0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
+                0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
+                0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
+                0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
+                0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
+                0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
+                0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
+                0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
+                0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
+                0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
+                0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
+                0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
+                0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
+                0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
+                0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
+                0x0100000000, 0x0102dfca16,
+        };
+        _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
+        // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
+        // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
+        //
+        // getting i is easy to do using builtin_clz (count leading zero)
+        //
+        // we want to calculate log2(1+f) independently to have as many bits of precision as possible.
+        //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
+        // isolating f we get
+        //     val - 2 ** i = f * 2 ** i
+        //     (val - 2 ** i) / 2 ** i = f
+        //
+        // we want to interpolate from the table to get the values
+        // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
+        //
+        // for the interpolation we want to shift everything to the first sample point
+        // so that the first sample point of our parabola becomes x = 0
+        // this further simplifies the equations
+        //
+        // the consequence is that we need f in 2 forms:
+        //  - finding the index of x0
+        //  - finding the distance between f and x0
+        //
+        // since sample points are equidistant we can significantly simplify the equations
+        // get i
+        const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
+        const unsigned long long lz = __builtin_clzl(val);
+        const unsigned long long i = bits - 1 - lz;
+        // get the fractional part as a u32.32
+        const unsigned long long frac = (val << (lz + 1)) >> 32;
+        // get high order bits for the index into the table
+        const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
+        // get the x offset, i.e., the difference between the first sample point and the actual fractional part
+        const long long udx = frac - (idx0 << (32 - TABLE_BITS));
+        /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
+        const long long y0 = table[idx0 + 0];
+        const long long y1 = table[idx0 + 1];
+        const long long y2 = table[idx0 + 2];
+        // from there we can quadratically interpolate to get the data, using the Lagrange polynomial
+        // normally it would look like:
+        //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
+        //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
+        //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
+        // but since the spacing between sample points is fixed, we can simplify it and extract common expressions
+        const long long f1 = (y1 - y0);
+        const long long f2 = (y2 - y0);
+        const long long a = f2 - (f1 * 2l);
+        const long long b = (f1 * 2l) - a;
+        // Now we can compute it in the form (ax + b)x + c (which avoids repeating steps)
+        long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
+        sum = (sum*udx) >> (32 - TABLE_BITS + 1);
+        sum = y0 + sum;
+        return (i << 32) + (sum);
+} // log2_u32_32
 // Implementation of power functions (from the prelude):

libcfa/src/math.hfa

-              r7d65715f
+              rd60a4c2
 } // distribution
+static inline unsigned long long log2_u32_32( unsigned long long val ) {
+        enum {
+                TABLE_BITS = 6,
+                TABLE_SIZE = (1 << TABLE_BITS) + 2,
+        };
+        // for(i; TABLE_SIZE) {
+        //      table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32));
+        // }
+        static const unsigned long long table[] = {
+                0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f,
+                0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4,
+                0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98,
+                0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a,
+                0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c,
+                0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4,
+                0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c,
+                0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1,
+                0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4,
+                0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0,
+                0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9,
+                0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695,
+                0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935,
+                0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5,
+                0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170,
+                0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b,
+                0x0100000000, 0x0102dfca16,
+        };
+        _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate");
+        // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1
+        // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f)
+        //
+        // getting i is easy to do using builtin_clz (count leading zero)
+        //
+        // we want to calculate log2(1+f) independently to have as many bits of precision as possible.
+        //     val = (2 ** i)*(1 + f) = 2 ** i   +   f * 2 ** i
+        // isolating f we get
+        //     val - 2 ** i = f * 2 ** i
+        //     (val - 2 ** i) / 2 ** i = f
+        //
+        // we want to interpolate from the table to get the values
+        // and compromise by doing quadratic interpolation (rather than higher degree interpolation)
+        //
+        // for the interpolation we want to shift everything to the first sample point
+        // so that the first sample point of our parabola becomes x = 0
+        // this further simplifies the equations
+        //
+        // the consequence is that we need f in 2 forms:
+        //  - finding the index of x0
+        //  - finding the distance between f and x0
+        //
+        // since sample points are equidistant we can significantly simplify the equations
+        // get i
+        const unsigned long long bits = sizeof(val) * __CHAR_BIT__;
+        const unsigned long long lz = __builtin_clzl(val);
+        const unsigned long long i = bits - 1 - lz;
+        // get the fractional part as a u32.32
+        const unsigned long long frac = (val << (lz + 1)) >> 32;
+        // get high order bits for the index into the table
+        const unsigned long long idx0 = frac >> (32 - TABLE_BITS);
+        // get the x offset, i.e., the difference between the first sample point and the actual fractional part
+        const long long udx = frac - (idx0 << (32 - TABLE_BITS));
+        /* paranoid */ verify((idx0 + 2) < TABLE_SIZE);
+        const long long y0 = table[idx0 + 0];
+        const long long y1 = table[idx0 + 1];
+        const long long y2 = table[idx0 + 2];
+        // from there we can quadratically interpolate to get the data, using the Lagrange polynomial
+        // normally it would look like:
+        //     double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2));
+        //     double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2));
+        //     double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1));
+        // but since the spacing between sample points is fixed, we can simplify it and extract common expressions
+        const long long f1 = (y1 - y0);
+        const long long f2 = (y2 - y0);
+        const long long a = f2 - (f1 * 2l);
+        const long long b = (f1 * 2l) - a;
+        // Now we can compute it in the form (ax + b)x + c (which avoids repeating steps)
+        long long sum = ((a*udx) >> (32 - TABLE_BITS))  + b;
+        sum = (sum*udx) >> (32 - TABLE_BITS + 1);
+        sum = y0 + sum;
+        return (i << 32) + (sum);
+} // log2_u32_32
+unsigned long long log2_u32_32( unsigned long long val );
 //---------------------- Trigonometric ----------------------

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: