Context Navigation

← Previous Change
Next Change →

Changeset 0e4df2e for libcfa

Timestamp:

May 22, 2020, 11:49:29 AM (4 years ago)

Author:

Thierry Delisle <tdelisle@…>

Branches:

ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum

Children:

Parents:

2802824 (diff), 99fea48 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Message:

Merge branch 'master' into relaxed_ready

Location:

Files:

: 1 added
: 11 edited

Makefile.am (modified) (2 diffs)
Makefile.in (modified) (2 diffs)
concurrency/io.cfa (modified) (18 diffs)
concurrency/kernel.cfa (modified) (1 diff)
containers/list.hfa (modified) (1 diff)
containers/stackLockFree.hfa (added)
exception.c (modified) (8 diffs)
exception.h (modified) (3 diffs)
exception.hfa (modified) (3 diffs)
executor.cfa (modified) (6 diffs)
heap.cfa (modified) (24 diffs)
stdlib.hfa (modified) (8 diffs)

Legend:

: Unmodified
: Added
: Removed

libcfa/src/Makefile.am

-                      r2802824
+                      r0e4df2e
 ## Created On       : Sun May 31 08:54:01 2015
 ## Last Modified By : Peter A. Buhr
 ## Last Modified On : Mon Mar 16 18:07:59 2020
 ## Update Count     : 242
+## Last Modified On : Sun May 17 21:10:26 2020
+## Update Count     : 243
 ###############################################################################
 …
 #----------------------------------------------------------------------------------------------------------------
 if BUILDLIB
+headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
+headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa clock.hfa \
+                bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
 headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
           containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
+                containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/stackLockFree.hfa containers/vector.hfa
 libsrc = startup.cfa interpose.cfa bits/debug.cfa assert.cfa exception.c virtual.c heap.cfa ${headers:.hfa=.cfa}

libcfa/src/Makefile.in

-                      r2802824
+                      r0e4df2e
         containers/maybe.hfa containers/pair.hfa containers/result.hfa \
         containers/vector.hfa bitmanip.hfa math.hfa gmp.hfa time_t.hfa \
         bits/align.hfa bits/containers.hfa bits/defs.hfa \
+        clock.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa \
         bits/debug.hfa bits/locks.hfa containers/list.hfa \
         concurrency/coroutine.hfa concurrency/thread.hfa \
 …
 #----------------------------------------------------------------------------------------------------------------
+@BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
+@BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa clock.hfa \
+@BUILDLIB_TRUE@         bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
 @BUILDLIB_FALSE@headers =
 @BUILDLIB_TRUE@headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
 @BUILDLIB_TRUE@   containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
+@BUILDLIB_TRUE@         containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
 @BUILDLIB_FALSE@libsrc =

libcfa/src/concurrency/io.cfa

-                      r2802824
+                      r0e4df2e
                 // Like head/tail but not seen by the kernel
-                volatile uint32_t alloc;
                 volatile uint32_t * ready;
                 uint32_t ready_cnt;
 …
                         struct {
                                 struct {
+                                        volatile unsigned long long int val;
+                                        volatile unsigned long long int rdy;
+                                        volatile unsigned long long int csm;
+                                        volatile unsigned long long int avl;
                                         volatile unsigned long long int cnt;
-                                        volatile unsigned long long int block;
                                 } submit_avg;
                                 struct {
 …
                                         volatile unsigned long long int block;
                                 } look_avg;
+                                struct {
+                                        volatile unsigned long long int val;
+                                        volatile unsigned long long int cnt;
+                                        volatile unsigned long long int block;
+                                } alloc_avg;
                         } stats;
                 #endif
 …
                 sq.dropped = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.dropped);
                 sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
+                sq.alloc = *sq.tail;
+                {
+                        const uint32_t num = *sq.num;
+                        for( i; num ) {
+                                sq.sqes[i].user_data = 0ul64;
+                        }
+                }
                 if( io_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
 …
                 // Initialize statistics
                 #if !defined(__CFA_NO_STATISTICS__)
+                        this.io->submit_q.stats.submit_avg.val   = 0;
+                        this.io->submit_q.stats.submit_avg.cnt   = 0;
+                        this.io->submit_q.stats.submit_avg.block = 0;
+                        this.io->submit_q.stats.submit_avg.rdy = 0;
+                        this.io->submit_q.stats.submit_avg.csm = 0;
+                        this.io->submit_q.stats.submit_avg.avl = 0;
+                        this.io->submit_q.stats.submit_avg.cnt = 0;
                         this.io->submit_q.stats.look_avg.val   = 0;
                         this.io->submit_q.stats.look_avg.cnt   = 0;
                         this.io->submit_q.stats.look_avg.block = 0;
+                        this.io->submit_q.stats.alloc_avg.val   = 0;
+                        this.io->submit_q.stats.alloc_avg.cnt   = 0;
+                        this.io->submit_q.stats.alloc_avg.block = 0;
                         this.io->completion_q.stats.completed_avg.val = 0;
                         this.io->completion_q.stats.completed_avg.slow_cnt = 0;
 …
                                         this.ready_queue.head = 1p;
                                         thrd.next = 0p;
+                                        __cfaabi_dbg_debug_do( thrd.unpark_stale = true );
                                         // Fixup the thread state
 …
                         if(this.print_stats) {
                                 with(this.io->submit_q.stats, this.io->completion_q.stats) {
+                                        double avgrdy = ((double)submit_avg.rdy) / submit_avg.cnt;
+                                        double avgcsm = ((double)submit_avg.csm) / submit_avg.cnt;
+                                        double avgavl = ((double)submit_avg.avl) / submit_avg.cnt;
                                         double lavgv = 0;
                                         double lavgb = 0;
 …
+                                        }
+                                        double aavgv = 0;
+                                        double aavgb = 0;
+                                        if(alloc_avg.cnt != 0) {
+                                                aavgv = ((double)alloc_avg.val  ) / alloc_avg.cnt;
+                                                aavgb = ((double)alloc_avg.block) / alloc_avg.cnt;
+                                        }
                                         __cfaabi_bits_print_safe( STDOUT_FILENO,
                                                 "----- I/O uRing Stats -----\n"
                                                 "- total submit calls     : %'15llu\n"
+                                                "- avg submit             : %'18.2lf\n"
+                                                "- pre-submit block %%     : %'18.2lf\n"
+                                                "- avg ready entries      : %'18.2lf\n"
+                                                "- avg submitted entries  : %'18.2lf\n"
+                                                "- avg available entries  : %'18.2lf\n"
                                                 "- total ready search     : %'15llu\n"
                                                 "- avg ready search len   : %'18.2lf\n"
                                                 "- avg ready search block : %'18.2lf\n"
+                                                "- total alloc search     : %'15llu\n"
+                                                "- avg alloc search len   : %'18.2lf\n"
+                                                "- avg alloc search block : %'18.2lf\n"
                                                 "- total wait calls       : %'15llu   (%'llu slow, %'llu fast)\n"
                                                 "- avg completion/wait    : %'18.2lf\n",
                                                 submit_avg.cnt,
+                                                ((double)submit_avg.val) / submit_avg.cnt,
+                                                (100.0 * submit_avg.block) / submit_avg.cnt,
+                                                avgrdy,
+                                                avgcsm,
+                                                avgavl,
                                                 look_avg.cnt,
                                                 lavgv,
                                                 lavgb,
+                                                alloc_avg.cnt,
+                                                aavgv,
+                                                aavgb,
                                                 completed_avg.slow_cnt + completed_avg.fast_cnt,
                                                 completed_avg.slow_cnt,  completed_avg.fast_cnt,
 …
                         // If the poller thread also submits, then we need to aggregate the submissions which are ready
                         uint32_t * tail = ring.submit_q.tail;
+                        uint32_t tail = *ring.submit_q.tail;
                         const uint32_t mask = *ring.submit_q.mask;
 …
                                 // If we got a real submission, append it to the list
                                 ring.submit_q.array[ ((*tail) + to_submit) & mask ] = idx & mask;
+                                ring.submit_q.array[ (tail + to_submit) & mask ] = idx & mask;
                                 to_submit++;
+                        }
                         // Increment the tail based on how many we are ready to submit
+                        __atomic_fetch_add(tail, to_submit, __ATOMIC_SEQ_CST);
+                        // update statistics
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                ring.submit_q.stats.submit_avg.val += to_submit;
+                                ring.submit_q.stats.submit_avg.cnt += 1;
+                        #endif
+                }
+                        __atomic_fetch_add(ring.submit_q.tail, to_submit, __ATOMIC_SEQ_CST);
+                }
+                const uint32_t smask = *ring.submit_q.mask;
+                uint32_t shead = *ring.submit_q.head;
                 int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
                 if( ret < 0 ) {
 …
+                }
+                verify( (shead + ret) == *ring.submit_q.head );
+                // Release the consumed SQEs
+                for( i; ret ) {
+                        uint32_t idx = ring.submit_q.array[ (i + shead) & smask ];
+                        ring.submit_q.sqes[ idx ].user_data = 0;
+                }
+                uint32_t avail = 0;
+                uint32_t sqe_num = *ring.submit_q.num;
+                for(i; sqe_num) {
+                        if( ring.submit_q.sqes[ i ].user_data == 0 ) avail++;
+                }
+                // update statistics
+                #if !defined(__CFA_NO_STATISTICS__)
+                        ring.submit_q.stats.submit_avg.rdy += to_submit;
+                        ring.submit_q.stats.submit_avg.csm += ret;
+                        ring.submit_q.stats.submit_avg.avl += avail;
+                        ring.submit_q.stats.submit_avg.cnt += 1;
+                #endif
                 // Drain the queue
                 unsigned head = *ring.completion_q.head;
+                unsigned tail = __atomic_load_n(ring.completion_q.tail, __ATOMIC_ACQUIRE);
+                unsigned tail = *ring.completion_q.tail;
+                const uint32_t mask = *ring.completion_q.mask;
+                // Memory barrier
+                __atomic_thread_fence( __ATOMIC_SEQ_CST );
                 // Nothing was new return 0
 …
                 uint32_t count = tail - head;
                 for(i; count) {
                         unsigned idx = (head + i) & (*ring.completion_q.mask);
+                        unsigned idx = (head + i) & mask;
                         struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
 …
                 // Allow new submissions to happen
                 V(ring.submit, count);
+                // V(ring.submit, count);
                 // Mark to the kernel that the cqe has been seen
                 // Ensure that the kernel only sees the new value of the head index after the CQEs have been read.
+                __atomic_thread_fence( __ATOMIC_SEQ_CST );
                 __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );
 …
 //
+        static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring ) {
+                // Wait for a spot to be available
+                __attribute__((unused)) bool blocked = P(ring.submit);
+                #if !defined(__CFA_NO_STATISTICS__)
+                        __atomic_fetch_add( &ring.submit_q.stats.submit_avg.block, blocked ? 1ul64 : 0ul64, __ATOMIC_RELAXED );
+                #endif
+                // Allocate the sqe
+                uint32_t idx = __atomic_fetch_add(&ring.submit_q.alloc, 1ul32, __ATOMIC_SEQ_CST);
+                // Mask the idx now to make everything easier to check
+                idx &= *ring.submit_q.mask;
+                // Return the sqe
+                return [&ring.submit_q.sqes[ idx ], idx];
+        static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct __io_data & ring, uint64_t data ) {
+                verify( data != 0 );
+                // Prepare the data we need
+                __attribute((unused)) int len   = 0;
+                __attribute((unused)) int block = 0;
+                uint32_t cnt = *ring.submit_q.num;
+                uint32_t mask = *ring.submit_q.mask;
+                uint32_t off = __tls_rand();
+                // Loop around looking for an available spot
+                LOOKING: for() {
+                        // Look through the list starting at some offset
+                        for(i; cnt) {
+                                uint64_t expected = 0;
+                                uint32_t idx = (i + off) & mask;
+                                struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];
+                                volatile uint64_t * udata = &sqe->user_data;
+                                if( *udata == expected &&
+                                        __atomic_compare_exchange_n( udata, &expected, data, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) )
+                                {
+                                        // update statistics
+                                        #if !defined(__CFA_NO_STATISTICS__)
+                                                __atomic_fetch_add( &ring.submit_q.stats.alloc_avg.val,   len,   __ATOMIC_RELAXED );
+                                                __atomic_fetch_add( &ring.submit_q.stats.alloc_avg.block, block, __ATOMIC_RELAXED );
+                                                __atomic_fetch_add( &ring.submit_q.stats.alloc_avg.cnt,   1,     __ATOMIC_RELAXED );
+                                        #endif
+                                        // Success, return the data
+                                        return [sqe, idx];
+                                }
+                                verify(expected != data);
+                                len ++;
+                        }
+                        block++;
+                        yield();
+                }
+        }
 …
                         __attribute((unused)) int len   = 0;
                         __attribute((unused)) int block = 0;
-                        uint32_t expected = -1ul32;
                         uint32_t ready_mask = ring.submit_q.ready_cnt - 1;
                         uint32_t off = __tls_rand();
 …
                                 for(i; ring.submit_q.ready_cnt) {
                                         uint32_t ii = (i + off) & ready_mask;
+                                        uint32_t expected = -1ul32;
                                         if( __atomic_compare_exchange_n( &ring.submit_q.ready[ii], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
                                                 break LOOKING;
+                                        }
+                                        verify(expected != idx);
                                         len ++;
 …
                         // update statistics
                         #if !defined(__CFA_NO_STATISTICS__)
                                 ring.submit_q.stats.submit_avg.val += 1;
+                                ring.submit_q.stats.submit_avg.csm += 1;
                                 ring.submit_q.stats.submit_avg.cnt += 1;
                         #endif
 …
         #define __submit_prelude \
+                struct __io_data & ring = *active_cluster()->io; \
+                io_user_data data = { 0, active_thread() }; \
+                struct __io_data & ring = *data.thrd->curr_cluster->io; \
                 struct io_uring_sqe * sqe; \
                 uint32_t idx; \
                 [sqe, idx] = __submit_alloc( ring );
+                [sqe, idx] = __submit_alloc( ring, (uint64_t)&data );
         #define __submit_wait \
-                io_user_data data = { 0, active_thread() }; \
                 /*__cfaabi_bits_print_safe( STDERR_FILENO, "Preparing user data %p for %p\n", &data, data.thrd );*/ \
                 sqe->user_data = (uint64_t)&data; \
+                verify( sqe->user_data == (uint64_t)&data ); \
                 __submit( ring, idx ); \
                 park( __cfaabi_dbg_ctx ); \

libcfa/src/concurrency/kernel.cfa

r2802824	r0e4df2e
648	648
649	649	// record activity
	650	__cfaabi_dbg_debug_do( char * old_caller = thrd->unpark_caller; )
650	651	__cfaabi_dbg_record_thrd( *thrd, false, caller );
651	652

libcfa/src/containers/list.hfa

-                      r2802824
+                      r0e4df2e
                 $prev_link(list_pos) = (Telem*) 0p;
+        }
+        static inline bool ?`is_empty(dlist(Tnode, Telem) &list) {
+                assert( &list != 0p );
+                $dlinks(Telem) *listLinks = & list.$links;
+                if (listLinks->next.is_terminator) {
+                        assert(listLinks->prev.is_terminator);
+                        assert(listLinks->next.terminator);
+                        assert(listLinks->prev.terminator);
+                        return true;
+                } else {
+                        assert(!listLinks->prev.is_terminator);
+                        assert(listLinks->next.elem);
+                        assert(listLinks->prev.elem);
+                        return false;
+                }
+        }
+        static inline Telem & pop_first(dlist(Tnode, Telem) &list) {
+                assert( &list != 0p );
+                assert( !list`is_empty );
+                $dlinks(Telem) *listLinks = & list.$links;
+                Telem & first = *listLinks->next.elem;
+                Tnode & list_pos_first  = $tempcv_e2n( first );
+                remove(list_pos_first);
+                return first;
+        }
+        static inline Telem & pop_last(dlist(Tnode, Telem) &list) {
+                assert( &list != 0p );
+                assert( !list`is_empty );
+                $dlinks(Telem) *listLinks = & list.$links;
+                Telem & last = *listLinks->prev.elem;
+                Tnode & list_pos_last  = $tempcv_e2n( last );
+                remove(list_pos_last);
+                return last;
+        }
+}

libcfa/src/exception.c

-                      r2802824
+                      r0e4df2e
 // Created On       : Mon Jun 26 15:13:00 2017
 // Last Modified By : Andrew Beach
 // Last Modified On : Tue Apr 14 12:01:00 2020
 // Update Count     : 18
+// Last Modified On : Thr May 21 12:18:00 2020
+// Update Count     : 20
 //
 …
+}
 void __cfaehm_throw_resume(exception_t * except) {
+void __cfaehm_throw_resume(exception_t * except, void (*defaultHandler)(exception_t *)) {
         struct exception_context_t * context = this_exception_context();
 …
+        }
+        // No handler found, fall back to the default operation.
         __cfadbg_print_safe(exception, "Unhandled exception\n");
+        // Fall back to termination:
+        __cfaehm_throw_terminate(except);
+        // TODO: Default handler for resumption.
+        defaultHandler(except);
+}
 …
 // Cancel the current stack, performing appropriate clean-up and messaging.
+static __attribute__((noreturn)) void __cfaehm_cancel_stack(
+                exception_t * exception ) {
+void __cfaehm_cancel_stack( exception_t * exception ) {
         // TODO: Detect current stack and pick a particular stop-function.
         _Unwind_Reason_Code ret;
 …
+}
+static void __cfaehm_cleanup_default( exception_t ** except ) {
+        __cfaehm_delete_exception( *except );
+        *except = NULL;
+}
 // The exception that is being thrown must already be stored.
+static __attribute__((noreturn)) void __cfaehm_begin_unwind(void) {
+        if ( ! this_exception_context()->current_exception ) {
+static void __cfaehm_begin_unwind(void(*defaultHandler)(exception_t *)) {
+        struct exception_context_t * context = this_exception_context();
+        struct _Unwind_Exception * storage = &this_exception_storage;
+        if ( NULL == context->current_exception ) {
                 printf("UNWIND ERROR missing exception in begin unwind\n");
                 abort();
 …
         // Call stdlibc to raise the exception
+        _Unwind_Reason_Code ret = _Unwind_RaiseException( &this_exception_storage );
+        __cfadbg_print_safe(exception, "Begin unwinding (storage &p, context %p)\n", storage, context);
+        _Unwind_Reason_Code ret = _Unwind_RaiseException( storage );
         // If we reach here it means something happened. For resumption to work we need to find a way
 …
         // the whole stack.
+        // We did not simply reach the end of the stack without finding a handler. This is an error.
+        if ( ret != _URC_END_OF_STACK ) {
+                printf("UNWIND ERROR %d after raise exception\n", ret);
+                abort();
+        }
         // No handler found, go to the default operation.
+        // Currently this will always be a cancellation.
+        if ( ret == _URC_END_OF_STACK ) {
+                __cfadbg_print_safe(exception, "Uncaught exception %p\n", &this_exception_storage);
+                __cfaehm_cancel_stack(this_exception_context()->current_exception);
+        }
+        // We did not simply reach the end of the stack without finding a handler. This is an error.
+        printf("UNWIND ERROR %d after raise exception\n", ret);
+        __cfadbg_print_safe(exception, "Uncaught exception %p\n", storage);
+        __attribute__((cleanup(__cfaehm_cleanup_default)))
+        exception_t * exception = context->current_exception;
+        defaultHandler( exception );
+}
+void __cfaehm_throw_terminate( exception_t * val, void (*defaultHandler)(exception_t *) ) {
+        __cfadbg_print_safe(exception, "Throwing termination exception\n");
+        __cfaehm_allocate_exception( val );
+        __cfaehm_begin_unwind( defaultHandler );
+}
+static __attribute__((noreturn)) void __cfaehm_rethrow_adapter( exception_t * except ) {
+        // TODO: Print some error message.
+        (void)except;
         abort();
+}
-void __cfaehm_throw_terminate( exception_t * val ) {
-        __cfadbg_print_safe(exception, "Throwing termination exception\n");
-        __cfaehm_allocate_exception( val );
-        __cfaehm_begin_unwind();
+}
 …
         __cfadbg_print_safe(exception, "Rethrowing termination exception\n");
+        __cfaehm_begin_unwind();
+        __cfaehm_begin_unwind( __cfaehm_rethrow_adapter );
+        abort();
+}

libcfa/src/exception.h

-                      r2802824
+                      r0e4df2e
 // Created On       : Mon Jun 26 15:11:00 2017
 // Last Modified By : Andrew Beach
 // Last Modified On : Fri Mar 27 10:16:00 2020
 // Update Count     : 9
+// Last Modified On : Tue May 19 14:17:00 2020
+// Update Count     : 10
 //
 …
+void __cfaehm_cancel_stack(exception_t * except) __attribute__((noreturn));
 // Used in throw statement translation.
 void __cfaehm_throw_terminate(exception_t * except) __attribute__((noreturn));
+void __cfaehm_throw_terminate(exception_t * except, void (*)(exception_t *));
 void __cfaehm_rethrow_terminate() __attribute__((noreturn));
 void __cfaehm_throw_resume(exception_t * except);
+void __cfaehm_throw_resume(exception_t * except, void (*)(exception_t *));
 // Function catches termination exceptions.
 …
 #ifdef __cforall
+}
+// Not all the built-ins can be expressed in C. These can't be
+// implemented in the .c file either so they all have to be inline.
+trait is_exception(dtype T) {
+        /* The first field must be a pointer to a virtual table.
+         * That virtual table must be a descendant of the base exception virtual table.
+         */
+        void mark_exception(T *);
+        // This is never used and should be a no-op.
+};
+trait is_termination_exception(dtype T | is_exception(T)) {
+        void defaultTerminationHandler(T &);
+};
+trait is_resumption_exception(dtype T | is_exception(T)) {
+        void defaultResumptionHandler(T &);
+};
+forall(dtype T | is_termination_exception(T))
+static inline void $throw(T & except) {
+        __cfaehm_throw_terminate(
+                (exception_t *)&except,
+                (void(*)(exception_t *))defaultTerminationHandler
+        );
+}
+forall(dtype T | is_resumption_exception(T))
+static inline void $throwResume(T & except) {
+        __cfaehm_throw_resume(
+                (exception_t *)&except,
+                (void(*)(exception_t *))defaultResumptionHandler
+        );
+}
+forall(dtype T | is_exception(T))
+static inline void cancel_stack(T & except) __attribute__((noreturn)) {
+        __cfaehm_cancel_stack( (exception_t *)&except );
+}
+forall(dtype T | is_exception(T))
+static inline void defaultTerminationHandler(T & except) {
+        return cancel_stack( except );
+}
+forall(dtype T | is_exception(T))
+static inline void defaultResumptionHandler(T & except) {
+        throw except;
+}
 #endif

libcfa/src/exception.hfa

-                      r2802824
+                      r0e4df2e
 // Created On       : Thu Apr  7 10:25:00 2020
 // Last Modified By : Andrew Beach
 // Last Modified On : Thu Apr  7 10:25:00 2020
 // Update Count     : 0
+// Last Modified On : Tue May 19 14:17:00 2020
+// Update Count     : 2
 //
 …
 #define _VTABLE_DECLARATION(exception_name, parent_name, ...) \
         struct exception_name; \
+        void mark_exception(exception_name *); \
         VTABLE_TYPE(exception_name); \
         extern VTABLE_TYPE(exception_name) VTABLE_NAME(exception_name); \
 …
 #define VTABLE_INSTANCE(...) _EXC_DISPATCH(_VTABLE_INSTANCE, __VA_ARGS__)
 #define _VTABLE_INSTANCE(exception_name, parent_name, ...) \
+        void mark_exception(exception_name *) {} \
         void _GLUE2(exception_name,_copy)(exception_name * this, exception_name * other) { \
                 *this = *other; \

libcfa/src/executor.cfa

-                      r2802824
+                      r0e4df2e
 // buffer.
 #include <bits/containers.hfa>
+#include <containers/list.hfa>
 #include <thread.hfa>
 #include <stdio.h>
-forall( dtype T )
-monitor Buffer {                                        // unbounded buffer
-    __queue_t( T ) queue;                               // unbounded list of work requests
-    condition delay;
-}; // Buffer
-forall( dtype T | is_node(T) ) {
-    void insert( Buffer( T ) & mutex buf, T * elem ) with(buf) {
-        append( queue, elem );                          // insert element into buffer
-        signal( delay );                                // restart
-    } // insert
-    T * remove( Buffer( T ) & mutex buf ) with(buf) {
-        if ( queue.head != 0 ) wait( delay );                   // no request to process ? => wait
-//      return pop_head( queue );
-    } // remove
-} // distribution
 struct WRequest {                                       // client request, no return
     void (* action)( void );
     WRequest * next;                                    // intrusive queue field
+    DLISTED_MGD_IMPL_IN(WRequest)
 }; // WRequest
+DLISTED_MGD_IMPL_OUT(WRequest)
+WRequest *& get_next( WRequest & this ) { return this.next; }
+void ?{}( WRequest & req ) with(req) { action = 0; next = 0; }
+void ?{}( WRequest & req, void (* action)( void ) ) with(req) { req.action = action; next = 0; }
+void ?{}( WRequest & req ) with(req) { action = 0; }
+void ?{}( WRequest & req, void (* action)( void ) ) with(req) { req.action = action; }
 bool stop( WRequest & req ) { return req.action == 0; }
 void doit( WRequest & req ) { req.action(); }
+monitor WRBuffer {                                      // unbounded buffer
+    dlist( WRequest, WRequest ) queue;                  // unbounded list of work requests
+    condition delay;
+}; // WRBuffer
+void insert( WRBuffer & mutex buf, WRequest * elem ) with(buf) {
+    insert_last( queue, *elem );                        // insert element into buffer
+    signal( delay );                                    // restart
+} // insert
+WRequest * remove( WRBuffer & mutex buf ) with(buf) {
+    if ( queue`is_empty ) wait( delay );                // no request to process ? => wait
+    return & pop_first( queue );
+} // remove
 // Each worker has its own work buffer to reduce contention between client and server. Hence, work requests arrive and
 …
 thread Worker {
     Buffer( WRequest ) * requests;
+    WRBuffer * requests;
     unsigned int start, range;
 }; // Worker
 …
 } // Worker::main
 void ?{}( Worker & worker, cluster * wc, Buffer( WRequest ) * requests, unsigned int start, unsigned int range ) {
+void ?{}( Worker & worker, cluster * wc, WRBuffer * requests, unsigned int start, unsigned int range ) {
     (*get_thread(worker)){ *wc };                       // create on given cluster
     worker.[requests, start, range] = [requests, start, range];
 …
     cluster * cluster;                                  // if workers execute on separate cluster
     processor ** processors;                            // array of virtual processors adding parallelism for workers
     Buffer( WRequest ) * requests;                      // list of work requests
+    WRBuffer * requests;                                // list of work requests
     Worker ** workers;                                  // array of workers executing work requests
     unsigned int nprocessors, nworkers, nmailboxes;     // number of mailboxes/workers/processor tasks
 …
     cluster = sepClus ? new( "Executor" ) : active_cluster();
     processors = (processor **)anew( nprocessors );
     requests = anew( nmailboxes );
+    requests = (WRBuffer *)anew( nmailboxes );
     workers = (Worker **)anew( nworkers );
 …
         for ( i; 3000 ) {
             send( exector, workie );
+            if ( i % 100 ) yield();
+            if ( i % 100 == 0 ) {
+//              fprintf( stderr, "%d\n", i );
+                yield();
+            }
         } // for
+    }

libcfa/src/heap.cfa

-                      r2802824
+                      r0e4df2e
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Wed May  6 17:29:26 2020
 // Update Count     : 727
+// Last Modified On : Sun May 17 20:58:17 2020
+// Update Count     : 762
 //
 …
 #define LOCKFREE 1
 #define BUCKETLOCK SPINLOCK
+#if BUCKETLOCK == LOCKFREE
+#include <uStackLF.h>
+#if BUCKETLOCK == SPINLOCK
+#elif BUCKETLOCK == LOCKFREE
+#include <stackLockFree.hfa>
+#else
+        #error undefined lock type for bucket lock
 #endif // LOCKFREE
 …
 struct HeapManager {
-//      struct FreeHeader;                                                                      // forward declaration
         struct Storage {
                 struct Header {                                                                 // header
 …
                                                 struct {                                                // 4-byte word => 8-byte header, 8-byte word => 16-byte header
                                                         #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
                                                         uint32_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
+                                                        uint64_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
                                                         #endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
                                                         union {
 //                                                              FreeHeader * home;              // allocated block points back to home locations (must overlay alignment)
+                                                                // FreeHeader * home;           // allocated block points back to home locations (must overlay alignment)
                                                                 // 2nd low-order bit => zero filled
                                                                 void * home;                    // allocated block points back to home locations (must overlay alignment)
                                                                 size_t blockSize;               // size for munmap (must overlay alignment)
                                                                 #if BUCKLOCK == SPINLOCK
+                                                                #if BUCKETLOCK == SPINLOCK
                                                                 Storage * next;                 // freed block points next freed block of same size
                                                                 #endif // SPINLOCK
                                                         };
+                                                        size_t size;                            // allocation size in bytes
                                                         #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
                                                         uint32_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
+                                                        uint64_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
                                                         #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
                                                 };
+                                                // future code
+                                                #if BUCKLOCK == LOCKFREE
+                                                Stack<Storage>::Link next;              // freed block points next freed block of same size (double-wide)
+                                                #if BUCKETLOCK == LOCKFREE
+                                                Link(Storage) next;                             // freed block points next freed block of same size (double-wide)
                                                 #endif // LOCKFREE
                                         };
                                 } real; // RealHeader
                                 struct FakeHeader {
                                         #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+                                        // 1st low-order bit => fake header & alignment
+                                        uint32_t alignment;
+                                        uint32_t alignment;                                     // 1st low-order bit => fake header & alignment
                                         #endif // __ORDER_LITTLE_ENDIAN__
 …
                                 } fake; // FakeHeader
                         } kind; // Kind
-                        size_t size;                                                            // allocation size in bytes
                 } header; // Header
                 char pad[libAlign() - sizeof( Header )];
 …
         struct FreeHeader {
                 #if BUCKLOCK == SPINLOCK
+                #if BUCKETLOCK == SPINLOCK
                 __spinlock_t lock;                                                              // must be first field for alignment
                 Storage * freeList;
-                #elif BUCKLOCK == LOCKFREE
-                // future code
-                StackLF<Storage> freeList;
                 #else
                         #error undefined lock type for bucket lock
                 #endif // SPINLOCK
+                StackLF(Storage) freeList;
+                #endif // BUCKETLOCK
                 size_t blockSize;                                                               // size of allocations on this list
         }; // FreeHeader
 …
         size_t heapRemaining;                                                           // amount of storage not allocated in the current chunk
 }; // HeapManager
+#if BUCKETLOCK == LOCKFREE
+static inline Link(HeapManager.Storage) * getNext( HeapManager.Storage * this ) { return &this->header.kind.real.next; }
+static inline void ?{}( HeapManager.FreeHeader & ) {}
+static inline void ^?{}( HeapManager.FreeHeader & ) {}
+#endif // LOCKFREE
 static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
 …
 static bool heapBoot = 0;                                                               // detect recursion during boot
 #endif // __CFA_DEBUG__
+// The constructor for heapManager is called explicitly in memory_startup.
 static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
 …
-// static inline void noMemory() {
-//      abort( "Heap memory exhausted at %zu bytes.\n"
-//                 "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
-//                 ((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
-// } // noMemory
 // thunk problem
 size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
 …
+// static inline void noMemory() {
+//      abort( "Heap memory exhausted at %zu bytes.\n"
+//                 "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
+//                 ((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
+// } // noMemory
 static inline void checkAlign( size_t alignment ) {
         if ( alignment < libAlign() || ! libPow2( alignment ) ) {
 …
+static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
+static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem,
+                                                        size_t & size, size_t & alignment ) with( heapManager ) {
         header = headerAddr( addr );
 …
 static inline void * extend( size_t size ) with ( heapManager ) {
+static inline void * extend( size_t size ) with( heapManager ) {
         lock( extlock __cfaabi_dbg_ctx2 );
         ptrdiff_t rem = heapRemaining - size;
 …
 static inline void * doMalloc( size_t size ) with ( heapManager ) {
+static inline void * doMalloc( size_t size ) with( heapManager ) {
         HeapManager.Storage * block;                                            // pointer to new block of storage
 …
                 // Spin until the lock is acquired for this particular size of block.
                 #if defined( SPINLOCK )
+                #if BUCKETLOCK == SPINLOCK
                 lock( freeElem->lock __cfaabi_dbg_ctx2 );
                 block = freeElem->freeList;                                             // remove node from stack
                 #else
                 block = freeElem->freeList.pop();
                 #endif // SPINLOCK
+                block = pop( freeElem->freeList );
+                #endif // BUCKETLOCK
                 if ( unlikely( block == 0p ) ) {                                // no free block ?
                         #if defined( SPINLOCK )
+                        #if BUCKETLOCK == SPINLOCK
                         unlock( freeElem->lock );
                         #endif // SPINLOCK
+                        #endif // BUCKETLOCK
                         // Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
 …
                         block = (HeapManager.Storage *)extend( tsize ); // mutual exclusion on call
   if ( unlikely( block == 0p ) ) return 0p;
                 #if defined( SPINLOCK )
+        if ( unlikely( block == 0p ) ) return 0p;
+                #if BUCKETLOCK == SPINLOCK
                 } else {
                         freeElem->freeList = block->header.kind.real.next;
                         unlock( freeElem->lock );
                 #endif // SPINLOCK
+                #endif // BUCKETLOCK
                 } // if
 …
         } // if
         block->header.size = size;                                                      // store allocation size
+        block->header.kind.real.size = size;                            // store allocation size
         void * addr = &(block->data);                                           // adjust off header to user bytes
 …
 static inline void doFree( void * addr ) with ( heapManager ) {
+static inline void doFree( void * addr ) with( heapManager ) {
         #ifdef __CFA_DEBUG__
         if ( unlikely( heapManager.heapBegin == 0p ) ) {
 …
                 free_storage += size;
                 #endif // __STATISTICS__
                 #if defined( SPINLOCK )
+                #if BUCKETLOCK == SPINLOCK
                 lock( freeElem->lock __cfaabi_dbg_ctx2 );               // acquire spin lock
                 header->kind.real.next = freeElem->freeList;    // push on stack
 …
                 unlock( freeElem->lock );                                               // release spin lock
                 #else
                 freeElem->freeList.push( *(HeapManager.Storage *)header );
                 #endif // SPINLOCK
+                push( freeElem->freeList, *(HeapManager.Storage *)header );
+                #endif // BUCKETLOCK
         } // if
 …
 size_t prtFree( HeapManager & manager ) with ( manager ) {
+size_t prtFree( HeapManager & manager ) with( manager ) {
         size_t total = 0;
         #ifdef __STATISTICS__
 …
                 #endif // __STATISTICS__
                 #if defined( SPINLOCK )
+                #if BUCKETLOCK == SPINLOCK
                 for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0p; p = p->header.kind.real.next ) {
                 #else
+                for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0p; p = p->header.kind.real.next.top ) {
+                #endif // SPINLOCK
+                for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; /* p = getNext( p )->top */) {
+                        typeof(p) temp = getNext( p )->top;                     // FIX ME: direct assignment fails, initialization works
+                        p = temp;
+                #endif // BUCKETLOCK
                         total += size;
                         #ifdef __STATISTICS__
 …
 static void ?{}( HeapManager & manager ) with ( manager ) {
+static void ?{}( HeapManager & manager ) with( manager ) {
         pageSize = sysconf( _SC_PAGESIZE );
 …
                         header = realHeader( header );                          // backup from fake to real header
                 } // if
                 return header->size;
+                return header->kind.real.size;
         } // malloc_size
 …
                         header = realHeader( header );                          // backup from fake to real header
                 } // if
                 size_t ret = header->size;
                 header->size = size;
+                size_t ret = header->kind.real.size;
+                header->kind.real.size = size;
                 return ret;
         } // $malloc_size_set

libcfa/src/stdlib.hfa

-                      r2802824
+                      r0e4df2e
 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu Apr 16 22:44:05 2020
 // Update Count     : 432
+// Last Modified On : Wed May 13 17:23:51 2020
+// Update Count     : 435
 //
 …
 // Reduce includes by explicitly defining these routines.
 extern "C" {
+        void * aalloc( size_t dim, size_t elemSize );           // CFA heap
+        void * resize( void * oaddr, size_t size );                     // CFA heap
         void * memalign( size_t align, size_t size );           // malloc.h
+        void * amemalign( size_t align, size_t dim, size_t elemSize ); // CFA heap
+        void * cmemalign( size_t align, size_t noOfElems, size_t elemSize ); // CFA heap
+        size_t malloc_size( void * addr );                                      // CFA heap
         size_t malloc_usable_size( void * ptr );                        // malloc.h
-        size_t malloc_size( void * addr );                                      // CFA heap
-        void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap
         void * memset( void * dest, int fill, size_t size ); // string.h
         void * memcpy( void * dest, const void * src, size_t size ); // string.h
-        void * resize( void * oaddr, size_t size );                     // CFA heap
 } // extern "C"
 …
         } // malloc
+        T * aalloc( size_t dim ) {
+                if ( _Alignof(T) <= libAlign() ) return (T *)(void *)aalloc( dim, (size_t)sizeof(T) ); // CFA aalloc
+                else return (T *)amemalign( _Alignof(T), dim, sizeof(T) );
+        } // aalloc
         T * calloc( size_t dim ) {
                 if ( _Alignof(T) <= libAlign() )return (T *)(void *)calloc( dim, sizeof(T) ); // C calloc
 …
         } // calloc
+        T * resize( T * ptr, size_t size ) {                            // CFA resize, eliminate return-type cast
+                return (T *)(void *)resize( (void *)ptr, size ); // CFA resize
+        } // resize
         T * realloc( T * ptr, size_t size ) {                           // CFA realloc, eliminate return-type cast
                 return (T *)(void *)realloc( (void *)ptr, size ); // C realloc
 …
         } // memalign
+        T * amemalign( size_t align, size_t dim ) {
+                return (T *)amemalign( align, dim, sizeof(T) ); // CFA amemalign
+        } // amemalign
         T * cmemalign( size_t align, size_t dim  ) {
                 return (T *)cmemalign( align, dim, sizeof(T) ); // CFA cmemalign
 …
         T * alloc( size_t dim ) {
+                if ( _Alignof(T) <= libAlign() ) return (T *)(void *)malloc( dim * (size_t)sizeof(T) );
+                else return (T *)memalign( _Alignof(T), dim * sizeof(T) );
+                return aalloc( dim );
         } // alloc
 …
                         return (T *)(void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
                 } else {
+                        struct __Unknown {};
+                        return alloc( (__Unknown *)ptr, dim );          // reuse, cheat making T/S different types
+                        return resize( ptr, dim * sizeof(T) );          // resize
                 } // if
         } // alloc
 …
         } // alloc_align
         T * alloc_align( T ptr[], size_t align ) {                      // aligned realloc array
+        T * alloc_align( T * ptr, size_t align ) {                      // aligned realloc array
                 return (T *)(void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
         } // alloc_align

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: