Changeset 2223c80 for libcfa

libcfa/Makefile.in

r856fe3e	r2223c80
253	253	DEFS = @DEFS@
254	254	DEPDIR = @DEPDIR@
	255	DIST_BWLIMIT = @DIST_BWLIMIT@
255	256	DLLTOOL = @DLLTOOL@
256	257	DRIVER_DIR = @DRIVER_DIR@

libcfa/configure

-                      r856fe3e
+                      r2223c80
 CONFIG_CFLAGS
 ARCH_FLAGS
+DIST_BWLIMIT
 CFADIR_HASH
 LOCAL_CC1
 …
 enable_silent_rules
 enable_distcc
+with_bwlimit
 with_cfa_name
 enable_static
 …
   --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
   --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
+  --with-bwlimit=RATE     RATE the maximum rate at which rsync will be limited when using distributed builds
   --with-cfa-name=NAME     NAME too which cfa will be installed
   --with-pic[=PKGS]       try to use only PIC/non-PIC objects [default=use
 …
+# Check whether --with-bwlimit was given.
+if test "${with_bwlimit+set}" = set; then :
+  withval=$with_bwlimit; DIST_BWLIMIT=$withval
+else
+  DIST_BWLIMIT=0
+fi
 echo -n "checking for distributated build... "
 if test x$enable_distcc = xno; then
 …
   ENABLE_DISTCC_FALSE=
 fi

libcfa/configure.ac

-                      r856fe3e
+                      r2223c80
         enable_distcc=$enableval, enable_distcc=no)
+AC_ARG_WITH(bwlimit,
+        [  --with-bwlimit=RATE     RATE the maximum rate at which rsync will be limited when using distributed builds],
+        DIST_BWLIMIT=$withval, DIST_BWLIMIT=0)
 echo -n "checking for distributated build... "
 if test x$enable_distcc = xno; then
 …
 AC_SUBST(CFADIR_HASH)
 AC_SUBST(CFA_VERSION)
+AC_SUBST(DIST_BWLIMIT)
 #==============================================================================

libcfa/prelude/Makefile.am

r856fe3e	r2223c80
72	72	if ENABLE_DISTCC
73	73	distribution: @LOCAL_CFACC@ @LOCAL_CC1@ @CFACPP@ gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c $(srcdir)/../../tools/build/push2dist.sh
74		${AM_V_GEN}$(srcdir)/../../tools/build/push2dist.sh @CFADIR_HASH@
	74	${AM_V_GEN}$(srcdir)/../../tools/build/push2dist.sh @CFADIR_HASH@ @DIST_BWLIMIT@
75	75	@echo "Dummy file to track distribution to remote hosts" > ${@}
76	76

libcfa/prelude/Makefile.in

-                      r856fe3e
+                      r2223c80
 DEFS = @DEFS@
 DEPDIR = @DEPDIR@
+DIST_BWLIMIT = @DIST_BWLIMIT@
 DLLTOOL = @DLLTOOL@
 DRIVER_DIR = @DRIVER_DIR@
 …
 @ENABLE_DISTCC_TRUE@distribution: @LOCAL_CFACC@ @LOCAL_CC1@ @CFACPP@ gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c $(srcdir)/../../tools/build/push2dist.sh
 @ENABLE_DISTCC_TRUE@    ${AM_V_GEN}$(srcdir)/../../tools/build/push2dist.sh @CFADIR_HASH@
+@ENABLE_DISTCC_TRUE@    ${AM_V_GEN}$(srcdir)/../../tools/build/push2dist.sh @CFADIR_HASH@ @DIST_BWLIMIT@
 @ENABLE_DISTCC_TRUE@    @echo "Dummy file to track distribution to remote hosts" > ${@}

libcfa/src/Makefile.in

r856fe3e	r2223c80
307	307	DEFS = @DEFS@
308	308	DEPDIR = @DEPDIR@
	309	DIST_BWLIMIT = @DIST_BWLIMIT@
309	310	DLLTOOL = @DLLTOOL@
310	311	DRIVER_DIR = @DRIVER_DIR@

libcfa/src/concurrency/io.cfa

-                      r856fe3e
+                      r2223c80
 #include "kernel.hfa"
+#include "bitmanip.hfa"
 #if !defined(HAVE_LINUX_IO_URING_H)
         void __kernel_io_startup( cluster &, int, bool ) {
+        void __kernel_io_startup( cluster &, unsigned, bool ) {
                 // Nothing to do without io_uring
+        }
 …
         struct __io_poller_fast {
                 struct __io_data * ring;
-                bool waiting;
                 $thread thrd;
         };
 …
         void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
                 this.ring = cltr.io;
-                this.waiting = true;
                 (this.thrd){ "Fast I/O Poller", cltr };
+        }
 …
                 // Like head/tail but not seen by the kernel
                 volatile uint32_t alloc;
+                volatile uint32_t ready;
+                volatile uint32_t * ready;
+                uint32_t ready_cnt;
                 __spinlock_t lock;
 …
                                         volatile unsigned long long int block;
                                 } submit_avg;
+                                struct {
+                                        volatile unsigned long long int val;
+                                        volatile unsigned long long int cnt;
+                                        volatile unsigned long long int block;
+                                } look_avg;
                         } stats;
                 #endif
 …
                                 void * stack;
                                 pthread_t kthrd;
+                                volatile bool blocked;
                         } slow;
                         __io_poller_fast fast;
 …
 // I/O Startup / Shutdown logic
 //=============================================================================================
         void __kernel_io_startup( cluster & this, int io_flags, bool main_cluster ) {
+        void __kernel_io_startup( cluster & this, unsigned io_flags, bool main_cluster ) {
                 this.io = malloc();
 …
                 sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
                 sq.alloc = *sq.tail;
+                sq.ready = *sq.tail;
+                if( io_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+                        /* paranoid */ verify( is_pow2( io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET ) || ((io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET) < 8)  );
+                        sq.ready_cnt = max(io_flags >> CFA_CLUSTER_IO_BUFFLEN_OFFSET, 8);
+                        sq.ready = alloc_align( 64, sq.ready_cnt );
+                        for(i; sq.ready_cnt) {
+                                sq.ready[i] = -1ul32;
+                        }
+                }
+                else {
+                        sq.ready_cnt = 0;
+                        sq.ready = 0p;
+                }
                 // completion queue
 …
                         this.io->submit_q.stats.submit_avg.cnt   = 0;
                         this.io->submit_q.stats.submit_avg.block = 0;
+                        this.io->submit_q.stats.look_avg.val   = 0;
+                        this.io->submit_q.stats.look_avg.cnt   = 0;
+                        this.io->submit_q.stats.look_avg.block = 0;
                         this.io->completion_q.stats.completed_avg.val = 0;
                         this.io->completion_q.stats.completed_avg.slow_cnt = 0;
 …
                 // Create the poller thread
                 __cfadbg_print_safe(io_core, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
+                this.io->poller.slow.blocked = false;
                 this.io->poller.slow.stack = __create_pthread( &this.io->poller.slow.kthrd, __io_poller_slow, &this );
+        }
 …
                 if( this.io->cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
                         with( this.io->poller.fast ) {
-                                /* paranoid */ verify( waiting ); // The thread shouldn't be in a system call
                                 /* paranoid */ verify( this.procs.head == 0p || &this == mainCluster );
                                 /* paranoid */ verify( this.idles.head == 0p || &this == mainCluster );
                                 // We need to adjust the clean-up based on where the thread is
                                 if( thrd.preempted != __NO_PREEMPTION ) {
+                                if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
                                         // This is the tricky case
                                         // The thread was preempted and now it is on the ready queue
-                                        /* paranoid */ verify( thrd.state == Active );           // The thread better be in this state
                                         /* paranoid */ verify( thrd.next == 1p );                // The thread should be the last on the list
                                         /* paranoid */ verify( this.ready_queue.head == &thrd ); // The thread should be the only thing on the list
 …
                         if(this.print_stats) {
                                 with(this.io->submit_q.stats, this.io->completion_q.stats) {
+                                        double lavgv = 0;
+                                        double lavgb = 0;
+                                        if(look_avg.cnt != 0) {
+                                                lavgv = ((double)look_avg.val  ) / look_avg.cnt;
+                                                lavgb = ((double)look_avg.block) / look_avg.cnt;
+                                        }
                                         __cfaabi_bits_print_safe( STDERR_FILENO,
                                                 "----- I/O uRing Stats -----\n"
+                                                "- total submit calls  : %'15llu\n"
+                                                "- avg submit          : %'18.2lf\n"
+                                                "- pre-submit block %%  : %'18.2lf\n"
+                                                "- total wait calls    : %'15llu   (%'llu slow, %'llu fast)\n"
+                                                "- avg completion/wait : %'18.2lf\n",
+                                                "- total submit calls     : %'15llu\n"
+                                                "- avg submit             : %'18.2lf\n"
+                                                "- pre-submit block %%     : %'18.2lf\n"
+                                                "- total ready search     : %'15llu\n"
+                                                "- avg ready search len   : %'18.2lf\n"
+                                                "- avg ready search block : %'18.2lf\n"
+                                                "- total wait calls       : %'15llu   (%'llu slow, %'llu fast)\n"
+                                                "- avg completion/wait    : %'18.2lf\n",
                                                 submit_avg.cnt,
                                                 ((double)submit_avg.val) / submit_avg.cnt,
                                                 (100.0 * submit_avg.block) / submit_avg.cnt,
+                                                look_avg.cnt,
+                                                lavgv,
+                                                lavgb,
                                                 completed_avg.slow_cnt + completed_avg.fast_cnt,
                                                 completed_avg.slow_cnt,  completed_avg.fast_cnt,
 …
                 close(this.io->fd);
+                free( this.io->submit_q.ready ); // Maybe null, doesn't matter
                 free( this.io );
+        }
 …
         // Process a single completion message from the io_uring
         // This is NOT thread-safe
+        static int __drain_io( struct __io_data & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+                int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
+        static [int, bool] __drain_io( & struct __io_data ring, * sigset_t mask, int waitcnt, bool in_kernel ) {
+                unsigned to_submit = 0;
+                if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+                        // If the poller thread also submits, then we need to aggregate the submissions which are ready
+                        uint32_t * tail = ring.submit_q.tail;
+                        const uint32_t mask = *ring.submit_q.mask;
+                        // Go through the list of ready submissions
+                        for( i; ring.submit_q.ready_cnt ) {
+                                // replace any submission with the sentinel, to consume it.
+                                uint32_t idx = __atomic_exchange_n( &ring.submit_q.ready[i], -1ul32, __ATOMIC_RELAXED);
+                                // If it was already the sentinel, then we are done
+                                if( idx == -1ul32 ) continue;
+                                // If we got a real submission, append it to the list
+                                ring.submit_q.array[ ((*tail) + to_submit) & mask ] = idx & mask;
+                                to_submit++;
+                        }
+                        // Increment the tail based on how many we are ready to submit
+                        __atomic_fetch_add(tail, to_submit, __ATOMIC_SEQ_CST);
+                        // update statistics
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                ring.submit_q.stats.submit_avg.val += to_submit;
+                                ring.submit_q.stats.submit_avg.cnt += 1;
+                        #endif
+                }
+                int ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
                 if( ret < 0 ) {
                         switch((int)errno) {
 …
                 __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );
                 return count;
+                return [count, count > 0 || to_submit > 0];
+        }
 …
                 if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_USER_THREAD ) {
                         while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
+                                __atomic_store_n( &ring.poller.slow.blocked, true, __ATOMIC_SEQ_CST );
                                 // In the user-thread approach drain and if anything was drained,
                                 // batton pass to the user-thread
+                                int count = __drain_io( ring, &mask, 1, true );
+                                int count;
+                                bool again;
+                                [count, again] = __drain_io( ring, &mask, 1, true );
+                                __atomic_store_n( &ring.poller.slow.blocked, false, __ATOMIC_SEQ_CST );
                                 // Update statistics
 …
                                 #endif
                                 if(count > 0) {
+                                if(again) {
                                         __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
                                         __unpark( &ring.poller.fast.thrd __cfaabi_dbg_ctx2 );
 …
                         while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
                                 //In the naive approach, just poll the io completion queue directly
+                                int count = __drain_io( ring, &mask, 1, true );
+                                int count;
+                                bool again;
+                                [count, again] = __drain_io( ring, &mask, 1, true );
                                 // Update statistics
 …
                 // Then loop until we need to start
                 while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
                         // Drain the io
+                        this.waiting = false;
+                        int count = __drain_io( *this.ring, 0p, 0, false );
+                        reset += count > 0 ? 1 : 0;
+                        int count;
+                        bool again;
+                        [count, again] = __drain_io( *this.ring, 0p, 0, false );
+                        if(!again) reset++;
                         // Update statistics
 …
                         #endif
                         this.waiting = true;
+                        // If we got something, just yield and check again
                         if(reset < 5) {
-                                // If we got something, just yield and check again
                                 yield();
+                        }
+                        // We didn't get anything baton pass to the slow poller
                         else {
-                                // We didn't get anything baton pass to the slow poller
                                 __cfadbg_print_safe(io_core, "Kernel I/O : Moving to ring %p to slow poller\n", &this.ring);
+                                reset = 0;
+                                // wake up the slow poller
                                 post( this.ring->poller.sem );
+                                // park this thread
                                 park( __cfaabi_dbg_ctx );
-                                reset = 0;
+                        }
+                }
                 __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring);
+        }
+        static inline void __wake_poller( struct __io_data & ring ) __attribute__((artificial)); // forward declaration that carries the attribute
+        static inline void __wake_poller( struct __io_data & ring ) { // Queue SIGUSR1 to the ring's slow-poller kernel thread, but only while that poller has flagged itself as blocked
+                if(!__atomic_load_n( &ring.poller.slow.blocked, __ATOMIC_SEQ_CST)) return; // flag is clear: nothing to wake
+                sigval val = { 1 }; // value delivered along with the queued signal
+                pthread_sigqueue( ring.poller.slow.kthrd, SIGUSR1, val ); // NOTE(review): the return value is ignored -- confirm a failed queue is acceptable here
+        }
 …
                 uint32_t idx = __atomic_fetch_add(&ring.submit_q.alloc, 1ul32, __ATOMIC_SEQ_CST);
+                // Validate that we didn't overflow anything
+                // Check that nothing overflowed
+                /* paranoid */ verify( true );
+                // Check that it goes head -> tail -> alloc and never head -> alloc -> tail
+                /* paranoid */ verify( true );
+                // Mask the idx now to allow make everything easier to check
+                idx &= *ring.submit_q.mask;
                 // Return the sqe
                 return [&ring.submit_q.sqes[ idx & (*ring.submit_q.mask)], idx];
+                return [&ring.submit_q.sqes[ idx ], idx];
+        }
         static inline void __submit( struct __io_data & ring, uint32_t idx ) {
+                // get mutual exclusion
+                lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+                // Append to the list of ready entries
+                uint32_t * tail = ring.submit_q.tail;
+                // Get now the data we definetely need
+                uint32_t * const tail = ring.submit_q.tail;
                 const uint32_t mask = *ring.submit_q.mask;
+                ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+                __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
+                // Submit however, many entries need to be submitted
+                int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+                if( ret < 0 ) {
+                        switch((int)errno) {
+                        default:
+                                abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
+                        }
+                }
+                // update statistics
+                #if !defined(__CFA_NO_STATISTICS__)
+                        ring.submit_q.stats.submit_avg.val += 1;
+                        ring.submit_q.stats.submit_avg.cnt += 1;
+                #endif
+                unlock(ring.submit_q.lock);
+                // Make sure that idx was submitted
+                // Be careful to not get false positive if we cycled the entire list or that someone else submitted for us
+                __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
+                // There are 2 submission schemes, check which one we are using
+                if( ring.cltr_flags & CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS ) {
+                        // If the poller thread submits, then we just need to add this to the ready array
+                        /* paranoid */ verify( idx <= mask   );
+                        /* paranoid */ verify( idx != -1ul32 );
+                        // We need to find a spot in the ready array
+                        __attribute((unused)) int len   = 0;
+                        __attribute((unused)) int block = 0;
+                        uint32_t expected = -1ul32;
+                        uint32_t ready_mask = ring.submit_q.ready_cnt - 1;
+                        uint32_t off = __tls_rand();
+                        LOOKING: for() {
+                                for(i; ring.submit_q.ready_cnt) {
+                                        uint32_t ii = (i + off) & ready_mask;
+                                        if( __atomic_compare_exchange_n( &ring.submit_q.ready[ii], &expected, idx, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) {
+                                                break LOOKING;
+                                        }
+                                        len ++;
+                                }
+                                block++;
+                                yield();
+                        }
+                        __wake_poller( ring );
+                        // update statistics
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                __atomic_fetch_add( &ring.submit_q.stats.look_avg.val,   len,   __ATOMIC_RELAXED );
+                                __atomic_fetch_add( &ring.submit_q.stats.look_avg.block, block, __ATOMIC_RELAXED );
+                                __atomic_fetch_add( &ring.submit_q.stats.look_avg.cnt,   1,     __ATOMIC_RELAXED );
+                        #endif
+                        __cfadbg_print_safe( io, "Kernel I/O : Added %u to ready for %p\n", idx, active_thread() );
+                }
+                else {
+                        // get mutual exclusion
+                        lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+                        // Append to the list of ready entries
+                        /* paranoid */ verify( idx <= mask );
+                        ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+                        __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
+                        // Submit however, many entries need to be submitted
+                        int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+                        if( ret < 0 ) {
+                                switch((int)errno) {
+                                default:
+                                        abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
+                                }
+                        }
+                        // update statistics
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                ring.submit_q.stats.submit_avg.val += 1;
+                                ring.submit_q.stats.submit_avg.cnt += 1;
+                        #endif
+                        unlock(ring.submit_q.lock);
+                        __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
+                }
+        }

libcfa/src/concurrency/kernel.cfa

-                      r856fe3e
+                      r2223c80
+}
 void ?{}(cluster & this, const char name[], Duration preemption_rate, int io_flags) with( this ) {
+void ?{}(cluster & this, const char name[], Duration preemption_rate, unsigned io_flags) with( this ) {
         this.name = name;
         this.preemption_rate = preemption_rate;
 …
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
 …
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
+                /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );

libcfa/src/concurrency/kernel.hfa

-                      r856fe3e
+                      r2223c80
 struct __io_data;
+#define CFA_CLUSTER_IO_POLLER_USER_THREAD 1 << 0
+// #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE 1 << 1
+// Cluster I/O option bits, tested against the cluster's flags word with `&`.
+// Each replacement list is parenthesized so the macro stays a single operand in any
+// expression (e.g. `~FLAG` or `FLAG + 1`), whatever the neighbouring operators are.
+#define CFA_CLUSTER_IO_POLLER_USER_THREAD    (1 << 0) // 0x1
+#define CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS (1 << 1) // 0x2
+// #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE (1 << 2) // 0x4
+// Shift applied to the flags word to extract the requested ready-array length
+#define CFA_CLUSTER_IO_BUFFLEN_OFFSET        16
 //-----------------------------------------------------------------------------
 …
 extern Duration default_preemption();
 void ?{} (cluster & this, const char name[], Duration preemption_rate, int flags);
+void ?{} (cluster & this, const char name[], Duration preemption_rate, unsigned flags);
 void ^?{}(cluster & this);
 static inline void ?{} (cluster & this)                                      { this{"Anonymous Cluster", default_preemption(), 0}; }
 static inline void ?{} (cluster & this, Duration preemption_rate)            { this{"Anonymous Cluster", preemption_rate, 0}; }
 static inline void ?{} (cluster & this, const char name[])                   { this{name, default_preemption(), 0}; }
 static inline void ?{} (cluster & this, int flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
 static inline void ?{} (cluster & this, Duration preemption_rate, int flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
 static inline void ?{} (cluster & this, const char name[], int flags)        { this{name, default_preemption(), flags}; }
+static inline void ?{} (cluster & this)                                           { this{"Anonymous Cluster", default_preemption(), 0}; }
+static inline void ?{} (cluster & this, Duration preemption_rate)                 { this{"Anonymous Cluster", preemption_rate, 0}; }
+static inline void ?{} (cluster & this, const char name[])                        { this{name, default_preemption(), 0}; }
+static inline void ?{} (cluster & this, unsigned flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
+static inline void ?{} (cluster & this, Duration preemption_rate, unsigned flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
+static inline void ?{} (cluster & this, const char name[], unsigned flags)        { this{name, default_preemption(), flags}; }
 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }

libcfa/src/concurrency/kernel_private.hfa

r856fe3e	r2223c80
77	77	//-----------------------------------------------------------------------------
78	78	// I/O
79		void __kernel_io_startup ( cluster &, ~~int~~, bool );
	79	void __kernel_io_startup ( cluster &, unsigned, bool );
80	80	void __kernel_io_finish_start( cluster & );
81	81	void __kernel_io_prepare_stop( cluster & );

libcfa/src/containers/list.hfa

-                      r856fe3e
+                      r2223c80
                 $prev_link(list_pos) = (Telem*) 0p;
+        }
+        static inline bool ?`is_empty(dlist(Tnode, Telem) &list) { // Report whether the list holds no elements; assertions cross-check that both head links agree
+                assert( &list != 0p );
+                $dlinks(Telem) *listLinks = & list.$links; // the links stored in the list head itself
+                if (listLinks->next.is_terminator) { // forward link is a terminator: the empty case
+                        assert(listLinks->prev.is_terminator); // backward link must be a terminator too
+                        assert(listLinks->next.terminator); // both terminator pointers must be non-null
+                        assert(listLinks->prev.terminator);
+                        return true;
+                } else { // forward link refers to an element: the non-empty case
+                        assert(!listLinks->prev.is_terminator); // backward link must refer to an element too
+                        assert(listLinks->next.elem); // both element pointers must be non-null
+                        assert(listLinks->prev.elem);
+                        return false;
+                }
+        }
+        static inline Telem & pop_first(dlist(Tnode, Telem) &list) { // Remove the element referenced by the head's next link and return a reference to it
+                assert( &list != 0p );
+                assert( !list`is_empty ); // precondition: the list must not be empty
+                $dlinks(Telem) *listLinks = & list.$links;
+                Telem & first = *listLinks->next.elem; // element reached through the head's forward link
+                Tnode & list_pos_first  = $tempcv_e2n( first ); // obtain the Tnode view of the element, which is what remove() is called with
+                remove(list_pos_first);
+                return first; // NOTE(review): presumably remove() only unlinks and the element itself stays valid -- confirm
+        }
+        static inline Telem & pop_last(dlist(Tnode, Telem) &list) { // Remove the element referenced by the head's prev link and return a reference to it
+                assert( &list != 0p );
+                assert( !list`is_empty ); // precondition: the list must not be empty
+                $dlinks(Telem) *listLinks = & list.$links;
+                Telem & last = *listLinks->prev.elem; // element reached through the head's backward link
+                Tnode & list_pos_last  = $tempcv_e2n( last ); // obtain the Tnode view of the element, which is what remove() is called with
+                remove(list_pos_last);
+                return last; // NOTE(review): presumably remove() only unlinks and the element itself stays valid -- confirm
+        }
+}

libcfa/src/exception.c

-                      r856fe3e
+                      r2223c80
+// TERMINATION ===============================================================
+// MEMORY MANAGEMENT (still for integers)
+// May have to move to cfa for constructors and destructors (references).
+// MEMORY MANAGEMENT =========================================================
 // How to clean up an exception in various situations.
 …
+}
+// If this isn't a rethrow (*except==0), delete the provided exception.
+void __cfaehm_cleanup_terminate( void * except ) { // `except` is read as the address of a slot holding an exception_t *
+        if ( *(void**)except ) __cfaehm_delete_exception( *(exception_t **)except ); // a null slot means there is nothing to delete
+}
+// CANCELLATION ==============================================================
 // Function needed by force unwind
 …
+}
+// Cancel the current stack, performing appropriate clean-up and messaging.
+void __cfaehm_cancel_stack( exception_t * exception ) { // NOTE(review): `exception` is not referenced in the body; the unwind is started on this_exception_storage -- confirm this is intended
+        // TODO: Detect current stack and pick a particular stop-function.
+        _Unwind_Reason_Code ret;
+        ret = _Unwind_ForcedUnwind( &this_exception_storage, _Stop_Fn, (void*)0x22 );
+        printf("UNWIND ERROR %d after force unwind\n", ret); // reached only if the forced unwind returns
+        abort();
+}
+// TERMINATION ===============================================================
+// If this isn't a rethrow (*except==0), delete the provided exception.
+void __cfaehm_cleanup_terminate( void * except ) { // `except` is read as the address of a slot holding an exception_t *
+        if ( *(void**)except ) __cfaehm_delete_exception( *(exception_t **)except ); // a null slot means there is nothing to delete
+}
 // The exception that is being thrown must already be stored.
 static __attribute__((noreturn)) void __cfaehm_begin_unwind(void) {
 …
         // the whole stack.
+        // No handler found, go to the default operation.
+        // Currently this will always be a cancellation.
         if ( ret == _URC_END_OF_STACK ) {
+                // No proper handler was found. This can be handled in many ways, C++ calls std::terminate.
+                // Here we force unwind the stack, basically raising a cancellation.
+                printf("Uncaught exception %p\n", &this_exception_storage);
+                ret = _Unwind_ForcedUnwind( &this_exception_storage, _Stop_Fn, (void*)0x22 );
+                printf("UNWIND ERROR %d after force unwind\n", ret);
+                abort();
+                __cfadbg_print_safe(exception, "Uncaught exception %p\n", &this_exception_storage);
+                __cfaehm_cancel_stack(this_exception_context()->current_exception);
+        }

libcfa/src/exception.h

r856fe3e	r2223c80
38	38
39	39
	40	void __cfaehm_cancel_stack(exception_t * except) __attribute__((noreturn));
	41
40	42	// Used in throw statement translation.
41	43	void __cfaehm_throw_terminate(exception_t * except) __attribute__((noreturn));

libcfa/src/exception.hfa

-                      r856fe3e
+                      r2223c80
 // Created On       : Thu Apr  7 10:25:00 2020
 // Last Modified By : Andrew Beach
 // Last Modified On : Thu Apr  7 10:25:00 2020
 // Update Count     : 0
+// Last Modified On : Wed Apr 13 15:42:00 2020
+// Update Count     : 1
 //
+// WARNING: This is for documentation as it will match ANY type.
+trait is_exception(dtype T) {
+        /* The first field must be a pointer to a virtual table.
+         * That virtual table must be a descendant of the base exception virtual table.
+         */
+};
+forall(dtype T | is_exception(T))
+inline void cancel_stack(T & except) __attribute__((noreturn)) {
+        __cfaehm_cancel_stack( (exception_t *)&except );
+}
 // Everything below this line should be considered a patch while the exception

libcfa/src/executor.cfa

-                      r856fe3e
+                      r2223c80
 // buffer.
 #include <bits/containers.hfa>
+#include <containers/list.hfa>
 #include <thread.hfa>
 #include <stdio.h>
-forall( dtype T )
-monitor Buffer {                                        // unbounded buffer
-    __queue_t( T ) queue;                               // unbounded list of work requests
-    condition delay;
-}; // Buffer
-forall( dtype T | is_node(T) ) {
-    void insert( Buffer( T ) & mutex buf, T * elem ) with(buf) {
-        append( queue, elem );                          // insert element into buffer
-        signal( delay );                                // restart
-    } // insert
-    T * remove( Buffer( T ) & mutex buf ) with(buf) {
-        if ( queue.head != 0 ) wait( delay );                   // no request to process ? => wait
-//      return pop_head( queue );
-    } // remove
-} // distribution
 struct WRequest {                                       // client request, no return
     void (* action)( void );
     WRequest * next;                                    // intrusive queue field
+    DLISTED_MGD_IMPL_IN(WRequest)
 }; // WRequest
+DLISTED_MGD_IMPL_OUT(WRequest)
+WRequest *& get_next( WRequest & this ) { return this.next; }
+void ?{}( WRequest & req ) with(req) { action = 0; next = 0; }
+void ?{}( WRequest & req, void (* action)( void ) ) with(req) { req.action = action; next = 0; }
+void ?{}( WRequest & req ) with(req) { action = 0; }
+void ?{}( WRequest & req, void (* action)( void ) ) with(req) { req.action = action; }
 bool stop( WRequest & req ) { return req.action == 0; }
 void doit( WRequest & req ) { req.action(); }
+monitor WRBuffer {                                      // unbounded buffer
+    dlist( WRequest, WRequest ) queue;                  // unbounded list of work requests
+    condition delay;
+}; // WRBuffer
+void insert( WRBuffer & mutex buf, WRequest * elem ) with(buf) {
+    insert_last( queue, *elem );                        // insert element into buffer
+    signal( delay );                                    // restart
+} // insert
+WRequest * remove( WRBuffer & mutex buf ) with(buf) {
+    if ( queue`is_empty ) wait( delay );                // no request to process ? => wait
+    return & pop_first( queue );
+} // remove
 // Each worker has its own work buffer to reduce contention between client and server. Hence, work requests arrive and
 …
 thread Worker {
     Buffer( WRequest ) * requests;
+    WRBuffer * requests;
     unsigned int start, range;
 }; // Worker
 …
 } // Worker::main
 void ?{}( Worker & worker, cluster * wc, Buffer( WRequest ) * requests, unsigned int start, unsigned int range ) {
+void ?{}( Worker & worker, cluster * wc, WRBuffer * requests, unsigned int start, unsigned int range ) {
     (*get_thread(worker)){ *wc };                       // create on given cluster
     worker.[requests, start, range] = [requests, start, range];
 …
     cluster * cluster;                                  // if workers execute on separate cluster
     processor ** processors;                            // array of virtual processors adding parallelism for workers
     Buffer( WRequest ) * requests;                      // list of work requests
+    WRBuffer * requests;                                // list of work requests
     Worker ** workers;                                  // array of workers executing work requests
     unsigned int nprocessors, nworkers, nmailboxes;     // number of mailboxes/workers/processor tasks
 …
     cluster = sepClus ? new( "Executor" ) : active_cluster();
     processors = (processor **)anew( nprocessors );
     requests = anew( nmailboxes );
+    requests = (WRBuffer *)anew( nmailboxes );
     workers = (Worker **)anew( nworkers );
 …
         for ( i; 3000 ) {
             send( exector, workie );
+            if ( i % 100 ) yield();
+            if ( i % 100 == 0 ) {
+//              fprintf( stderr, "%d\n", i );
+                yield();
+            }
         } // for
+    }

Context Navigation

Legend:

libcfa/Makefile.in

libcfa/configure

libcfa/configure.ac

libcfa/prelude/Makefile.am

libcfa/prelude/Makefile.in

libcfa/src/Makefile.in

libcfa/src/concurrency/io.cfa

libcfa/src/concurrency/kernel.cfa

libcfa/src/concurrency/kernel.hfa

libcfa/src/concurrency/kernel_private.hfa

libcfa/src/containers/list.hfa

libcfa/src/exception.c

libcfa/src/exception.h

libcfa/src/exception.hfa

libcfa/src/executor.cfa

Download in other formats: