Changeset 139775e

benchmark/readyQ/cycle.cfa

-              r55acc3a
+              r139775e
 #include "rq_bench.hfa"
+thread Partner {
+        Partner * partner;
+struct Partner {
         unsigned long long count;
+        unsigned long long blocks;
+        bench_sem self;
+        bench_sem * next;
 };
 void ?{}( Partner & this ) {
         ((thread&)this){ bench_cluster };
+        this.count = this.blocks = 0;
+}
+void main( Partner & this ) {
+        this.count = 0;
+thread BThrd {
+        Partner & partner;
+};
+void ?{}( BThrd & this, Partner * partner ) {
+        ((thread&)this){ bench_cluster };
+        &this.partner = partner;
+}
+void ^?{}( BThrd & mutex this ) {}
+void main( BThrd & thrd ) with(thrd.partner) {
+        count = 0;
         for() {
                 park();
                 unpark( *this.partner );
                 this.count ++;
+                blocks += wait( self );
+                post( *next );
+                count ++;
                 if( clock_mode && stop) break;
                 if(!clock_mode && this.count >= stop_count) break;
+                if(!clock_mode && count >= stop_count) break;
+        }
 …
+        {
                 unsigned long long global_counter = 0;
+                unsigned long long global_blocks  = 0;
                 unsigned tthreads = nthreads * ring_size;
                 Time start, end;
 …
+                {
                         threads_left = tthreads;
+                        Partner threads[tthreads];
+                        BThrd * threads[tthreads];
+                        Partner thddata[tthreads];
                         for(i; tthreads) {
                                 unsigned pi = (i + nthreads) % tthreads;
+                                threads[i].partner = &threads[pi];
+                                thddata[i].next = &thddata[pi].self;
+                        }
+                        for(int i = 0; i < tthreads; i++) {
+                                threads[i] = malloc();
+                                (*threads[i]){ &thddata[i] };
+                        }
                         printf("Starting\n");
 …
                         for(i; nthreads) {
                                 unpark( threads[i] );
+                                post( thddata[i].self );
+                        }
                         wait(start, is_tty);
 …
                         for(i; tthreads) {
+                                global_counter += join( threads[i] ).count;
+                                Partner & partner = join( *threads[i] ).partner;
+                                global_counter += partner.count;
+                                global_blocks  += partner.blocks;
+                                delete(threads[i]);
+                        }
+                }
+                printf("Duration (ms)       : %'ld\n", (end - start)`ms);
+                printf("Number of processors: %'d\n", nprocs);
+                printf("Number of threads   : %'d\n", tthreads);
+                printf("Cycle size (# thrds): %'d\n", ring_size);
+                printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
+                printf("ns per yields       : %'18.2lf\n", ((double)(end - start)`ns) / global_counter);
+                printf("Total yields        : %'15llu\n", global_counter);
+                printf("Yields per threads  : %'15llu\n", global_counter / tthreads);
+                printf("Yields per procs    : %'15llu\n", global_counter / nprocs);
+                printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
+                printf("ns per yields/procs : %'18.2lf\n", ((double)(end - start)`ns) / (global_counter / nprocs));
+                printf("Duration (ms)        : %'ld\n", (end - start)`dms);
+                printf("Number of processors : %'d\n", nprocs);
+                printf("Number of threads    : %'d\n", tthreads);
+                printf("Cycle size (# thrds) : %'d\n", ring_size);
+                printf("Total Operations(ops): %'15llu\n", global_counter);
+                printf("Total blocks         : %'15llu\n", global_blocks);
+                printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`ds);
+                printf("ns per ops           : %'18.2lf\n", (end - start)`dns / global_counter);
+                printf("Ops per threads      : %'15llu\n", global_counter / tthreads);
+                printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+                printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`ds);
+                printf("ns per ops/procs     : %'18.2lf\n", (end - start)`dns / (global_counter / nprocs));
                 fflush(stdout);
+        }

benchmark/readyQ/cycle.cpp

-              r55acc3a
+              r139775e
+#include "rq_bench.hpp"
+struct Partner { // Per-fibre benchmark state: one node in a ring of semaphores
+        unsigned long long count  = 0; // number of wait/post rounds completed by partner_main
+        unsigned long long blocks = 0; // running sum of the values returned by self.wait()
+        bench_sem self; // semaphore this fibre waits on
+        bench_sem * next; // semaphore posted after each wake-up; main points it at another Partner's self
+};
+void partner_main( Partner * self ) { // Fibre body: wait on own semaphore, post the next one, count the round, repeat
+        self->count = 0;
+        for(;;) {
+                self->blocks += self->self.wait(); // add whatever wait() returns to the block tally
+                self->next->post(); // hand the token to the next partner in the ring
+                self->count ++;
+                if( clock_mode && stop) break; // clock mode: leave once stop is set (main sets it after wait() returns)
+                if(!clock_mode && self->count >= stop_count) break; // otherwise: leave after stop_count rounds
+        }
+        __atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST); // atomically decrement the count of fibres still running
+}
+int main(int argc, char * argv[]) { // Cycle benchmark driver: builds rings of fibres passing a token through bench_sem, then reports totals
+        unsigned ring_size = 2; // default cycle length, overridable with the 'r' / "ringsize" option below
+        option_t opt[] = {
+                BENCH_OPT,
+                { 'r', "ringsize", "Number of threads in a cycle", ring_size }
+        };
+        BENCH_OPT_PARSE("cforall cycle benchmark"); // NOTE(review): description says "cforall" although this file drives Fibre — confirm intended
+        {
+                unsigned long long global_counter = 0;
+                unsigned long long global_blocks  = 0;
+                unsigned tthreads = nthreads * ring_size; // total fibres = number of cycles (nthreads) * fibres per cycle
+                uint64_t start, end;
+                FibreInit(1, nprocs);
+                {
+                        threads_left = tthreads;
+                        Fibre * threads[tthreads];
+                        Partner thddata[tthreads];
+                        for(int i = 0; i < tthreads; i++) {
+                                unsigned pi = (i + nthreads) % tthreads; // stride of nthreads: indices equal modulo nthreads form one cycle of ring_size nodes
+                                thddata[i].next = &thddata[pi].self;
+                        }
+                        for(int i = 0; i < tthreads; i++) {
+                                threads[i] = new Fibre( reinterpret_cast<void (*)(void *)>(partner_main), &thddata[i] ); // each fibre runs partner_main on its own Partner
+                        }
+                        printf("Starting\n");
+                        bool is_tty = isatty(STDOUT_FILENO);
+                        start = getTimeNsec();
+                        for(int i = 0; i < nthreads; i++) {
+                                thddata[i].self.post(); // inject one token per cycle: indices 0..nthreads-1 each belong to a different cycle
+                        }
+                        wait(start, is_tty); // helper defined outside this file; presumably returns when the run is over — confirm
+                        stop = true; // lets clock-mode fibres leave their loop (see partner_main)
+                        end = getTimeNsec();
+                        printf("\nDone\n");
+                        for(int i = 0; i < tthreads; i++) {
+                                fibre_join( threads[i], nullptr ); // join before reading the fibre's counters
+                                global_counter += thddata[i].count;
+                                global_blocks  += thddata[i].blocks;
+                        }
+                }
+                printf("Duration (ms)        : %'ld\n", to_miliseconds(end - start));
+                printf("Number of processors : %'d\n", nprocs);
+                printf("Number of threads    : %'d\n", tthreads);
+                printf("Cycle size (# thrds) : %'d\n", ring_size);
+                printf("Total Operations(ops): %'15llu\n", global_counter);
+                printf("Total blocks         : %'15llu\n", global_blocks);
+                printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start));
+                printf("ns per ops           : %'18.2lf\n", ((double)(end - start)) / global_counter);
+                printf("Ops per threads      : %'15llu\n", global_counter / tthreads);
+                printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+                printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start));
+                printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)) / (global_counter / nprocs)); // NOTE(review): global_counter / nprocs looks like an integer division, unlike the line above — confirm intended
+                fflush(stdout);
+        }
+        return 0;
+}

benchmark/readyQ/cycle.go

-              r55acc3a
+              r139775e
 import (
-        "bufio"
         "flag"
         "fmt"
-        "os"
-        "runtime"
         "sync/atomic"
         "time"
 …
         "golang.org/x/text/message"
+)
-var clock_mode bool
-var threads_left int64
-var stop int32
-var duration float64
-var stop_count uint64
-func fflush(f *bufio.Writer) {
-        defer f.Flush()
-        f.Write([]byte("\r"))
+}
-func wait(start time.Time, is_tty bool) {
-        f := bufio.NewWriter(os.Stdout)
-        tdur := time.Duration(duration)
-        for true {
-                time.Sleep(100 * time.Millisecond)
-                end := time.Now()
-                delta := end.Sub(start)
-                if is_tty {
-                        fmt.Printf(" %.1f",delta.Seconds())
-                        fflush(f)
+                }
-                if clock_mode && delta >= (tdur * time.Second) {
-                        break
-                } else if !clock_mode && atomic.LoadInt64(&threads_left) == 0 {
-                        break
+                }
+        }
+}
 func partner(result chan uint64, mine chan int, next chan int) {
 …
 func main() {
-        var nprocs int
-        var nthreads int
         var ring_size int
-        nprocsOpt := flag.Int("p", 1, "The number of processors")
-        nthreadsOpt := flag.Int("t", 1, "The number of threads")
         ring_sizeOpt := flag.Int("r", 2, "The number of threads per cycles")
-        durationOpt := flag.Float64("d", 0, "Duration of the experiment in seconds")
-        stopOpt := flag.Uint64("i", 0, "Duration of the experiment in iterations")
         flag.Parse()
+        bench_init()
-        nprocs = *nprocsOpt
-        nthreads = *nthreadsOpt
         ring_size = *ring_sizeOpt
-        duration = *durationOpt
-        stop_count = *stopOpt
-        if duration > 0 && stop_count > 0 {
-                panic(fmt.Sprintf("--duration and --iterations cannot be used together\n"))
-        } else if duration > 0 {
-                clock_mode = true
-                stop_count = 0xFFFFFFFFFFFFFFFF
-                fmt.Printf("Running for %f seconds\n", duration)
-        } else if stop_count > 0 {
-                clock_mode = false
-                fmt.Printf("Running for %d iterations\n", stop_count)
-        } else {
-                duration = 5
-                clock_mode = true
-                fmt.Printf("Running for %f seconds\n", duration)
+        }
-        runtime.GOMAXPROCS(nprocs)
         tthreads := nthreads * ring_size
         threads_left = int64(tthreads)
 …
         p := message.NewPrinter(language.English)
         p.Printf("Duration (ms)       : %f\n", delta.Seconds());
         p.Printf("Number of processors: %d\n", nprocs);
         p.Printf("Number of threads   : %d\n", tthreads);
         p.Printf("Cycle size (# thrds): %d\n", ring_size);
         p.Printf("Yields per second   : %18.2f\n", float64(global_counter) / delta.Seconds())
         p.Printf("ns per yields       : %18.2f\n", float64(delta.Nanoseconds()) / float64(global_counter))
         p.Printf("Total yields        : %15d\n", global_counter)
         p.Printf("Yields per threads  : %15d\n", global_counter / uint64(tthreads))
         p.Printf("Yields per procs    : %15d\n", global_counter / uint64(nprocs))
         p.Printf("Yields/sec/procs    : %18.2f\n", (float64(global_counter) / float64(nprocs)) / delta.Seconds())
         p.Printf("ns per yields/procs : %18.2f\n", float64(delta.Nanoseconds()) / (float64(global_counter) / float64(nprocs)))
+        p.Printf("Duration (ms)        : %f\n", delta.Seconds());
+        p.Printf("Number of processors : %d\n", nprocs);
+        p.Printf("Number of threads    : %d\n", tthreads);
+        p.Printf("Cycle size (# thrds) : %d\n", ring_size);
+        p.Printf("Total Operations(ops): %15d\n", global_counter)
+        p.Printf("Ops per second       : %18.2f\n", float64(global_counter) / delta.Seconds())
+        p.Printf("ns per ops           : %18.2f\n", float64(delta.Nanoseconds()) / float64(global_counter))
+        p.Printf("Ops per threads      : %15d\n", global_counter / uint64(tthreads))
+        p.Printf("Ops per procs        : %15d\n", global_counter / uint64(nprocs))
+        p.Printf("Ops/sec/procs        : %18.2f\n", (float64(global_counter) / float64(nprocs)) / delta.Seconds())
+        p.Printf("ns per ops/procs     : %18.2f\n", float64(delta.Nanoseconds()) / (float64(global_counter) / float64(nprocs)))
+}

benchmark/readyQ/rq_bench.hfa

-              r55acc3a
+              r139775e
 void ^?{}( BenchCluster & this ) {
         adelete( this.nprocs, this.procs );
+        adelete( this.procs );
         ^(this.cl){};
+}
 …
+        }
+}
+struct __attribute__((aligned(128))) bench_sem { // minimal semaphore for the benchmarks, aligned to 128 bytes
+        struct $thread * volatile ptr; // 0p: no token and no waiter; 1p: a post is pending; any other value: the thread parked in wait()
+};
+static inline { // constructor, destructor, wait and post for bench_sem
+        void  ?{}(bench_sem & this) {
+                this.ptr = 0p; // start with no pending post and no waiter
+        }
+        void ^?{}(bench_sem & this) {}
+        bool wait(bench_sem & this) { // returns false if a pending post was consumed without parking, true if the caller parked
+                for() { // retry until one of the compare-exchanges succeeds
+                        struct $thread * expected = this.ptr;
+                        if(expected == 1p) { // a post is already pending
+                                if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { // consume it
+                                        return false;
+                                }
+                        }
+                        else {
+                                /* paranoid */ verify( expected == 0p ); // any other value would mean a thread is already registered as waiter
+                                if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { // publish this thread as the waiter
+                                        park(); // block until post() unparks this thread
+                                        return true;
+                                }
+                        }
+                }
+        }
+        bool post(bench_sem & this) { // returns true only when a parked waiter was unparked
+                for() { // retry until one of the compare-exchanges succeeds
+                        struct $thread * expected = this.ptr;
+                        if(expected == 1p) return false; // a post is already pending: nothing is stored, so this post is dropped
+                        if(expected == 0p) { // nobody waiting yet
+                                if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { // leave the token for a future wait()
+                                        return false;
+                                }
+                        }
+                        else { // expected holds the waiting thread
+                                if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { // clear the slot before waking the waiter
+                                        unpark( expected );
+                                        return true;
+                                }
+                        }
+                }
+        }
+}

benchmark/rmit.py

-              r55acc3a
+              r139775e
 import argparse
+import datetime
 import itertools
 import os
 …
         return nopts
+def actions_eta(actions):
+        time = 0
+        for a in actions:
+                i = 0
+                while i < len(a):
+                        if a[i] == '-d':
+                                i += 1
+                                if i != len(a):
+                                        time += int(a[i])
+                        i += 1
+        return time
 if __name__ == "__main__":
         # ================================================================================
 …
         # ================================================================================
         # Prepare to run
+        # find expected time
+        time = actions_eta(actions)
+        print("Running {} trials{}".format(len(actions), "" if time == 0 else " (expecting to take {}".format(str(datetime.timedelta(seconds=int(time)))) ))
         random.shuffle(actions)
-        print("Running {} trials".format(len(actions)))
         result = []
 …
                         d = [r[0], r[1]]
                         for k in headers[2:]:
+                                d.append(r[2][k])
+                                try:
+                                        d.append(r[2][k])
+                                except:
+                                        d.append(0.0)
                         data.append(d)

configure.ac

-              r55acc3a
+              r139775e
 # New AST toggling support
 AH_TEMPLATE([CFA_USE_NEW_AST],[Sets whether or not to use the new-ast, this is adefault value and can be overrided by --old-ast and --new-ast])
+DEFAULT_NEW_AST="False"
 AC_ARG_ENABLE(new-ast,
         [  --enable-new-ast     whether or not to use new ast as the default AST algorithm],
         [case "${enableval}" in
                 yes) newast=true ;;
                 no)  newast=false ;;
+                yes) newast=true ; DEFAULT_NEW_AST="True"  ;;
+                no)  newast=false; DEFAULT_NEW_AST="False" ;;
                 *) AC_MSG_ERROR([bad value ${enableval} for --enable-new-ast]) ;;
         esac],[newast=false])
 AC_DEFINE_UNQUOTED([CFA_USE_NEW_AST], $newast)
+AC_SUBST(DEFAULT_NEW_AST)
 #==============================================================================

libcfa/src/concurrency/alarm.cfa

-              r55acc3a
+              r139775e
 //=============================================================================================
 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ) with( this ) {
+void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period) with( this ) {
         this.thrd = thrd;
         this.alarm = alarm;
         this.period = period;
         set = false;
         kernel_alarm = false;
+        type = User;
+}
 …
         this.period = period;
         set = false;
+        kernel_alarm = true;
+        type = Kernel;
+}
+void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback ) with( this ) { // construct an alarm of type Callback for thrd
+        this.thrd = thrd;
+        this.alarm = alarm;
+        this.period = period;
+        this.callback = callback; // function pointer taking the alarm node by reference (see the Alarm_Callback typedef)
+        set = false; // the node starts out not registered
+        type = Callback;
+}

libcfa/src/concurrency/alarm.hfa

-              r55acc3a
+              r139775e
 //=============================================================================================
+enum alarm_type{ Kernel = 0, User = 1, Callback = 2 }; // kind of alarm node; each alarm_node_t constructor stores one of these in `type`
+struct alarm_node_t; // forward declaration needed by the callback typedef below
+typedef void (*Alarm_Callback)(alarm_node_t & ); // callback signature: receives the alarm node by reference
 struct alarm_node_t {
         Time alarm;                             // time when alarm goes off
 …
         };
+        Alarm_Callback callback;
         bool set                :1;             // whether or not the alarm has been registered
         bool kernel_alarm       :1;             // true if this is not a user defined alarm
+        enum alarm_type type;           // kind of alarm: Kernel, User or Callback
 };
 DLISTED_MGD_IMPL_OUT(alarm_node_t)
 …
 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
 void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
+void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback );
 void ^?{}( alarm_node_t & this );

libcfa/src/concurrency/coroutine.cfa

-              r55acc3a
+              r139775e
 void ^?{}($coroutine& this) {
         if(this.state != Halted && this.state != Start && this.state != Primed) {
                 $coroutine * src = TL_GET( this_thread )->curr_cor;
+                $coroutine * src = active_coroutine();
                 $coroutine * dst = &this;
 …
         struct $coroutine * __cfactx_cor_finish(void) {
                 struct $coroutine * cor = kernelTLS.this_thread->curr_cor;
+                struct $coroutine * cor = active_coroutine();
                 if(cor->state == Primed) {

libcfa/src/concurrency/coroutine.hfa

-              r55acc3a
+              r139775e
 void prime(T & cor);
 static inline struct $coroutine * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
+static inline struct $coroutine * active_coroutine() { return active_thread()->curr_cor; }
 //-----------------------------------------------------------------------------
 …
         // set new coroutine that task is executing
         TL_GET( this_thread )->curr_cor = dst;
+        active_thread()->curr_cor = dst;
         // context switch to specified coroutine
 …
                 // will also migrate which means this value will
                 // stay in sync with the TLS
                 $coroutine * src = TL_GET( this_thread )->curr_cor;
+                $coroutine * src = active_coroutine();
                 assertf( src->last != 0,
 …
         // will also migrate which means this value will
         // stay in sync with the TLS
         $coroutine * src = TL_GET( this_thread )->curr_cor;
+        $coroutine * src = active_coroutine();
         $coroutine * dst = get_coroutine(cor);
         if( unlikely(dst->context.SP == 0p) ) {
                 TL_GET( this_thread )->curr_cor = dst;
+                active_thread()->curr_cor = dst;
                 __stack_prepare(&dst->stack, 65000);
                 __cfactx_start(main, dst, cor, __cfactx_invoke_coroutine);
                 TL_GET( this_thread )->curr_cor = src;
+                active_thread()->curr_cor = src;
+        }
 …
         // will also migrate which means this value will
         // stay in sync with the TLS
         $coroutine * src = TL_GET( this_thread )->curr_cor;
+        $coroutine * src = active_coroutine();
         // not resuming self ?

libcfa/src/concurrency/exception.cfa

r55acc3a	r139775e
72	72	void * stop_param;
73	73
74		struct $thread * this_thread = ~~TL_GET( this_thread~~ );
	74	struct $thread * this_thread = active_thread();
75	75	if ( &this_thread->self_cor != this_thread->curr_cor ) {
76	76	struct $coroutine * cor = this_thread->curr_cor;

libcfa/src/concurrency/io.cfa

-              r55acc3a
+              r139775e
         static inline bool next( __leaderlock_t & this ) {
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 struct $thread * nextt;
                 for() {
 …
         // This is NOT thread-safe
         static [int, bool] __drain_io( & struct __io_data ring ) {
                 /* paranoid */ verify( !kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 unsigned to_submit = 0;
 …
                                         return;
+                                }
                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                                /* paranoid */ verify( ! __preemption_enabled() );
                                 __STATS__( true,
                                         io.submit_q.leader += 1;
 …
                         #if defined(LEADER_LOCK)
                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                                /* paranoid */ verify( ! __preemption_enabled() );
                                 next(ring.submit_q.submit_lock);
                         #else

libcfa/src/concurrency/io/setup.cfa

-              r55acc3a
+              r139775e
                 id.full_proc = false;
                 id.id = doregister(&id);
+                __cfaabi_tls.this_proc_id = &id;
                 __cfaabi_dbg_print_safe( "Kernel : IO poller thread starting\n" );
 …
                                 __cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx);
                                 #if !defined( __CFA_NO_STATISTICS__ )
                                         kernelTLS.this_stats = io_ctx->self.curr_cluster->stats;
+                                        __cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
                                 #endif
                                 __post( io_ctx->sem, &id );
+                                post( io_ctx->sem );
+                        }
+                }
 …
                         if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) {
                                 ready_schedule_lock( (struct __processor_id_t *)active_processor() );
+                                ready_schedule_lock();
                                         // This is the tricky case
 …
                                         thrd.preempted = __NO_PREEMPTION;
                                 ready_schedule_unlock( (struct __processor_id_t *)active_processor() );
+                                ready_schedule_unlock();
                                 // Pretend like the thread was blocked all along
 …
+                        }
                 } else {
                         unpark( &thrd );
+                        post( this.thrd.sem );
+                }

libcfa/src/concurrency/kernel.cfa

-              r55acc3a
+              r139775e
 static $thread * __next_thread_slow(cluster * this);
 static void __run_thread(processor * this, $thread * dst);
 static void __wake_one(struct __processor_id_t * id, cluster * cltr);
+static void __wake_one(cluster * cltr);
 static void push  (__cluster_idles & idles, processor & proc);
 …
         // Because of a bug, we couldn't initialized the seed on construction
         // Do it here
         kernelTLS.rand_seed ^= rdtscl();
         kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
+        __cfaabi_tls.rand_seed ^= rdtscl();
+        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
         __tls_rand_advance_bck();
 …
                 // and it make sense for it to be set in all other cases except here
                 // fake it
                 kernelTLS.this_thread = mainThread;
+                __cfaabi_tls.this_thread = mainThread;
+        }
 …
 // from the processor coroutine to the target thread
 static void __run_thread(processor * this, $thread * thrd_dst) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
         /* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
 …
                 // Update global state
                 kernelTLS.this_thread = thrd_dst;
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
+                kernelTLS().this_thread = thrd_dst;
+                /* paranoid */ verify( ! __preemption_enabled() );
+                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
                 /* paranoid */ verify( thrd_dst->context.SP );
                 /* paranoid */ verify( thrd_dst->state != Halted );
 …
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
                 /* paranoid */ verify( thrd_dst->context.SP );
                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 // Reset global state
                 kernelTLS.this_thread = 0p;
+                kernelTLS().this_thread = 0p;
                 // We just finished running a thread, there are a few things that could have happened.
 …
                 if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
                         // The thread was preempted, reschedule it and reset the flag
                         __schedule_thread( (__processor_id_t*)this, thrd_dst );
+                        __schedule_thread( thrd_dst );
                         break RUNNING;
+                }
 …
         proc_cor->state = Active;
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 // KERNEL_ONLY
 void returnToKernel() {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         $coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
         $thread * thrd_src = kernelTLS.this_thread;
+        /* paranoid */ verify( ! __preemption_enabled() );
+        $coroutine * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
+        $thread * thrd_src = kernelTLS().this_thread;
         #if !defined(__CFA_NO_STATISTICS__)
                 struct processor * last_proc = kernelTLS.this_processor;
+                struct processor * last_proc = kernelTLS().this_processor;
         #endif
 …
         #if !defined(__CFA_NO_STATISTICS__)
                 if(last_proc != kernelTLS.this_processor) {
+                if(last_proc != kernelTLS().this_processor) {
                         __tls_stats()->ready.threads.migration++;
+                }
         #endif
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
         /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
 …
 // Scheduler routines
 // KERNEL ONLY
+void __schedule_thread( struct __processor_id_t * id, $thread * thrd ) {
+void __schedule_thread( $thread * thrd ) {
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( thrd );
         /* paranoid */ verify( thrd->state != Halted );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
         /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
         /* paranoid */  if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
 …
         if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
         ready_schedule_lock  ( id );
+        ready_schedule_lock();
                 push( thrd->curr_cluster, thrd );
                 __wake_one(id, thrd->curr_cluster);
         ready_schedule_unlock( id );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                __wake_one(thrd->curr_cluster);
+        ready_schedule_unlock();
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 // KERNEL ONLY
 // Takes the next thread off this cluster's ready queue with pop(), holding the
 // ready-schedule lock around the call, and returns whatever pop() produced.
 // The paranoid verifies require preemption to be disabled on entry and exit.
 // NOTE(review): this hunk interleaves both revisions — the kernelTLS.* /
 // ready_schedule_*(id) lines look like the old code, the kernelTLS() /
 // __preemption_enabled() / argument-less lock calls the new code — confirm.
 static inline $thread * __next_thread(cluster * this) with( *this ) {
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        ready_schedule_lock  ( (__processor_id_t*)kernelTLS.this_processor );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        ready_schedule_lock();
                 $thread * thrd = pop( this );
+        ready_schedule_unlock( (__processor_id_t*)kernelTLS.this_processor );
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        ready_schedule_unlock();
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return thrd;
+}
 …
 // KERNEL ONLY
 // Same shape as __next_thread, but dequeues with pop_slow() instead of pop():
 // the ready-schedule lock is held around the call and the result is returned
 // unchanged. The paranoid verifies require preemption to be disabled.
 // NOTE(review): how pop_slow() differs from pop() is not visible here.
 static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        ready_schedule_lock  ( (__processor_id_t*)kernelTLS.this_processor );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        ready_schedule_lock();
                 $thread * thrd = pop_slow( this );
+        ready_schedule_unlock( (__processor_id_t*)kernelTLS.this_processor );
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        ready_schedule_unlock();
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return thrd;
+}
+// KERNEL ONLY unpark without disabling interrupts
+void __unpark(  struct __processor_id_t * id, $thread * thrd ) {
+void unpark( $thread * thrd ) {
+        if( !thrd ) return;
         int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
         switch(old_ticket) {
 …
                         /* paranoid */ verify( thrd->state == Blocked );
+                        // Wake lost the race,
+                        __schedule_thread( id, thrd );
+                        {
+                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
+                                bool full = publicTLS_get(this_proc_id)->full_proc;
+                                if(full) disable_interrupts();
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                // Wake lost the race,
+                                __schedule_thread( thrd );
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                if(full) enable_interrupts( __cfaabi_dbg_ctx );
+                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
+                        }
                         break;
                 default:
 …
+}
+void unpark( $thread * thrd ) {
+        if( !thrd ) return;
+void park( void ) {
+        /* paranoid */ verify( __preemption_enabled() );
         disable_interrupts();
+        __unpark( (__processor_id_t*)kernelTLS.this_processor, thrd );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION );
+        returnToKernel();
+        /* paranoid */ verify( ! __preemption_enabled() );
         enable_interrupts( __cfaabi_dbg_ctx );
+}
+void park( void ) {
+        /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        disable_interrupts();
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
+        returnToKernel();
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        enable_interrupts( __cfaabi_dbg_ctx );
+        /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( __preemption_enabled() );
+}
 …
         // Should never return
         void __cfactx_thrd_leave() {
                 $thread * thrd = TL_GET( this_thread );
+                $thread * thrd = active_thread();
                 $monitor * this = &thrd->self_mon;
 …
                 thrd->state = Halted;
+                if( TICKET_RUNNING != thrd->ticket ) { abort( "Thread terminated with pending unpark" ); }
                 if( thrd != this->owner || this->recursion != 1) { abort( "Thread internal monitor has unbalanced recursion" ); }
                 // Leave the thread
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 returnToKernel();
                 abort();
 …
 // KERNEL ONLY
 bool force_yield( __Preemption_Reason reason ) {
         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( __preemption_enabled() );
         disable_interrupts();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         $thread * thrd = kernelTLS.this_thread;
+        /* paranoid */ verify( ! __preemption_enabled() );
+        $thread * thrd = kernelTLS().this_thread;
         /* paranoid */ verify(thrd->state == Active);
 …
+        }
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         enable_interrupts_noPoll();
         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( __preemption_enabled() );
         return preempted;
 …
 //=============================================================================================
 // Wake a thread from the front if there are any
 static void __wake_one(struct __processor_id_t * id, cluster * this) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         /* paranoid */ verify( ready_schedule_islocked( id ) );
+static void __wake_one(cluster * this) {
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( ready_schedule_islocked() );
         // Check if there is a sleeping processor
 …
         #endif
         /* paranoid */ verify( ready_schedule_islocked( id ) );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ready_schedule_islocked() );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return;
 …
         disable_interrupts();
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 post( this->idle );
         enable_interrupts( __cfaabi_dbg_ctx );
 …
 static void push  (__cluster_idles & this, processor & proc) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         lock( this );
                 this.idle++;
 …
                 insert_first(this.list, proc);
         unlock( this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 static void remove(__cluster_idles & this, processor & proc) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         lock( this );
                 this.idle--;
 …
                 remove(proc);
         unlock( this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 …
+        }
         return kernelTLS.this_thread;
+        return __cfaabi_tls.this_thread;
+}
 …
 int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
         return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
+        return get_coroutine(kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2;
+}
 …
         if ( count < 0 ) {
                 // queue current task
                 append( waiting, kernelTLS.this_thread );
+                append( waiting, active_thread() );
                 // atomically release spin lock and block
 …
                 void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
                         this.prev_name = prev_name;
                         this.prev_thrd = kernelTLS.this_thread;
+                        this.prev_thrd = kernelTLS().this_thread;
+                }
+        }
 …
                 this.print_halts = true;
+        }
+        void print_stats_now( cluster & this, int flags ) {
                 // Immediately prints this cluster's statistics by forwarding its stats
                 // object, stored print_stats mask and name to __print_stats().
                 // NOTE(review): the `flags` argument is not used; the stored
                 // this.print_stats is passed instead — confirm this is intended.
+                __print_stats( this.stats, this.print_stats, true, this.name, (void*)&this );
+        }
 #endif
 // Local Variables: //

libcfa/src/concurrency/kernel.hfa

-              r55acc3a
+              r139775e
 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
 // active_processor(): returns the this_processor pointer stored in thread-local storage.
 // active_cluster():   returns the cltr field of that same processor.
 // The first pair reads through TL_GET, the second through publicTLS_get.
 static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
 static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
+static inline struct processor * active_processor() { return publicTLS_get( this_processor ); } // UNSAFE
+static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
 #if !defined(__CFA_NO_STATISTICS__)
+        void print_stats_now( cluster & this, int flags );
         static inline void print_stats_at_exit( cluster & this, int flags ) {
                 this.print_stats |= flags;

libcfa/src/concurrency/kernel/fwd.hfa

-              r55acc3a
+              r139775e
         extern "Cforall" {
                 extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
+                        struct $thread    * volatile this_thread;
+                        struct processor  * volatile this_processor;
+                        struct __stats_t  * volatile this_stats;
+                        struct $thread          * volatile this_thread;
+                        struct processor        * volatile this_processor;
+                        struct __processor_id_t * volatile this_proc_id;
+                        struct __stats_t        * volatile this_stats;
                         struct {
 …
                                 uint64_t bck_seed;
                         } ready_rng;
                 } kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
+                } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
+                extern bool __preemption_enabled();
+                static inline KernelThreadData & kernelTLS( void ) {
                         // Kernel-side accessor for the thread-local KernelThreadData object
                         // (__cfaabi_tls). The verify checks that preemption is disabled
                         // before the reference is handed out.
+                        /* paranoid */ verify( ! __preemption_enabled() );
+                        return __cfaabi_tls;
+                }
+                extern uintptr_t __cfatls_get( unsigned long int member );
+                // #define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))
+                #define publicTLS_get( member ) (__cfaabi_tls.member)
+                // extern forall(otype T) T __cfatls_get( T * member, T value );
+                // #define publicTLS_set( member, value ) __cfatls_set( (typeof(member)*)__builtin_offsetof(KernelThreadData, member), value );
                 // Returns the next pseudo-random value derived from the thread-local rand_seed.
                 // __lehmer64 is used when __SIZEOF_INT128__ is defined, __xorshift64 otherwise.
                 static inline uint64_t __tls_rand() {
                         #if defined(__SIZEOF_INT128__)
                                 return __lehmer64( kernelTLS.rand_seed );
+                                return __lehmer64( kernelTLS().rand_seed );
                         #else
                                 return __xorshift64( kernelTLS.rand_seed );
+                                return __xorshift64( kernelTLS().rand_seed );
                         #endif
+                }
 …
                 // Steps the thread-local ready_rng.fwd_seed forward once,
                 //   seed = (A * seed + C) & (M - 1),
                 // and returns the updated seed shifted right by D.
                 // A, C, M and D are defined outside the visible part of this hunk.
                 static inline unsigned __tls_rand_fwd() {
                         kernelTLS.ready_rng.fwd_seed = (A * kernelTLS.ready_rng.fwd_seed + C) & (M - 1);
                         return kernelTLS.ready_rng.fwd_seed >> D;
+                        kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1);
+                        return kernelTLS().ready_rng.fwd_seed >> D;
+                }
                 // Captures ready_rng.bck_seed >> D as the result, then updates the seed with
                 //   seed = AI * (seed - C) & (M - 1),
                 // and returns the captured value.
                 // NOTE(review): presumably this undoes one __tls_rand_fwd step (AI looks like
                 // the inverse multiplier of A) — confirm against the constant definitions.
                 static inline unsigned __tls_rand_bck() {
                         unsigned int r = kernelTLS.ready_rng.bck_seed >> D;
                         kernelTLS.ready_rng.bck_seed = AI * (kernelTLS.ready_rng.bck_seed - C) & (M - 1);
+                        unsigned int r = kernelTLS().ready_rng.bck_seed >> D;
+                        kernelTLS().ready_rng.bck_seed = AI * (kernelTLS().ready_rng.bck_seed - C) & (M - 1);
                         return r;
+                }
 …
                 // Copies the current forward seed into the backward seed, so the backward
                 // sequence restarts from wherever the forward sequence currently is.
                 static inline void __tls_rand_advance_bck(void) {
                         kernelTLS.ready_rng.bck_seed = kernelTLS.ready_rng.fwd_seed;
+                        kernelTLS().ready_rng.bck_seed = kernelTLS().ready_rng.fwd_seed;
+                }
+        }
-        #if 0 // def __ARM_ARCH
-                // function prototypes are only really used by these macros on ARM
-                void disable_global_interrupts();
-                void enable_global_interrupts();
-                #define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
-                        disable_global_interrupts(); \
-                        target = kernelTLS.member; \
-                        enable_global_interrupts(); \
-                        target; } )
-                #define TL_SET( member, value ) disable_global_interrupts(); \
-                        kernelTLS.member = value; \
-                        enable_global_interrupts();
-        #else
-                #define TL_GET( member ) kernelTLS.member
-                #define TL_SET( member, value ) kernelTLS.member = value;
-        #endif
         extern void disable_interrupts();
 …
                 extern void park( void );
                 extern void unpark( struct $thread * this );
+                static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
+                static inline struct $thread * active_thread () {
                         // Reads this_thread from thread-local storage through publicTLS_get
                         // and returns it; the verify requires the pointer to be non-null.
+                        struct $thread * t = publicTLS_get( this_thread );
+                        /* paranoid */ verify( t );
+                        return t;
+                }
                 extern bool force_yield( enum __Preemption_Reason );
 …
                 #if !defined(__CFA_NO_STATISTICS__)
                         // Returns the thread-local this_stats pointer.
                         // The verifies require preemption to be disabled and the pointer to be set.
                         static inline struct __stats_t * __tls_stats() {
                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
                                 /* paranoid */ verify( kernelTLS.this_stats );
                                 return kernelTLS.this_stats;
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                /* paranoid */ verify( kernelTLS().this_stats );
+                                return kernelTLS().this_stats;
+                        }

libcfa/src/concurrency/kernel/startup.cfa

-              r55acc3a
+              r139775e
 //-----------------------------------------------------------------------------
 // Global state
 thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) @= {
+thread_local struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
         NULL,                                                                                           // cannot use 0p
+        NULL,
         NULL,
         NULL,
 …
 // Kernel boot procedures
 static void __kernel_startup(void) {
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         __cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
 …
         //initialize the global state variables
+        kernelTLS.this_processor = mainProcessor;
+        kernelTLS.this_thread    = mainThread;
+        __cfaabi_tls.this_processor = mainProcessor;
+        __cfaabi_tls.this_proc_id   = (__processor_id_t*)mainProcessor;
+        __cfaabi_tls.this_thread    = mainThread;
         #if !defined( __CFA_NO_STATISTICS__ )
                 kernelTLS.this_stats = (__stats_t *)& storage_mainProcStats;
                 __init_stats( kernelTLS.this_stats );
+                __cfaabi_tls.this_stats = (__stats_t *)& storage_mainProcStats;
+                __init_stats( __cfaabi_tls.this_stats );
         #endif
 …
         // Add the main thread to the ready queue
         // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
         __schedule_thread((__processor_id_t *)mainProcessor, mainThread);
+        __schedule_thread(mainThread);
         // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
         // context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
         // mainThread is on the ready queue when this call is made.
         __kernel_first_resume( kernelTLS.this_processor );
+        __kernel_first_resume( __cfaabi_tls.this_processor );
 …
         __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         enable_interrupts( __cfaabi_dbg_ctx );
+        verify( TL_GET( preemption_state.enabled ) );
+        /* paranoid */ verify( __preemption_enabled() );
+}
 …
         mainCluster->io.ctxs = 0p;
         /* paranoid */ verify( TL_GET( preemption_state.enabled ) );
+        /* paranoid */ verify( __preemption_enabled() );
         disable_interrupts();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
 …
         // which is currently here
         __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
         __kernel_last_resume( kernelTLS.this_processor );
+        __kernel_last_resume( __cfaabi_tls.this_processor );
         mainThread->self_cor.state = Halted;
 …
                 __stats_t local_stats;
                 __init_stats( &local_stats );
                 kernelTLS.this_stats = &local_stats;
+                __cfaabi_tls.this_stats = &local_stats;
         #endif
         processor * proc = (processor *) arg;
+        kernelTLS.this_processor = proc;
+        kernelTLS.this_thread    = 0p;
+        kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
+        __cfaabi_tls.this_processor = proc;
+        __cfaabi_tls.this_proc_id   = (__processor_id_t*)proc;
+        __cfaabi_tls.this_thread    = 0p;
+        __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
         // SKULLDUGGERY: We want to create a context for the processor coroutine
         // which is needed for the 2-step context switch. However, there is no reason
 …
         //Set global state
         kernelTLS.this_thread = 0p;
+        __cfaabi_tls.this_thread = 0p;
         //We now have a proper context from which to schedule threads
 …
         $coroutine * dst = get_coroutine(this->runner);
         verify( ! kernelTLS.preemption_state.enabled );
         kernelTLS.this_thread->curr_cor = dst;
+        /* paranoid */ verify( ! __preemption_enabled() );
+        __cfaabi_tls.this_thread->curr_cor = dst;
         __stack_prepare( &dst->stack, 65000 );
         __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         dst->last = &src->self_cor;
 …
         /* paranoid */ verify(src->state == Active);
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 …
         $coroutine * dst = get_coroutine(this->runner);
         verify( ! kernelTLS.preemption_state.enabled );
         verify( dst->starter == src );
         verify( dst->context.SP );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( dst->starter == src );
+        /* paranoid */ verify( dst->context.SP );
         // SKULLDUGGERY in debug the processors check that the
 …
                 P( terminated );
                 verify( kernelTLS.this_processor != &this);
+                /* paranoid */ verify( active_processor() != &this);
+        }
 …
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {
         kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
+        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
         unsigned values[10];

libcfa/src/concurrency/kernel_private.hfa

-              r55acc3a
+              r139775e
+}
 void __schedule_thread( struct __processor_id_t *, $thread * )
+void __schedule_thread( $thread * )
 #if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
         __attribute__((nonnull (2)))
+        __attribute__((nonnull (1)))
 #endif
+;
+extern bool __preemption_enabled();
 //release/wake-up the following resources
 …
+)
-// KERNEL ONLY unpark with out disabling interrupts
-void __unpark( struct __processor_id_t *, $thread * thrd );
 #define TICKET_BLOCKED (-1) // thread is blocked
 #define TICKET_RUNNING ( 0) // thread is running
 #define TICKET_UNBLOCK ( 1) // thread should ignore next block
-static inline bool __post(single_sem & this, struct __processor_id_t * id) {
-        for() {
-                struct $thread * expected = this.ptr;
-                if(expected == 1p) return false;
-                if(expected == 0p) {
-                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-                                return false;
+                        }
+                }
-                else {
-                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-                                __unpark( id, expected );
-                                return true;
+                        }
+                }
+        }
+}
 //-----------------------------------------------------------------------------
 …
 // Reader side : acquire when using the ready queue to schedule but not
 //  creating/destroying queues
+static inline void ready_schedule_lock( struct __processor_id_t * proc) with(*__scheduler_lock) {
+        unsigned iproc = proc->id;
+        /*paranoid*/ verify(data[iproc].handle == proc);
+static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        unsigned iproc = kernelTLS().this_proc_id->id;
+        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
         /*paranoid*/ verify(iproc < ready);
 …
+}
+static inline void ready_schedule_unlock( struct __processor_id_t * proc) with(*__scheduler_lock) {
+        unsigned iproc = proc->id;
+        /*paranoid*/ verify(data[iproc].handle == proc);
+static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        unsigned iproc = kernelTLS().this_proc_id->id;
+        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
         /*paranoid*/ verify(iproc < ready);
         /*paranoid*/ verify(data[iproc].lock);
 …
 #ifdef __CFA_WITH_VERIFY__
+        static inline bool ready_schedule_islocked( struct __processor_id_t * proc) {
+        static inline bool ready_schedule_islocked(void) {
                 // Verification-only helper (compiled under __CFA_WITH_VERIFY__): returns the
                 // `owned` flag of the __scheduler_lock entry indexed by the processor id.
                 // The argument-less form takes the id from kernelTLS().this_proc_id, which
                 // must be set, and requires preemption to be disabled.
+                /* paranoid */ verify( ! __preemption_enabled() );
+                /*paranoid*/ verify( kernelTLS().this_proc_id );
+                __processor_id_t * proc = kernelTLS().this_proc_id;
                 return __scheduler_lock->data[proc->id].owned;
+        }

libcfa/src/concurrency/locks.cfa

-              r55acc3a
+              r139775e
                 this.t = t;
                 this.lock = 0p;
+                this.listed = false;
+        }
 …
                 this.info = info;
                 this.lock = 0p;
+                this.listed = false;
+        }
 …
 void lock( blocking_lock & this ) with( this ) {
+        $thread * thrd = active_thread();
         lock( lock __cfaabi_dbg_ctx2 );
         if ( owner == kernelTLS.this_thread && !multi_acquisition) {
+        if ( owner == thrd && !multi_acquisition) {
                 fprintf(stderr, "A single acquisition lock holder attempted to reacquire the lock resulting in a deadlock."); // Possibly throw instead
                 exit(EXIT_FAILURE);
         } else if ( owner != 0p && owner != kernelTLS.this_thread ) {
                 append( blocked_threads, kernelTLS.this_thread );
+        exit(EXIT_FAILURE);
+        } else if ( owner != 0p && owner != thrd ) {
+                append( blocked_threads, thrd );
                 wait_count++;
                 unlock( lock );
                 park( __cfaabi_dbg_ctx );
         } else if ( owner == kernelTLS.this_thread && multi_acquisition ) {
+                park( );
+        } else if ( owner == thrd && multi_acquisition ) {
                 recursion_count++;
                 unlock( lock );
         } else {
                 owner = kernelTLS.this_thread;
+                owner = thrd;
                 recursion_count = 1;
                 unlock( lock );
 …
 bool try_lock( blocking_lock & this ) with( this ) {
+        $thread * thrd = active_thread();
         bool ret = false;
         lock( lock __cfaabi_dbg_ctx2 );
         if ( owner == 0p ) {
                 owner = kernelTLS.this_thread;
+                owner = thrd;
                 if ( multi_acquisition ) recursion_count = 1;
                 ret = true;
         } else if ( owner == kernelTLS.this_thread && multi_acquisition ) {
+        } else if ( owner == thrd && multi_acquisition ) {
                 recursion_count++;
                 ret = true;
 …
                 fprintf( stderr, "There was an attempt to release a lock that isn't held" );
                 return;
         } else if ( strict_owner && owner != kernelTLS.this_thread ) {
+        } else if ( strict_owner && active_thread() ) {
                 fprintf( stderr, "A thread other than the owner attempted to release an owner lock" );
                 return;
 …
                 recursion_count = ( thrd && multi_acquisition ? 1 : 0 );
                 wait_count--;
                 unpark( thrd __cfaabi_dbg_ctx2 );
+                unpark( thrd );
+        }
         unlock( lock );
 …
                 owner = t;
                 if ( multi_acquisition ) recursion_count = 1;
+                unpark( t __cfaabi_dbg_ctx2 );
+                #if !defined( __CFA_NO_STATISTICS__ )
+                        kernelTLS.this_stats = t->curr_cluster->stats;
+                #endif
+                unpark( t );
                 unlock( lock );
+        }
 …
         if ( owner == 0p ){ // no owner implies lock isn't held
                 fprintf( stderr, "A lock that is not held was passed to a synchronization lock" );
         } else if ( strict_owner && owner != kernelTLS.this_thread ) {
+        } else if ( strict_owner && active_thread() ) {
                 fprintf( stderr, "A thread other than the owner of a lock passed it to a synchronization lock" );
         } else {
 …
                 recursion_count = ( thrd && multi_acquisition ? 1 : 0 );
                 wait_count--;
                 unpark( thrd __cfaabi_dbg_ctx2 );
+                unpark( thrd );
+        }
         unlock( lock );
 …
 ///////////////////////////////////////////////////////////////////
+// In an ideal world this may not be necessary
+// Is it possible for nominal inheritance to inherit traits??
+// If that occurs we would avoid all this extra code
+// This is temporary until an inheritance bug is fixed
 void lock( mutex_lock & this ){
 …
 ///////////////////////////////////////////////////////////////////
 //// Synchronization Locks
+//// condition variable
 ///////////////////////////////////////////////////////////////////
 forall(dtype L | is_blocking_lock(L)) {
+        void ?{}( synchronization_lock(L) & this, bool reacquire_after_signal ){
+        // Timeout handler for a timed wait on a condition variable.
+        // If the waiter referenced by this node is still listed on the queue, it is
+        // removed and then either woken directly (no lock recorded for it) or handed
+        // to its lock through add_().
+        // This condition_variable member is called from the kernel, and therefore, cannot block, but it can spin.
+        void timeout_handler ( alarm_node_wrap(L) & this ) with( this ) {
+                lock( cond->lock __cfaabi_dbg_ctx2 );
+                if ( (*i)->listed ) {                       // is thread on queue
+                        info_thread(L) * copy = *i;
+                        remove( cond->blocked_threads, i );              //remove this thread O(1)
+                        // NOTE(review): the other condition_variable routines in this file
+                        // use `count`, not `wait_count` — confirm this is the right field.
+                        cond->wait_count--;
+                        if( !copy->lock ) {
+                                // cond->lock is deliberately NOT released here: the single
+                                // unlock at the end of the function covers every path.
+                                // Releasing it in this branch as well unlocked it twice.
+                                // notify_one/notify_all also unpark while holding the lock.
+                                #if !defined( __CFA_NO_STATISTICS__ )
+                                        #warning unprotected access to tls TODO discuss this
+                                        kernelTLS.this_stats = copy->t->curr_cluster->stats;
+                                #endif
+                                unpark( copy->t );
+                        } else {
+                                add_(*copy->lock, copy->t);                     // call lock's add_
+                        }
+                }
+                unlock( cond->lock );
+        }
+        void alarm_node_wrap_cast( alarm_node_t & a ) {
                 // Adapter: reinterprets the plain alarm_node_t as an alarm_node_wrap(L)
                 // and forwards it to timeout_handler().
+                timeout_handler( (alarm_node_wrap(L) &)a );
+        }
+        void ?{}( condition_variable(L) & this ){
                 this.lock{};
                 this.blocked_threads{};
                 this.count = 0;
-                this.reacquire_after_signal = reacquire_after_signal;
+        }
-        void ^?{}( synchronization_lock(L) & this ){
-                // default
+        }
-        void ?{}( condition_variable(L) & this ){
-                ((synchronization_lock(L) &)this){ true };
+        }
 …
+        }
         void ?{}( thread_queue(L) & this ){
                 ((synchronization_lock(L) &)this){ false };
+        }
         void ^?{}( thread_queue(L) & this ){
+        void ?{}( alarm_node_wrap(L) & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback ) {
+                this.alarm_node{ thrd, alarm, period, callback };
+        }
+        void ^?{}( alarm_node_wrap(L) & this ) {
                 // default
+        }
         bool notify_one( synchronization_lock(L) & this ) with( this ) {
+        bool notify_one( condition_variable(L) & this ) with( this ) {
                 lock( lock __cfaabi_dbg_ctx2 );
                 bool ret = !!blocked_threads;
                 info_thread(L) * popped = pop_head( blocked_threads );
+                popped->listed = false;
                 if(popped != 0p) {
+                        if( reacquire_after_signal ){
+                        count--;
+                        if (popped->lock) {
                                 add_(*popped->lock, popped->t);
                         } else {
+                                unpark(
+                                        popped->t __cfaabi_dbg_ctx2
+                                );
+                                unpark(popped->t);
+                        }
+                }
 …
+        }
         bool notify_all( synchronization_lock(L) & this ) with(this) {
+        bool notify_all( condition_variable(L) & this ) with(this) {
                 lock( lock __cfaabi_dbg_ctx2 );
                 bool ret = blocked_threads ? true : false;
                 while( blocked_threads ) {
                         info_thread(L) * popped = pop_head( blocked_threads );
+                        popped->listed = false;
                         if(popped != 0p){
+                                if( reacquire_after_signal ){
+                                count--;
+                                if (popped->lock) {
                                         add_(*popped->lock, popped->t);
                                 } else {
+                                        unpark(
+                                                popped->t __cfaabi_dbg_ctx2
+                                        );
+                                        unpark(popped->t);
+                                }
+                        }
 …
+        }
+        uintptr_t front( synchronization_lock(L) & this ) with(this) { // returns the info value stored in the entry at the head of blocked_threads
+                return (*peek(blocked_threads)).info; // NOTE(review): no emptiness check before the dereference; presumably peek yields a null pointer on an empty queue -- confirm
+        }
+        bool empty( synchronization_lock(L) & this ) with(this) {
+        uintptr_t front( condition_variable(L) & this ) with(this) { // returns the info value stored in the entry at the head of blocked_threads
+                if(!blocked_threads) return NULL; // empty queue: NULL is returned through the uintptr_t return type
+                return peek(blocked_threads)->info; // head entry is inspected with peek, not popped
+        }
+        bool empty( condition_variable(L) & this ) with(this) {
                 return blocked_threads ? false : true;
+        }
         int counter( synchronization_lock(L) & this ) with(this) {
+        int counter( condition_variable(L) & this ) with(this) {
                 return count;
+        }
+        void queue_info_thread( synchronization_lock(L) & this, info_thread(L) & i ) with(this) { // helper: enqueue i on blocked_threads, then park the caller
+                lock( lock __cfaabi_dbg_ctx2 ); // take the internal spinlock guarding the queue and count
+                append( blocked_threads, &i ); // the queue stores the address of the caller-provided node
+                count++; // one more blocked thread
+                unlock( lock ); // the spinlock is released before parking
+                park( __cfaabi_dbg_ctx );
+        }
+        void wait( synchronization_lock(L) & this ) with(this) { // wait with no lock, no info value and no timeout
+                info_thread( L ) i = { kernelTLS.this_thread }; // local queue node built from kernelTLS.this_thread
+                queue_info_thread( this, i ); // enqueues the node and parks
+        }
+        void wait( synchronization_lock(L) & this, uintptr_t info ) with(this) { // wait with an info value attached; no lock and no timeout
+                info_thread( L ) i = { kernelTLS.this_thread, info }; // local queue node carrying the caller's info value
+                queue_info_thread( this, i ); // enqueues the node and parks
+        }
+        // TODO: the time-delay wait routines below are not implemented yet
+        bool wait( synchronization_lock(L) & this, Duration duration ) with(this) { // relative-timeout wait: forwards to the Time overload
+                timeval tv = { time(0) }; // timeval initialized from time(0)
+                Time t = { tv };
+                return wait( this, t + duration ); // t + duration is passed on as the Time argument
+        }
+        bool wait( synchronization_lock(L) & this, uintptr_t info, Duration duration ) with(this) { // relative-timeout wait taking an info value
+                // TODO: the info argument is currently dropped; it still has to be attached to the wait
+                return wait( this, duration ); // forwards to the Duration overload without info
+        }
+        bool wait( synchronization_lock(L) & this, Time time ) with(this) { // absolute-timeout wait: placeholder that does not enqueue or park
+                return false; // placeholder result; the time argument is unused
+        }
+        bool wait( synchronization_lock(L) & this, uintptr_t info, Time time ) with(this) { // absolute-timeout wait taking an info value
+                // TODO: the info argument is currently dropped; it still has to be attached to the wait
+                return wait( this, time ); // forwards to the Time overload without info
+        }
+        void queue_info_thread_unlock( synchronization_lock(L) & this, L & l, info_thread(L) & i ) with(this) {
+        // helper for the wait() overloads without a timeout
+        void queue_info_thread( condition_variable(L) & this, info_thread(L) & i ) with(this) {
                 lock( lock __cfaabi_dbg_ctx2 );
                 append( this.blocked_threads, &i );
                 count++;
+                i.lock = &l;
+                size_t recursion_count = get_recursion_count(l);
+                remove_( l );
+                unlock( lock );
+                park( __cfaabi_dbg_ctx ); // blocks here
+                set_recursion_count(l, recursion_count); // resets recursion count here after waking
+        }
+        void wait( synchronization_lock(L) & this, L & l ) with(this) { // wait that is handed a lock l; no info value and no timeout
+                info_thread(L) i = { kernelTLS.this_thread }; // local queue node built from kernelTLS.this_thread
+                queue_info_thread_unlock( this, l, i ); // helper receives both the lock and the node
+        }
+        void wait( synchronization_lock(L) & this, L & l, uintptr_t info ) with(this) { // wait that is handed a lock l and an info value; no timeout
+                info_thread(L) i = { kernelTLS.this_thread, info }; // local queue node carrying the caller's info value
+                queue_info_thread_unlock( this, l, i ); // helper receives both the lock and the node
+        }
+        bool wait( synchronization_lock(L) & this, L & l, Duration duration ) with(this) { // relative-timeout wait with a lock: forwards to the Time overload
+                timeval tv = { time(0) }; // timeval initialized from time(0)
+                Time t = { tv };
+                return wait( this, l, t + duration ); // t + duration is passed on as the Time argument
+        }
+        bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { // relative-timeout wait with a lock and an info value
+                // TODO: the info argument is currently dropped; it still has to be attached to the wait
+                return wait( this, l, duration ); // forwards to the lock + Duration overload without info
+        }
+        bool wait( synchronization_lock(L) & this, L & l, Time time ) with(this) { // absolute-timeout wait with a lock: placeholder that does not touch l, enqueue or park
+                return false; // placeholder result; l and time are unused
+        }
+        bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Time time ) with(this) { // absolute-timeout wait with a lock and an info value
+                // TODO: the info argument is currently dropped; it still has to be attached to the wait
+                return wait( this, l, time ); // forwards to the lock + Time overload without info
+        }
+}
+///////////////////////////////////////////////////////////////////
+//// condition lock alternative approach
+///////////////////////////////////////////////////////////////////
+// the solution below is less efficient but does not require the lock to have a specific add/remove routine
+///////////////////////////////////////////////////////////////////
+//// is_simple_lock
+///////////////////////////////////////////////////////////////////
+forall(dtype L | is_simple_lock(L)) { // condition_lock routines, available for any lock type L satisfying is_simple_lock
+        void ?{}( condition_lock(L) & this ){
+                // intentionally empty: no explicit initialization is written here
+        }
+        void ^?{}( condition_lock(L) & this ){
+                // intentionally empty: no explicit cleanup is written here
+        }
+        bool notify_one( condition_lock(L) & this ) with(this) { // forwards to notify_one on the internal c_var and returns its result
+                return notify_one( c_var );
+        }
+        bool notify_all( condition_lock(L) & this ) with(this) { // forwards to notify_all on the internal c_var and returns its result
+                return notify_all( c_var );
+        }
+        void wait( condition_lock(L) & this, L & l ) with(this) { // releases l, waits on c_var, then re-acquires l and restores its recursion count
+                lock( m_lock ); // internal lock, taken before the user lock is released
+                size_t recursion = get_recursion_count( l ); // saved so it can be restored after waking
+                unlock( l );
+                wait( c_var, m_lock ); // wait on the internal condition variable, passing the internal lock
+                lock( l ); // re-acquire the user lock after the wait returns
+                set_recursion_count( l , recursion );
+                unlock( m_lock ); // internal lock is released last
+        }
+}
+                i.listed = true;
+                size_t recursion_count;
+                if (i.lock) {
+                        recursion_count = get_recursion_count(*i.lock);
+                        remove_( *i.lock );
+                }
+                unlock( lock );
+                park( ); // blocks here
+                if (i.lock) set_recursion_count(*i.lock, recursion_count); // resets recursion count here after waking
+        }
+        // helper for the wait() overloads with a timeout: registers an alarm for time t, enqueues info, then parks
+        void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Time t ) with(this) {
+                lock( lock __cfaabi_dbg_ctx2 ); // take the internal spinlock guarding the queue and count
+                info_thread(L) * queue_ptr = &info; // pointer to the caller's node; its address is handed to the alarm wrapper below
+                alarm_node_wrap(L) node_wrap = { info.t, t, 0`s, alarm_node_wrap_cast }; // thread info.t, alarm time t, period 0`s, callback alarm_node_wrap_cast
+                node_wrap.cond = &this; // wrapper records which condition variable it belongs to
+                node_wrap.i = &queue_ptr; // wrapper records the address of the local node pointer
+                register_self( &node_wrap.alarm_node ); // NOTE(review): node_wrap is a local and nothing in this routine unregisters it after park() returns -- presumably handled when node_wrap is destroyed; confirm
+                append( blocked_threads, queue_ptr );
+                info.listed = true; // mark the node as being on the queue
+                count++; // one more blocked thread
+                size_t recursion_count; // only assigned and read when info.lock is set
+                if (info.lock) {
+                        recursion_count = get_recursion_count(*info.lock); // saved so it can be restored after waking
+                        remove_( *info.lock ); // called on the caller's lock before parking
+                }
+                unlock( lock ); // the spinlock is released before parking
+                park();
+                if (info.lock) set_recursion_count(*info.lock, recursion_count); // restore the saved recursion count after waking
+        }
+        void wait( condition_variable(L) & this ) with(this) { // wait with no lock, no info value and no timeout
+                info_thread( L ) i = { active_thread() }; // local queue node for the calling thread
+                queue_info_thread( this, i ); // untimed helper
+        }
+        void wait( condition_variable(L) & this, uintptr_t info ) with(this) { // wait with an info value attached; no lock and no timeout
+                info_thread( L ) i = { active_thread(), info }; // local queue node carrying the caller's info value
+                queue_info_thread( this, i ); // untimed helper
+        }
+        void wait( condition_variable(L) & this, Duration duration ) with(this) { // wait with a relative timeout; no lock and no info value
+                info_thread( L ) i = { active_thread() }; // local queue node for the calling thread
+                queue_info_thread_timeout(this, i, __kernel_get_time() + duration ); // alarm time is __kernel_get_time() plus duration
+        }
+        void wait( condition_variable(L) & this, uintptr_t info, Duration duration ) with(this) { // wait with an info value and a relative timeout; no lock
+                info_thread( L ) i = { active_thread(), info }; // local queue node carrying the caller's info value
+                queue_info_thread_timeout(this, i, __kernel_get_time() + duration ); // alarm time is __kernel_get_time() plus duration
+        }
+        void wait( condition_variable(L) & this, Time time ) with(this) { // wait with an absolute timeout; no lock and no info value
+                info_thread( L ) i = { active_thread() }; // local queue node for the calling thread
+                queue_info_thread_timeout(this, i, time); // time is passed through unchanged as the alarm time
+        }
+        void wait( condition_variable(L) & this, uintptr_t info, Time time ) with(this) { // wait with an info value and an absolute timeout; no lock
+                info_thread( L ) i = { active_thread(), info }; // local queue node carrying the caller's info value
+                queue_info_thread_timeout(this, i, time); // time is passed through unchanged as the alarm time
+        }
+        void wait( condition_variable(L) & this, L & l ) with(this) { // wait that is handed a lock l; no info value and no timeout
+                info_thread(L) i = { active_thread() }; // local queue node for the calling thread
+                i.lock = &l; // the lock travels with the node so the helper can act on it
+                queue_info_thread( this, i ); // untimed helper
+        }
+        void wait( condition_variable(L) & this, L & l, uintptr_t info ) with(this) { // wait that is handed a lock l and an info value; no timeout
+                info_thread(L) i = { active_thread(), info }; // local queue node carrying the caller's info value
+                i.lock = &l; // the lock travels with the node so the helper can act on it
+                queue_info_thread( this, i ); // untimed helper
+        }
+        void wait( condition_variable(L) & this, L & l, Duration duration ) with(this) { // wait that is handed a lock l, with a relative timeout
+                info_thread(L) i = { active_thread() }; // local queue node for the calling thread
+                i.lock = &l; // the timeout helper calls remove_ on this lock before parking and restores its recursion count after
+                queue_info_thread_timeout(this, i, __kernel_get_time() + duration ); // alarm time is __kernel_get_time() plus duration
+        }
+        void wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { // wait that is handed a lock l and an info value, with a relative timeout
+                info_thread(L) i = { active_thread(), info }; // local queue node carrying the caller's info value
+                i.lock = &l; // the timeout helper calls remove_ on this lock before parking and restores its recursion count after
+                queue_info_thread_timeout(this, i, __kernel_get_time() + duration ); // alarm time is __kernel_get_time() plus duration
+        }
+        void wait( condition_variable(L) & this, L & l, Time time ) with(this) { // wait that is handed a lock l, with an absolute timeout
+                info_thread(L) i = { active_thread() }; // local queue node for the calling thread
+                i.lock = &l; // the timeout helper calls remove_ on this lock before parking and restores its recursion count after
+                queue_info_thread_timeout(this, i, time ); // time is passed through unchanged as the alarm time
+        }
+        void wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ) with(this) { // wait that is handed a lock l and an info value, with an absolute timeout
+                info_thread(L) i = { active_thread(), info }; // local queue node carrying the caller's info value
+                i.lock = &l; // the timeout helper calls remove_ on this lock before parking and restores its recursion count after
+                queue_info_thread_timeout(this, i, time ); // time is passed through unchanged as the alarm time
+        }
+}

libcfa/src/concurrency/locks.hfa

-              r55acc3a
+              r139775e
+#pragma once
 #include <stdbool.h>
 …
 #include "time.hfa"
 #include <sys/time.h>
+#include "alarm.hfa"
 ///////////////////////////////////////////////////////////////////
 …
                 info_thread(L) * next;
                 L * lock;
+                bool listed;                                    // true if info_thread is on queue, false otherwise;
         };
 …
 ///////////////////////////////////////////////////////////////////
 forall(dtype L | is_blocking_lock(L)) {
         struct synchronization_lock {
+        struct condition_variable {
                 // Spin lock used for mutual exclusion
                 __spinlock_t lock;
 …
                 // Count of current blocked threads
                 int count;
-                // If true threads will reacquire the lock they block on upon waking
-                bool reacquire_after_signal;
         };
-        struct condition_variable {
-                inline synchronization_lock(L);
-        };
-        struct thread_queue {
-                inline synchronization_lock(L);
-        };
-        void ?{}( synchronization_lock(L) & this, bool multi_acquisition, bool strict_owner );
-        void ^?{}( synchronization_lock(L) & this );
         void ?{}( condition_variable(L) & this );
         void ^?{}( condition_variable(L) & this );
         void ?{}( thread_queue(L) & this );
         void ^?{}( thread_queue(L) & this );
+        struct alarm_node_wrap {
+                alarm_node_t alarm_node;
+        bool notify_one( synchronization_lock(L) & this );
+        bool notify_all( synchronization_lock(L) & this );
+                condition_variable(L) * cond;
+        uintptr_t front( synchronization_lock(L) & this );
+        bool empty( synchronization_lock(L) & this );
+        int counter( synchronization_lock(L) & this );
+        // wait functions that are not passed a mutex lock
+        void wait( synchronization_lock(L) & this );
+        void wait( synchronization_lock(L) & this, uintptr_t info );
+        bool wait( synchronization_lock(L) & this, Duration duration );
+        bool wait( synchronization_lock(L) & this, uintptr_t info, Duration duration );
+        bool wait( synchronization_lock(L) & this, Time time );
+        bool wait( synchronization_lock(L) & this, uintptr_t info, Time time );
+        // wait functions that are passed a lock
+        bool notify_one( synchronization_lock(L) & this, L & l );
+        bool notify_all( synchronization_lock(L) & this, L & l );
+        void wait( synchronization_lock(L) & this, L & l );
+        void wait( synchronization_lock(L) & this, L & l, uintptr_t info );
+        bool wait( synchronization_lock(L) & this, L & l, Duration duration );
+        bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Duration duration );
+        bool wait( synchronization_lock(L) & this, L & l, Time time );
+        bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Time time );
+}
+///////////////////////////////////////////////////////////////////
+//// condition lock alternative approach
+///////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////
+//// is_simple_lock
+///////////////////////////////////////////////////////////////////
+trait is_simple_lock(dtype L | sized(L)) { // operations a lock type L must provide to be usable with condition_lock
+        void lock( L & );               // acquire; used by synchronization locks when acquiring
+        void unlock( L & );    // release; used by synchronization locks when releasing
+        size_t get_recursion_count( L & ); // read the recursion count so the condition lock can restore it after waking
+        void set_recursion_count( L &, size_t recursion ); // write the recursion count back after the waiter has been signalled
+};
+forall(dtype L | is_simple_lock(L)) {
+        struct condition_lock {
+                // Spin lock used for mutual exclusion
+                mutex_lock m_lock;
+                condition_variable( mutex_lock ) c_var;
+                info_thread(L) ** i;
         };
         void ?{}( condition_lock(L) & this );
         void ^?{}( condition_lock(L) & this );
+        void ?{}( alarm_node_wrap(L) & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback );
+        void ^?{}( alarm_node_wrap(L) & this );
+        bool notify_one( condition_lock(L) & this );
+        bool notify_all( condition_lock(L) & this );
+        void wait( condition_lock(L) & this, L & l );
+        void alarm_node_callback( alarm_node_wrap(L) & this );
+        void alarm_node_wrap_cast( alarm_node_t & a );
+        bool notify_one( condition_variable(L) & this );
+        bool notify_all( condition_variable(L) & this );
+        uintptr_t front( condition_variable(L) & this );
+        bool empty( condition_variable(L) & this );
+        int counter( condition_variable(L) & this );
+        // TODO: look into changing the timeout routines to return a bool showing whether the thread was signalled or woken by the kernel
+        void wait( condition_variable(L) & this );
+        void wait( condition_variable(L) & this, uintptr_t info );
+        void wait( condition_variable(L) & this, Duration duration );
+        void wait( condition_variable(L) & this, uintptr_t info, Duration duration );
+        void wait( condition_variable(L) & this, Time time );
+        void wait( condition_variable(L) & this, uintptr_t info, Time time );
+        void wait( condition_variable(L) & this, L & l );
+        void wait( condition_variable(L) & this, L & l, uintptr_t info );
+        void wait( condition_variable(L) & this, L & l, Duration duration );
+        void wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration );
+        void wait( condition_variable(L) & this, L & l, Time time );
+        void wait( condition_variable(L) & this, L & l, uintptr_t info, Time time );
+}

libcfa/src/concurrency/monitor.cfa

-              r55acc3a
+              r139775e
 // Enter single monitor
 static void __enter( $monitor * this, const __monitor_group_t & group ) {
+        $thread * thrd = active_thread();
         // Lock the monitor spinlock
         lock( this->lock __cfaabi_dbg_ctx2 );
-        // Interrupts disable inside critical section
-        $thread * thrd = kernelTLS.this_thread;
         __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
 …
                 __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 return;
+        }
 …
         __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
         /* paranoid */ verify( this->lock.lock );
 …
 static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
+        $thread * thrd = active_thread();
         // Lock the monitor spinlock
         lock( this->lock __cfaabi_dbg_ctx2 );
-        // Interrupts disable inside critical section
-        $thread * thrd = kernelTLS.this_thread;
         __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
 …
                 __set_owner( this, thrd );
                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 unlock( this->lock );
 …
                 this->owner = thrd;
                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 unlock( this->lock );
 …
                 // Release the next thread
                 /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 unpark( urgent->owner->waiting_thread );
 …
                 // Some one was waiting for us, enter
                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
+        }
         else {
 …
                 park();
                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 return;
+        }
 …
         lock( this->lock __cfaabi_dbg_ctx2 );
         __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+        __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", active_thread(), this, this->owner);
+        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
         // Leaving a recursion level, decrement the counter
 …
 void __dtor_leave( $monitor * this, bool join ) {
         __cfaabi_dbg_debug_do(
                 if( TL_GET( this_thread ) != this->owner ) {
                         abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
+                if( active_thread() != this->owner ) {
+                        abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, active_thread(), this->owner);
+                }
                 if( this->recursion != 1  && !join ) {
 …
         /* paranoid */ verify( this->lock.lock );
         /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( thrd->state == Halted );
         /* paranoid */ verify( this->recursion == 1 );
 …
         // Unpark the next owner if needed
         /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( thrd->state == Halted );
         unpark( new_owner );
 …
 // Sorts monitors before entering
 void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
         $thread * thrd = TL_GET( this_thread );
+        $thread * thrd = active_thread();
         // Store current array
 …
         // Restore thread context
         TL_GET( this_thread )->monitors = this.prev;
+        active_thread()->monitors = this.prev;
+}
 …
 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
         // optimization
         $thread * thrd = TL_GET( this_thread );
+        $thread * thrd = active_thread();
         // Store current array
 …
         // Restore thread context
         TL_GET( this_thread )->monitors = this.prev;
+        active_thread()->monitors = this.prev;
+}
 …
         // Create the node specific to this wait operation
         wait_ctx( TL_GET( this_thread ), user_info );
+        wait_ctx( active_thread(), user_info );
         // Append the current wait operation to the ones already queued on the condition
 …
         //Some more checking in debug
         __cfaabi_dbg_debug_do(
                 $thread * this_thrd = TL_GET( this_thread );
+                $thread * this_thrd = active_thread();
                 if ( this.monitor_count != this_thrd->monitors.size ) {
                         abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
 …
         // Create the node specific to this wait operation
         wait_ctx_primed( kernelTLS.this_thread, 0 )
+        wait_ctx_primed( active_thread(), 0 )
         //save contexts
 …
                                 // Create the node specific to this wait operation
                                 wait_ctx_primed( kernelTLS.this_thread, 0 );
+                                wait_ctx_primed( active_thread(), 0 );
                                 // Save monitor states
 …
         // Create the node specific to this wait operation
         wait_ctx_primed( kernelTLS.this_thread, 0 );
+        wait_ctx_primed( active_thread(), 0 );
         monitor_save;
 …
         for( __lock_size_t i = 0; i < count; i++) {
                 verify( monitors[i]->owner == kernelTLS.this_thread );
+                verify( monitors[i]->owner == active_thread() );
+        }
 …
 static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
         /* paranoid */ verify ( monitors[0]->lock.lock );
         /* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
+        /* paranoid */ verifyf( monitors[0]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[0]->owner, monitors[0]->recursion, monitors[0] );
         monitors[0]->owner        = owner;
         monitors[0]->recursion    = 1;
         for( __lock_size_t i = 1; i < count; i++ ) {
                 /* paranoid */ verify ( monitors[i]->lock.lock );
                 /* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
+                /* paranoid */ verifyf( monitors[i]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[i]->owner, monitors[i]->recursion, monitors[i] );
                 monitors[i]->owner        = owner;
                 monitors[i]->recursion    = 0;
 …
                 //regardless of if we are ready to baton pass,
                 //we need to set the monitor as in use
                 /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 __set_owner( this,  urgent->owner->waiting_thread );
 …
         // Get the next thread in the entry_queue
         $thread * new_owner = pop_head( this->entry_queue );
         /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+        /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
         /* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
         __set_owner( this, new_owner );
 …
 static inline void brand_condition( condition & this ) {
         $thread * thrd = TL_GET( this_thread );
+        $thread * thrd = active_thread();
         if( !this.monitors ) {
                 // __cfaabi_dbg_print_safe( "Branding\n" );

libcfa/src/concurrency/mutex.cfa

-              r55acc3a
+              r139775e
         lock( lock __cfaabi_dbg_ctx2 );
         if( is_locked ) {
                 append( blocked_threads, kernelTLS.this_thread );
+                append( blocked_threads, active_thread() );
                 unlock( lock );
                 park();
 …
         lock( lock __cfaabi_dbg_ctx2 );
         if( owner == 0p ) {
                 owner = kernelTLS.this_thread;
+                owner = active_thread();
                 recursion_count = 1;
                 unlock( lock );
+        }
         else if( owner == kernelTLS.this_thread ) {
+        else if( owner == active_thread() ) {
                 recursion_count++;
                 unlock( lock );
+        }
         else {
                 append( blocked_threads, kernelTLS.this_thread );
+                append( blocked_threads, active_thread() );
                 unlock( lock );
                 park();
 …
         lock( lock __cfaabi_dbg_ctx2 );
         if( owner == 0p ) {
                 owner = kernelTLS.this_thread;
+                owner = active_thread();
                 recursion_count = 1;
                 ret = true;
+        }
         else if( owner == kernelTLS.this_thread ) {
+        else if( owner == active_thread() ) {
                 recursion_count++;
                 ret = true;
 …
 void wait(condition_variable & this) {
         lock( this.lock __cfaabi_dbg_ctx2 );
         append( this.blocked_threads, kernelTLS.this_thread );
+        append( this.blocked_threads, active_thread() );
         unlock( this.lock );
         park();
 …
 void wait(condition_variable & this, L & l) {
         lock( this.lock __cfaabi_dbg_ctx2 );
         append( this.blocked_threads, kernelTLS.this_thread );
+        append( this.blocked_threads, active_thread() );
         unlock(l);
         unlock(this.lock);

libcfa/src/concurrency/preemption.cfa

-              r55acc3a
+              r139775e
 // Created On       : Mon Jun 5 14:20:42 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Wed Aug 26 16:46:03 2020
 // Update Count     : 53
+// Last Modified On : Fri Nov  6 07:42:13 2020
+// Update Count     : 54
 //
 …
 // FwdDeclarations : timeout handlers
 static void preempt( processor   * this );
 static void timeout( struct __processor_id_t * id, $thread * this );
+static void timeout( $thread * this );
 // FwdDeclarations : Signal handlers
 …
 // Tick one frame of the Discrete Event Simulation for alarms
 static void tick_preemption( struct __processor_id_t * id ) {
+static void tick_preemption(void) {
         alarm_node_t * node = 0p;                                                       // Used in the while loop but cannot be declared in the while condition
         alarm_list_t * alarms = &event_kernel->alarms;          // Local copy for ease of reading
 …
                 // Check if this is a kernel
                 if( node->kernel_alarm ) {
+                if( node->type == Kernel ) {
                         preempt( node->proc );
+                }
+                else if( node->type == User ) {
+                        timeout( node->thrd );
+                }
                 else {
                         timeout( id, node->thrd );
+                        node->callback(*node);
+                }
 …
 // Kernel Signal Tools
 //=============================================================================================
+__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
+// In a user-level threading system, there are a handful of thread-local variables where this problem occurs on the ARM.
+//
+// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
+// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
+//
+// For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
+//
+// _Thread_local volatile int interrupts;
+// int main() {
+//     interrupts = 0; /* disable interrupts */ }
+//
+// which generates the following code on the ARM
+//
+// (gdb) disassemble main
+// Dump of assembler code for function main:
+//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
+//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
+//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
+//    0x000000000000061c <+12>: add     x1, x1, #0x10
+//    0x0000000000000620 <+16>: str     wzr, [x1]
+//    0x0000000000000624 <+20>: ret
+//
+// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
+// increase the TLS pointer by the displacement (offset) 0x10, which is the location in the TLS of variable
+// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
+// TLS. Now, once x1 holds the pointer to the location in the TLS of kernel thread N, the user thread can be preempted
+// at user level and put on the user-level ready-queue. When the preempted thread gets to the front of
+// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
+// turning off interrupts on the wrong kernel thread.
+//
+// On the x86, the following code is generated for the same code fragment.
+//
+// (gdb) disassemble main
+// Dump of assembler code for function main:
+//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
+//    0x000000000040042c <+12>: xor    %eax,%eax
+//    0x000000000040042e <+14>: retq
+//
+// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
+// register "fs".
+//
+// Hence, the ARM has base-displacement addressing for the general purpose registers, BUT not for the coprocessor
+// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
+//
+// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
+// thread on the same kernel thread.
+//
+// The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
+// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
+// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
+// available.
+//----------
+// special case for preemption since used often: returns the 'enabled' flag of __cfaabi_tls.preemption_state
+bool __preemption_enabled() {
+        // create an assembler label (__cfaasm_check_before) just before the TLS access
+        // the "memory" clobber is there to avoid movement across the label
+        asm volatile("__cfaasm_check_before:":::"memory");
+        // access tls as normal
+        bool enabled = __cfaabi_tls.preemption_state.enabled;
+        // create an assembler label (__cfaasm_check_after) just after the TLS access
+        // the "memory" clobber is there to avoid movement across the label
+        asm volatile("__cfaasm_check_after:":::"memory");
+        return enabled;
+}
+//----------
+// Get data from the TLS block: reads one uintptr_t located 'offset' bytes past the address of __cfaabi_tls
+uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
+uintptr_t __cfatls_get( unsigned long int offset ) {
+        // create an assembler label (__cfaasm_get_before) just before the TLS access
+        // the "memory" clobber is there to avoid movement across the label
+        asm volatile("__cfaasm_get_before:":::"memory");
+        // access tls as normal (except for pointer arithmetic on the address of __cfaabi_tls)
+        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
+        // create an assembler label (__cfaasm_get_after) just after the TLS access
+        // the "memory" clobber is there to avoid movement across the label
+        asm volatile("__cfaasm_get_after:":::"memory");
+        return val;
+}
 extern "C" {
         // Disable interrupts by incrementing the counter
         void disable_interrupts() {
+                with( kernelTLS.preemption_state ) {
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_disable_before:":::"memory");
+                with( __cfaabi_tls.preemption_state ) {
                         #if GCC_VERSION > 50000
                         static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
 …
                         verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
+                }
+                // create a assembler label after
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_disable_after:":::"memory");
+        }
 …
         // If counter reaches 0, execute any pending __cfactx_switch
         void enable_interrupts( __cfaabi_dbg_ctx_param ) {
+                processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_enable_before:":::"memory");
+                processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
                 /* paranoid */ verify( proc );
                 with( kernelTLS.preemption_state ){
+                with( __cfaabi_tls.preemption_state ){
                         unsigned short prev = disable_count;
                         disable_count -= 1;
 …
                 // For debugging purposes : keep track of the last person to enable the interrupts
                 __cfaabi_dbg_debug_do( proc->last_enable = caller; )
+                // create a assembler label after
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_enable_after:":::"memory");
+        }
 …
         // Don't execute any pending __cfactx_switch even if counter reaches 0
         void enable_interrupts_noPoll() {
+                unsigned short prev = kernelTLS.preemption_state.disable_count;
+                kernelTLS.preemption_state.disable_count -= 1;
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_nopoll_before:":::"memory");
+                unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
+                __cfaabi_tls.preemption_state.disable_count -= 1;
                 verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
                 if( prev == 1 ) {
                         #if GCC_VERSION > 50000
                         static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
+                        static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
                         #endif
                         // Set enabled flag to true
                         // should be atomic to avoid preemption in the middle of the operation.
                         // use memory order RELAXED since there is no inter-thread on this variable requirements
                         __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
+                        __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
                         // Signal the compiler that a fence is needed but only for signal handlers
                         __atomic_signal_fence(__ATOMIC_RELEASE);
+                }
+                // create a assembler label after
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_nopoll_after:":::"memory");
+        }
+}
 …
 // reserved for future use
 static void timeout( struct __processor_id_t * id, $thread * this ) {
+static void timeout( $thread * this ) {
         #if !defined( __CFA_NO_STATISTICS__ )
                 kernelTLS.this_stats = this->curr_cluster->stats;
+                kernelTLS().this_stats = this->curr_cluster->stats;
         #endif
         __unpark( id, this );
+        unpark( this );
+}
 …
 static inline bool preemption_ready() {
         // Check if preemption is safe
         bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
+        bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
         // Adjust the pending flag accordingly
         kernelTLS.this_processor->pending_preemption = !ready;
+        __cfaabi_tls.this_processor->pending_preemption = !ready;
         return ready;
+}
 …
         // Start with preemption disabled until ready
         kernelTLS.preemption_state.enabled = false;
         kernelTLS.preemption_state.disable_count = 1;
+        __cfaabi_tls.preemption_state.enabled = false;
+        __cfaabi_tls.preemption_state.disable_count = 1;
         // Initialize the event kernel
 …
 // Kernel Signal Handlers
 //=============================================================================================
+struct asm_region { // pair of code addresses produced by __cfaasm_label( label )
+        void * before; // address of the __cfaasm_<label>_before assembler label
+        void * after; // address of the __cfaasm_<label>_after assembler label
+};
+//-----------------------------------------------------------------------------
+// Some assembly required
+#if defined( __i386 )
+        #define __cfaasm_label( label ) \
+                ({ \
+                        struct asm_region region; \
+                        asm( \
+                                "movl $__cfaasm_" #label "_before, %[vb]\n\t" \
+                                "movl $__cfaasm_" #label "_after , %[va]\n\t" \
+                                 : [vb]"=r"(region.before), [vb]"=r"(region.before) \
+                        ); \
+                        region; \
+                });
+#elif defined( __x86_64 )
+        #ifdef __PIC__
+                #define PLT "@PLT"
+        #else
+                #define PLT ""
+        #endif
+        #define __cfaasm_label( label ) \
+                ({ \
+                        struct asm_region region; \
+                        asm( \
+                                "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
+                                "movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
+                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
+                        ); \
+                        region; \
+                });
+#elif defined( __aarch64__ )
+        #error __cfaasm_label undefined for arm
+#else
+        #error unknown hardware architecture
+#endif
+__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
 // Context switch signal handler
 // Receives SIGUSR1 signal and causes the current thread to yield
 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
+        __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
+        void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
+        __cfaabi_dbg_debug_do( last_interrupt = ip; )
         // SKULLDUGGERY: if a thread creates a processor and the immediately deletes it,
 …
         // before the kernel thread has even started running. When that happens, an interrupt
         // with a null 'this_processor' will be caught, just ignore it.
         if(! kernelTLS.this_processor ) return;
+        if(! __cfaabi_tls.this_processor ) return;
         choose(sfp->si_value.sival_int) {
                 case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
+                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
                 default:
                         abort( "internal error, signal value is %d", sfp->si_value.sival_int );
 …
         if( !preemption_ready() ) { return; }
+        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
+        struct asm_region region;
+        region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
+        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
         // Sync flag : prevent recursive calls to the signal handler
         kernelTLS.preemption_state.in_progress = true;
+        __cfaabi_tls.preemption_state.in_progress = true;
         // Clear sighandler mask before context switching.
 …
+        }
-        // TODO: this should go in finish action
         // Clear the in progress flag
         kernelTLS.preemption_state.in_progress = false;
+        __cfaabi_tls.preemption_state.in_progress = false;
         // Preemption can occur here
 …
         id.full_proc = false;
         id.id = doregister(&id);
+        __cfaabi_tls.this_proc_id = &id;
         // Block sigalrms to control when they arrive
 …
                         // __cfaabi_dbg_print_safe( "Kernel : Preemption thread tick\n" );
                         lock( event_kernel->lock __cfaabi_dbg_ctx2 );
                         tick_preemption( &id );
+                        tick_preemption();
                         unlock( event_kernel->lock );
                         break;
 …
 void __cfaabi_check_preemption() {
         bool ready = kernelTLS.preemption_state.enabled;
+        bool ready = __preemption_enabled();
         if(!ready) { abort("Preemption should be ready"); }
 …
 #ifdef __CFA_WITH_VERIFY__
 bool __cfaabi_dbg_in_kernel() {
         return !kernelTLS.preemption_state.enabled;
+        return !__preemption_enabled();
+}
 #endif

libcfa/src/concurrency/ready_queue.cfa

-              r55acc3a
+              r139775e
 //  queues or removing them.
 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         // Step 1 : lock global lock
 …
+        }
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return s;
+}
 void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         // Step 1 : release local locks
 …
         __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 …
                 preferred =
                         //*
                         kernelTLS.this_processor ? kernelTLS.this_processor->id * 4 : -1;
+                        kernelTLS().this_processor ? kernelTLS().this_processor->id * 4 : -1;
                         /*/
                         thrd->link.preferred * 4;
 …
                 // Don't bother trying locally too much
                 int local_tries = 8;
                 preferred = kernelTLS.this_processor->id * 4;
+                preferred = kernelTLS().this_processor->id * 4;
         #endif

libcfa/src/concurrency/thread.cfa

r55acc3a	r139775e
127	127	verify( this_thrd->context.SP );
128	128
129		__schedule_thread( ~~(__processor_id_t *)kernelTLS.this_processor, this_thrd~~);
	129	__schedule_thread( this_thrd );
130	130	enable_interrupts( __cfaabi_dbg_ctx );
131	131	}

src/AST/Convert.cpp

-              r55acc3a
+              r139775e
 #include "AST/Init.hpp"
 #include "AST/Stmt.hpp"
+#include "AST/TranslationUnit.hpp"
 #include "AST/TypeSubstitution.hpp"
 …
 };
 std::list< Declaration * > convert( const std::list< ast::ptr< ast::Decl > > && translationUnit ) {
+std::list< Declaration * > convert( const ast::TranslationUnit && translationUnit ) {
         ConverterNewToOld c;
         std::list< Declaration * > decls;
         for(auto d : translationUnit) {
+        for(auto d : translationUnit.decls) {
                 decls.emplace_back( c.decl( d ) );
+        }
 …
 #undef GET_ACCEPT_1
 std::list< ast::ptr< ast::Decl > > convert( const std::list< Declaration * > && translationUnit ) {
+ast::TranslationUnit convert( const std::list< Declaration * > && translationUnit ) {
         ConverterOldToNew c;
         std::list< ast::ptr< ast::Decl > > decls;
+        ast::TranslationUnit unit;
         for(auto d : translationUnit) {
                 d->accept( c );
                 decls.emplace_back( c.decl() );
+                unit.decls.emplace_back( c.decl() );
+        }
         deleteAll(translationUnit);
         return decls;
+        return unit;
+}

src/AST/Convert.hpp

-              r55acc3a
+              r139775e
 #include <list>
-#include "AST/Node.hpp"
 class Declaration;
 namespace ast {
         class Decl;
+        class TranslationUnit;
 };
 std::list< Declaration * > convert( const std::list< ast::ptr< ast::Decl > > && translationUnit );
 std::list< ast::ptr< ast::Decl > > convert( const std::list< Declaration * > && translationUnit );
+std::list< Declaration * > convert( const ast::TranslationUnit && translationUnit );
+ast::TranslationUnit convert( const std::list< Declaration * > && translationUnit );

src/AST/Fwd.hpp

r55acc3a	r139775e
137	137	typedef unsigned int UniqueId;
138	138
	139	class TranslationUnit;
	140	// TODO: Get from the TranslationUnit:
139	141	extern Type * sizeType;
140	142	extern FunctionDecl * dereferenceOperator;

src/AST/Pass.hpp

-              r55acc3a
+              r139775e
         /// Construct and run a pass on a translation unit.
         template< typename... Args >
         static void run( std::list< ptr<Decl> > & decls, Args &&... args ) {
+        static void run( TranslationUnit & decls, Args &&... args ) {
                 Pass<core_t> visitor( std::forward<Args>( args )... );
                 accept_all( decls, visitor );
 …
         // Versions of the above for older compilers.
         template< typename... Args >
         static void run( std::list< ptr<Decl> > & decls ) {
+        static void run( TranslationUnit & decls ) {
                 Pass<core_t> visitor;
                 accept_all( decls, visitor );
 …
 void accept_all( std::list< ast::ptr<ast::Decl> > &, ast::Pass<core_t> & visitor );
+template<typename core_t>
+void accept_all( ast::TranslationUnit &, ast::Pass<core_t> & visitor );
 //-------------------------------------------------------------------------------------------------
 // PASS ACCESSORIES

src/AST/Pass.impl.hpp

-              r55acc3a
+              r139775e
 #include <unordered_map>
+#include "AST/TranslationUnit.hpp"
 #include "AST/TypeSubstitution.hpp"
 …
         pass_visitor_stats.depth--;
         if ( !errors.isEmpty() ) { throw errors; }
+}
+template< typename core_t >
+inline void ast::accept_all( ast::TranslationUnit & unit, ast::Pass< core_t > & visitor ) { // TranslationUnit overload
+        return ast::accept_all( unit.decls, visitor ); // forwards the unit's declaration list to the accept_all taking the decls
+}
 …
 const ast::CompoundStmt * ast::Pass< core_t >::visit( const ast::CompoundStmt * node ) {
         VISIT_START( node );
         VISIT({
+        VISIT(
                 // Do not enter (or leave) a new scope if atFunctionTop. Remember to save the result.
                 auto guard1 = makeFuncGuard( [this, enterScope = !this->atFunctionTop]() {
 …
                         if ( leaveScope ) __pass::symtab::leave(core, 0);
                 });
+                ValueGuard< bool > guard2( inFunction );
+                ValueGuard< bool > guard2( atFunctionTop );
+                atFunctionTop = false;
                 guard_scope guard3 { *this };
-                inFunction = false;
                 maybe_accept( node, &CompoundStmt::kids );
         })
+        )
         VISIT_END( CompoundStmt, node );
+}

src/AST/Pass.proto.hpp

r55acc3a	r139775e
22	22	template<typename core_t>
23	23	class Pass;
	24
	25	class TranslationUnit;
24	26
25	27	struct PureVisitor;

src/InitTweak/FixGlobalInit.cc

-              r55acc3a
+              r139775e
+        }
         void fixGlobalInit(std::list<ast::ptr<ast::Decl>> & translationUnit, bool inLibrary) {
+        void fixGlobalInit(ast::TranslationUnit & translationUnit, bool inLibrary) {
                 ast::Pass<GlobalFixer_new> fixer;
                 accept_all(translationUnit, fixer);
 …
                                 ast::Storage::Static, ast::Linkage::C, {new ast::Attribute("constructor", std::move(ctorParams))});
                         translationUnit.emplace_back( initFunction );
+                        translationUnit.decls.emplace_back( initFunction );
                 } // if
 …
                                 ast::Storage::Static, ast::Linkage::C, {new ast::Attribute("destructor", std::move(dtorParams))});
                         translationUnit.emplace_back(destroyFunction);
+                        translationUnit.decls.emplace_back(destroyFunction);
                 } // if
+        }
 …
                         } // if
                         if ( const ast::Stmt * ctor = ctorInit->ctor ) {
+                                addDataSectionAttribute(mutDecl);
                                 initStmts.push_back( ctor );
                                 mutDecl->init = nullptr;

src/InitTweak/FixGlobalInit.h

r55acc3a	r139775e
29	29	/// function is for library code.
30	30	void fixGlobalInit( std::list< Declaration * > & translationUnit, bool inLibrary );
31		void fixGlobalInit( ~~std::list< ast::ptr<ast::Decl> >~~ & translationUnit, bool inLibrary );
	31	void fixGlobalInit( ast::TranslationUnit & translationUnit, bool inLibrary );
32	32	} // namespace
33	33

src/InitTweak/FixInit.h

-              r55acc3a
+              r139775e
 #include <string>  // for string
-#include <AST/Fwd.hpp>
 class Declaration;
+namespace ast {
+        class TranslationUnit;
+}
 namespace InitTweak {
 …
         void fix( std::list< Declaration * > & translationUnit, bool inLibrary );
         void fix( std::list<ast::ptr<ast::Decl>> & translationUnit, bool inLibrary);
+        void fix( ast::TranslationUnit & translationUnit, bool inLibrary);
 } // namespace

src/InitTweak/FixInitNew.cpp

-              r55acc3a
+              r139775e
         /// expand each object declaration to use its constructor after it is declared.
         struct FixInit : public ast::WithStmtsToAdd<> {
                 static void fixInitializers( std::list< ast::ptr<ast::Decl> > &translationUnit );
+                static void fixInitializers( ast::TranslationUnit &translationUnit );
                 const ast::DeclWithType * postvisit( const ast::ObjectDecl *objDecl );
 …
 } // namespace
 void fix( std::list< ast::ptr<ast::Decl> > & translationUnit, bool inLibrary ) {
+void fix( ast::TranslationUnit & translationUnit, bool inLibrary ) {
         ast::Pass<SelfAssignChecker>::run( translationUnit );
 …
+        }
         void FixInit::fixInitializers( std::list< ast::ptr<ast::Decl> > & translationUnit ) {
+        void FixInit::fixInitializers( ast::TranslationUnit & translationUnit ) {
                 ast::Pass<FixInit> fixer;
 …
                 // can't use DeclMutator, because sometimes need to insert IfStmt, etc.
                 SemanticErrorException errors;
                 for ( auto i = translationUnit.begin(); i != translationUnit.end(); ++i ) {
+                for ( auto i = translationUnit.decls.begin(); i != translationUnit.decls.end(); ++i ) {
                         try {
                                 // maybeAccept( *i, fixer ); translationUnit should never contain null
                                 *i = (*i)->accept(fixer);
                                 translationUnit.splice( i, fixer.core.staticDtorDecls );
+                                translationUnit.decls.splice( i, fixer.core.staticDtorDecls );
                         } catch( SemanticErrorException &e ) {
                                 errors.append( e );
 …
                         if ( const ast::Stmt * ctor = ctorInit->ctor ) {
                                 if ( objDecl->storage.is_static ) {
+                                        addDataSectionAttribute(objDecl);
                                         // originally wanted to take advantage of gcc nested functions, but
                                         // we get memory errors with this approach. To remedy this, the static
 …
                                                 objDecl->name = objDecl->name + staticNamer.newName();
                                                 objDecl->mangleName = Mangle::mangle( objDecl );
+                                                objDecl->init = nullptr;
                                                 // xxx - temporary hack: need to return a declaration, but want to hoist the current object out of this scope

src/InitTweak/InitTweak.cc

r55acc3a	r139775e
1113	1113	}
1114	1114
	1115	void addDataSectionAttribute( ast::ObjectDecl * objDecl ) {
	1116	auto strLitT = new ast::PointerType(new ast::BasicType(ast::BasicType::Char));
	1117	objDecl->attributes.push_back(new ast::Attribute("section", {new ast::ConstantExpr(objDecl->location, strLitT, "\".data#\"", std::nullopt)}));
	1118	}
	1119
1115	1120	}

src/InitTweak/InitTweak.h

r55acc3a	r139775e
119	119	void addDataSectonAttribute( ObjectDecl * objDecl );
120	120
	121	void addDataSectionAttribute( ast::ObjectDecl * objDecl );
	122
121	123	class InitExpander_old {
122	124	public:

src/ResolvExpr/Resolver.cc

r55acc3a	r139775e
1274	1274	// size_t Resolver_new::traceId = Stats::Heap::new_stacktrace_id("Resolver");
1275	1275
1276		void resolve( ~~std::list< ast::ptr< ast::Decl > >~~& translationUnit ) {
	1276	void resolve( ast::TranslationUnit& translationUnit ) {
1277	1277	ast::Pass< Resolver_new >::run( translationUnit );
1278	1278	}

src/ResolvExpr/Resolver.h

-              r55acc3a
+              r139775e
         class StmtExpr;
         class SymbolTable;
+        class TranslationUnit;
         class Type;
         class TypeEnvironment;
 …
         /// Checks types and binds syntactic constructs to typed representations
         void resolve( std::list< ast::ptr<ast::Decl> >& translationUnit );
+        void resolve( ast::TranslationUnit& translationUnit );
         /// Searches expr and returns the first DeletedExpr found, otherwise nullptr
         const ast::DeletedExpr * findDeletedExpr( const ast::Expr * expr );
 …
                 const ast::Expr * untyped, const ast::SymbolTable & symtab);
         /// Resolves a constructor init expression
         ast::ptr< ast::Init > resolveCtorInit(
+        ast::ptr< ast::Init > resolveCtorInit(
                 const ast::ConstructorInit * ctorInit, const ast::SymbolTable & symtab );
         /// Resolves a statement expression
         ast::ptr< ast::Expr > resolveStmtExpr(
+        ast::ptr< ast::Expr > resolveStmtExpr(
                 const ast::StmtExpr * stmtExpr, const ast::SymbolTable & symtab );
 } // namespace ResolvExpr

tests/Makefile.am

-              r55acc3a
+              r139775e
 # adjust CC to current flags
 CC = LC_ALL=C $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
+CC = LC_ALL=C $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH) ${ARCH_FLAGS} ${AST_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS} ${AST_FLAGS})
 CFACC = $(CC)
 …
 # adjusted CC but without the actual distcc call
 CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
+CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH) ${ARCH_FLAGS} ${AST_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS} ${AST_FLAGS})
 CFACCLINK = $(CFACCLOCAL) -quiet $(if $(test), 2> $(test), ) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g'))

tests/config.py.in

r55acc3a	r139775e
9	9	HOSTARCH = "@host_cpu@"
10	10	DISTRIBUTE = @HAS_DISTCC@
	11	NEWAST = @DEFAULT_NEW_AST@

tests/pybin/settings.py

-              r55acc3a
+              r139775e
         def filter(self, tests):
                 return [test for test in tests if not test.arch or self.target == test.arch]
-                return True if not arch else self.target == arch
         @staticmethod
 …
                 self.path   = "debug" if value else "nodebug"
+class AST:
+        def __init__(self, ast):
+                if ast == "new":
+                        self.target = ast
+                        self.string = "New AST"
+                        self.flags  = """AST_FLAGS=-XCFA,--new-ast"""
+                elif ast == "old":
+                        self.target = ast
+                        self.string = "Old AST"
+                        self.flags  = """AST_FLAGS=-XCFA,--old-ast"""
+                elif ast == None:
+                        self.target = "new" if config.NEWAST else "old"
+                        self.string = "Default AST (%s)" % self.target
+                        self.flags  = """AST_FLAGS="""
+                else:
+                        print("""ERROR: Invalid ast configuration, must be "old", "new" or left unspecified, was %s""" % (value), file=sys.stderr)
+                        sys.exit(1)
+        def filter(self, tests):
+                return [test for test in tests if not test.astv or self.target == test.astv]
 class Install:
         def __init__(self, value):
 …
 def init( options ):
+        global all_ast
         global all_arch
         global all_debug
         global all_install
+        global ast
         global arch
+        global debug
         global archive
+        global install
         global continue_
-        global debug
         global dry_run
         global generating
-        global install
         global make
         global output_width
 …
         global timeout2gdb
+        all_ast      = [AST(o)          for o in list(dict.fromkeys(options.ast    ))] if options.ast  else [AST(None)]
         all_arch     = [Architecture(o) for o in list(dict.fromkeys(options.arch   ))] if options.arch else [Architecture(None)]
         all_debug    = [Debug(o)        for o in list(dict.fromkeys(options.debug  ))]

tests/pybin/test_run.py

-              r55acc3a
+              r139775e
                 self.path = ''
                 self.arch = ''
+                self.astv = ''
         def toString(self):
                 return "{:25s} ({:5s} {:s})".format( self.name, self.arch if self.arch else "Any", self.target() )
+                return "{:25s} ({:5s} arch, {:s} ast: {:s})".format( self.name, self.arch if self.arch else "Any", self.astv if self.astv else "Any", self.target() )
         def prepare(self):
 …
         def expect(self):
+                return os.path.normpath( os.path.join(settings.SRCDIR  , self.path, ".expect", "%s%s.txt" % (self.name,'' if not self.arch else ".%s" % self.arch)) )
+                arch = '' if not self.arch else ".%s" % self.arch
+                astv = '' if not self.astv else ".nast" if self.astv == "new" else ".oast"
+                return os.path.normpath( os.path.join(settings.SRCDIR  , self.path, ".expect", "%s%s%s.txt" % (self.name,astv,arch)) )
         def error_log(self):
 …
         @staticmethod
         def new_target(target, arch):
+        def new_target(target, arch, astv):
                 test = Test()
                 test.name = os.path.basename(target)
                 test.path = os.path.relpath (os.path.dirname(target), settings.SRCDIR)
                 test.arch = arch.target if arch else ''
+                test.astv = astv.target if astv else ''
                 return test

tests/pybin/tools.py

r55acc3a	r139775e
181	181	'-s' if silent else None,
182	182	test_param,
	183	settings.ast.flags,
183	184	settings.arch.flags,
184	185	settings.debug.flags,

tests/test.py

-              r55acc3a
+              r139775e
         def match_test(path):
                 match = re.search("^%s\/([\w\/\-_]*).expect\/([\w\-_]+)(\.[\w\-_]+)?\.txt$" % settings.SRCDIR, path)
+                match = re.search("^%s\/([\w\/\-_]*).expect\/([\w\-_]+)(\.nast|\.oast)?(\.[\w\-_]+)?\.txt$" % settings.SRCDIR, path)
                 if match :
                         test = Test()
                         test.name = match.group(2)
                         test.path = match.group(1)
+                        test.arch = match.group(3)[1:] if match.group(3) else None
+                        test.arch = match.group(4)[1:] if match.group(4) else None
+                        astv = match.group(3)[1:] if match.group(3) else None
+                        if astv == 'oast':
+                                test.astv = 'old'
+                        elif astv == 'nast':
+                                test.astv = 'new'
+                        elif astv:
+                                print('ERROR: "%s", expect file has astv but it is not "nast" or "oast"' % testname, file=sys.stderr)
+                                sys.exit(1)
                         expected.append(test)
 …
         if options.regenerate_expected :
                 for testname in options.tests :
+                        testname = canonical_path( testname )
+                        testname = os.path.normpath( os.path.join(settings.SRCDIR, testname) )
                         # first check if this is a valid name to regenerate
                         if Test.valid_name(testname):
                                 # this is a valid name, let's check if it already exists
                                 found = [test for test in all_tests if canonical_path( test.target() ) == testname]
+                                setup = itertools.product(settings.all_arch if options.arch else [None], settings.all_ast if options.ast else [None])
                                 if not found:
+                                        # it's a new name, create it according to the name and specified architecture
+                                        if options.arch:
+                                                # user specified one or multiple architectures, assume the tests will have architecture specific results
+                                                tests.extend( [Test.new_target(testname, arch) for arch in settings.all_arch] )
+                                        else:
+                                                # user didn't specify an architecture, just create a cross platform test
+                                                tests.append( Test.new_target( testname, None ) )
+                                        # it's a new name, create it according to the name and specified architecture/ast version
+                                        tests.extend( [Test.new_target(testname, arch, ast) for arch, ast in setup] )
                                 elif len(found) == 1 and not found[0].arch:
                                         # we found a single test, the user better be wanting to create a cross platform test
                                         if options.arch:
                                                 print('ERROR: "%s", test has no specified architecture but --arch was specified, ignoring it' % testname, file=sys.stderr)
+                                        elif options.ast:
+                                                print('ERROR: "%s", test has no specified ast version but --ast was specified, ignoring it' % testname, file=sys.stderr)
                                         else:
                                                 tests.append( found[0] )
                                 else:
                                         # this test is already cross platform, just add a test for each platform the user asked
                                         tests.extend( [Test.new_target(testname, arch) for arch in settings.all_arch] )
+                                        tests.extend( [Test.new_target(testname, arch, ast) for arch, ast in setup] )
                                         # print a warning if the user didn't ask for a specific architecture
                                         if not options.arch:
                                                 print('WARNING: "%s", test has architecture specific expected files but --arch was not specified, regenerating only for current host' % testname, file=sys.stderr)
+                                        # print a warning if the user didn't ask for a specific ast version
+                                        if not options.ast:
+                                                print('WARNING: "%s", test has ast version specific expected files but --ast was not specified, regenerating only for current ast' % testname, file=sys.stderr)
                         else :
 …
         # create a parser with the arguments for the tests script
         parser = argparse.ArgumentParser(description='Script which runs cforall tests')
+        parser.add_argument('--ast', help='Test for specific ast', type=comma_separated(str), default=None)
+        parser.add_argument('--arch', help='Test for specific architecture', type=comma_separated(str), default=None)
         parser.add_argument('--debug', help='Run all tests in debug or release', type=comma_separated(yes_no), default='yes')
         parser.add_argument('--install', help='Run all tests based on installed binaries or tree binaries', type=comma_separated(yes_no), default='no')
-        parser.add_argument('--arch', help='Test for specific architecture', type=comma_separated(str), default=None)
         parser.add_argument('--continue', help='When multiple specifications are passed (debug/install/arch), sets whether or not to continue if the last specification failed', type=yes_no, default='yes', dest='continue_')
         parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=120)
 …
         except KeyboardInterrupt:
                 return False, ""
         except Exception as ex:
                 print("Unexpected error in worker thread running {}: {}".format(t.target(), ex), file=sys.stderr)
                 sys.stderr.flush()
                 return False, ""
+        # except Exception as ex:
+        #       print("Unexpected error in worker thread running {}: {}".format(t.target(), ex), file=sys.stderr)
+        #       sys.stderr.flush()
+        #       return False, ""
 …
                 # for each build configurations, run the test
                 with Timed() as total_dur:
+                        for arch, debug, install in itertools.product(settings.all_arch, settings.all_debug, settings.all_install):
+                        for ast, arch, debug, install in itertools.product(settings.all_ast, settings.all_arch, settings.all_debug, settings.all_install):
+                                settings.ast     = ast
                                 settings.arch    = arch
                                 settings.debug   = debug
 …
                                 # filter out the tests for a different architecture
                                 # tests are the same across debug/install
+                                local_tests = settings.arch.filter( tests )
+                                local_tests = settings.ast.filter( tests )
+                                local_tests = settings.arch.filter( local_tests )
                                 options.jobs, forceJobs = job_count( options, local_tests )
                                 settings.update_make_cmd(forceJobs, options.jobs)
 …
                                 # print configuration
                                 print('%s %i tests on %i cores (%s:%s)' % (
+                                print('%s %i tests on %i cores (%s:%s - %s)' % (
                                         'Regenerating' if settings.generating else 'Running',
                                         len(local_tests),
                                         options.jobs,
+                                        settings.ast.string,
                                         settings.arch.string,
                                         settings.debug.string
                                 ))
+                                if not local_tests :
+                                        print('WARNING: No tests for this configuration')
+                                        continue
                                 # otherwise run all tests and make sure to return the correct error code

tools/stat.py

-              r55acc3a
+              r139775e
 #!/usr/bin/python
+#!/usr/bin/python3
 import sys
 …
                 avg = numpy.mean  (content)
                 std = numpy.std   (content)
                 print "median {0:.1f} avg {1:.1f} stddev {2:.1f}".format( med, avg, std )
+                print("median {0:.1f} avg {1:.1f} stddev {2:.1f}".format( med, avg, std ))

Context Navigation

Legend:

Download in other formats: