Diff [231b18f299ecfc1eaab7aaa615d70064e3240d08:54dcab16a65ef7e9617720bda2909ad715e5b8cc] for / – Cforall

benchmark/readyQ/cycle.cfa

-              r231b18f
+              r54dcab1
+                }
                 printf("Duration (ms)        : %'ld\n", (end - start)`ms);
+                printf("Duration (ms)        : %'ld\n", (end - start)`dms);
                 printf("Number of processors : %'d\n", nprocs);
                 printf("Number of threads    : %'d\n", tthreads);
 …
                 printf("Total Operations(ops): %'15llu\n", global_counter);
                 printf("Total blocks         : %'15llu\n", global_blocks);
                 printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
                 printf("ns per ops           : %'18.2lf\n", ((double)(end - start)`ns) / global_counter);
+                printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`ds);
+                printf("ns per ops           : %'18.2lf\n", (end - start)`dns / global_counter);
                 printf("Ops per threads      : %'15llu\n", global_counter / tthreads);
                 printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
                 printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
                 printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)`ns) / (global_counter / nprocs));
+                printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`ds);
+                printf("ns per ops/procs     : %'18.2lf\n", (end - start)`dns / (global_counter / nprocs));
                 fflush(stdout);
+        }

benchmark/readyQ/cycle.go

r231b18f	r54dcab1
72	72	p.Printf("Cycle size (# thrds) : %d\n", ring_size);
73	73	p.Printf("Total Operations(ops): %15d\n", global_counter)
74		p.Printf("~~Yields per second~~ : %18.2f\n", float64(global_counter) / delta.Seconds())
	74	p.Printf("Ops per second : %18.2f\n", float64(global_counter) / delta.Seconds())
75	75	p.Printf("ns per ops : %18.2f\n", float64(delta.Nanoseconds()) / float64(global_counter))
76	76	p.Printf("Ops per threads : %15d\n", global_counter / uint64(tthreads))

benchmark/readyQ/rq_bench.hfa

r231b18f	r54dcab1
88	88	}
89	89
90		struct bench_sem {
	90	struct __attribute__((aligned(128))) bench_sem {
91	91	struct $thread * volatile ptr;
92	92	};

benchmark/readyQ/rq_bench.hpp

r231b18f	r54dcab1
75	75	}
76	76
77		class bench_sem {
	77	class __attribute__((aligned(128))) bench_sem {
78	78	Fibre * volatile ptr = nullptr;
79	79	public:

libcfa/src/concurrency/io.cfa

-              r231b18f
+              r54dcab1
         static inline bool next( __leaderlock_t & this ) {
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 struct $thread * nextt;
                 for() {
 …
         // This is NOT thread-safe
         static [int, bool] __drain_io( & struct __io_data ring ) {
                 /* paranoid */ verify( !kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 unsigned to_submit = 0;
 …
                                         return;
+                                }
                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                                /* paranoid */ verify( ! __preemption_enabled() );
                                 __STATS__( true,
                                         io.submit_q.leader += 1;
 …
                         #if defined(LEADER_LOCK)
                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                                /* paranoid */ verify( ! __preemption_enabled() );
                                 next(ring.submit_q.submit_lock);
                         #else

libcfa/src/concurrency/io/setup.cfa

-              r231b18f
+              r54dcab1
                 id.full_proc = false;
                 id.id = doregister(&id);
                 kernelTLS.this_proc_id = &id;
+                __cfaabi_tls.this_proc_id = &id;
                 __cfaabi_dbg_print_safe( "Kernel : IO poller thread starting\n" );
 …
                                 __cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx);
                                 #if !defined( __CFA_NO_STATISTICS__ )
                                         kernelTLS.this_stats = io_ctx->self.curr_cluster->stats;
+                                        __cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats;
                                 #endif
                                 post( io_ctx->sem );

libcfa/src/concurrency/kernel.cfa

-              r231b18f
+              r54dcab1
         // Because of a bug, we couldn't initialized the seed on construction
         // Do it here
         kernelTLS.rand_seed ^= rdtscl();
         kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
+        __cfaabi_tls.rand_seed ^= rdtscl();
+        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
         __tls_rand_advance_bck();
 …
                 // and it make sense for it to be set in all other cases except here
                 // fake it
                 kernelTLS.this_thread = mainThread;
+                __cfaabi_tls.this_thread = mainThread;
+        }
 …
 // from the processor coroutine to the target thread
 static void __run_thread(processor * this, $thread * thrd_dst) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
         /* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
 …
                 // Update global state
                 kernelTLS.this_thread = thrd_dst;
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
+                kernelTLS().this_thread = thrd_dst;
+                /* paranoid */ verify( ! __preemption_enabled() );
+                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
                 /* paranoid */ verify( thrd_dst->context.SP );
                 /* paranoid */ verify( thrd_dst->state != Halted );
 …
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
                 /* paranoid */ verify( thrd_dst->context.SP );
                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 // Reset global state
                 kernelTLS.this_thread = 0p;
+                kernelTLS().this_thread = 0p;
                 // We just finished running a thread, there are a few things that could have happened.
 …
         proc_cor->state = Active;
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 // KERNEL_ONLY
 void returnToKernel() {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         $coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
         $thread * thrd_src = kernelTLS.this_thread;
+        /* paranoid */ verify( ! __preemption_enabled() );
+        $coroutine * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
+        $thread * thrd_src = kernelTLS().this_thread;
         #if !defined(__CFA_NO_STATISTICS__)
                 struct processor * last_proc = kernelTLS.this_processor;
+                struct processor * last_proc = kernelTLS().this_processor;
         #endif
 …
         #if !defined(__CFA_NO_STATISTICS__)
                 if(last_proc != kernelTLS.this_processor) {
+                if(last_proc != kernelTLS().this_processor) {
                         __tls_stats()->ready.threads.migration++;
+                }
         #endif
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
         /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
 …
 // KERNEL ONLY
 void __schedule_thread( $thread * thrd ) {
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( thrd );
         /* paranoid */ verify( thrd->state != Halted );
+        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( kernelTLS.this_proc_id );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
         /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
         /* paranoid */  if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
 …
         ready_schedule_unlock();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 // KERNEL ONLY
 static inline $thread * __next_thread(cluster * this) with( *this ) {
         /* paranoid */ verify( kernelTLS.this_proc_id );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
         ready_schedule_lock();
 …
         ready_schedule_unlock();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         /* paranoid */ verify( kernelTLS.this_proc_id );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return thrd;
+}
 …
 // KERNEL ONLY
 static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
         /* paranoid */ verify( kernelTLS.this_proc_id );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
         ready_schedule_lock();
 …
         ready_schedule_unlock();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         /* paranoid */ verify( kernelTLS.this_proc_id );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return thrd;
+}
 …
         if( !thrd ) return;
-        /* paranoid */ verify( kernelTLS.this_proc_id );
-        bool full = kernelTLS.this_proc_id->full_proc;
-        if(full) disable_interrupts();
-        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
         switch(old_ticket) {
 …
                         /* paranoid */ verify( thrd->state == Blocked );
+                        // Wake lost the race,
+                        __schedule_thread( thrd );
+                        {
+                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
+                                bool full = publicTLS_get(this_proc_id)->full_proc;
+                                if(full) disable_interrupts();
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                // Wake lost the race,
+                                __schedule_thread( thrd );
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                if(full) enable_interrupts( __cfaabi_dbg_ctx );
+                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
+                        }
                         break;
                 default:
 …
                         abort("Thread %p (%s) has mismatch park/unpark\n", thrd, thrd->self_cor.name);
+        }
-        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-        if(full) enable_interrupts( __cfaabi_dbg_ctx );
-        /* paranoid */ verify( kernelTLS.this_proc_id );
+}
 void park( void ) {
         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( __preemption_enabled() );
         disable_interrupts();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         /* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION );
         returnToKernel();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         enable_interrupts( __cfaabi_dbg_ctx );
         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( __preemption_enabled() );
+}
 …
         // Should never return
         void __cfactx_thrd_leave() {
                 $thread * thrd = TL_GET( this_thread );
+                $thread * thrd = active_thread();
                 $monitor * this = &thrd->self_mon;
 …
                 // Leave the thread
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 returnToKernel();
                 abort();
 …
 // KERNEL ONLY
 bool force_yield( __Preemption_Reason reason ) {
         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( __preemption_enabled() );
         disable_interrupts();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         $thread * thrd = kernelTLS.this_thread;
+        /* paranoid */ verify( ! __preemption_enabled() );
+        $thread * thrd = kernelTLS().this_thread;
         /* paranoid */ verify(thrd->state == Active);
 …
+        }
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         enable_interrupts_noPoll();
         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( __preemption_enabled() );
         return preempted;
 …
 // Wake a thread from the front if there are any
 static void __wake_one(cluster * this) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( ready_schedule_islocked() );
 …
         /* paranoid */ verify( ready_schedule_islocked() );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return;
 …
         disable_interrupts();
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+                /* paranoid */ verify( ! __preemption_enabled() );
                 post( this->idle );
         enable_interrupts( __cfaabi_dbg_ctx );
 …
 static void push  (__cluster_idles & this, processor & proc) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         lock( this );
                 this.idle++;
 …
                 insert_first(this.list, proc);
         unlock( this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 static void remove(__cluster_idles & this, processor & proc) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         lock( this );
                 this.idle--;
 …
                 remove(proc);
         unlock( this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 …
+        }
         return kernelTLS.this_thread;
+        return __cfaabi_tls.this_thread;
+}
 …
 int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
         return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
+        return get_coroutine(kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2;
+}
 …
         if ( count < 0 ) {
                 // queue current task
                 append( waiting, kernelTLS.this_thread );
+                append( waiting, active_thread() );
                 // atomically release spin lock and block
 …
                 void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
                         this.prev_name = prev_name;
                         this.prev_thrd = kernelTLS.this_thread;
+                        this.prev_thrd = kernelTLS().this_thread;
+                }
+        }

libcfa/src/concurrency/kernel.hfa

-              r231b18f
+              r54dcab1
 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
 static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
 static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
+static inline struct processor * active_processor() { return publicTLS_get( this_processor ); } // UNSAFE
+static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
 #if !defined(__CFA_NO_STATISTICS__)
+        void print_stats_now( cluster & this, int flags );
         static inline void print_stats_at_exit( cluster & this, int flags ) {
                 this.print_stats |= flags;

libcfa/src/concurrency/kernel/fwd.hfa

-              r231b18f
+              r54dcab1
                                 uint64_t bck_seed;
                         } ready_rng;
                 } kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
+                } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
+                extern bool __preemption_enabled();
+                static inline KernelThreadData & kernelTLS( void ) {
+                        /* paranoid */ verify( ! __preemption_enabled() );
+                        return __cfaabi_tls;
+                }
+                extern uintptr_t __cfatls_get( unsigned long int member );
+                // #define publicTLS_get( member ) ((typeof(__cfaabi_tls.member))__cfatls_get( __builtin_offsetof(KernelThreadData, member) ))
+                #define publicTLS_get( member ) (__cfaabi_tls.member)
+                // extern forall(otype T) T __cfatls_get( T * member, T value );
+                // #define publicTLS_set( member, value ) __cfatls_set( (typeof(member)*)__builtin_offsetof(KernelThreadData, member), value );
                 static inline uint64_t __tls_rand() {
                         #if defined(__SIZEOF_INT128__)
                                 return __lehmer64( kernelTLS.rand_seed );
+                                return __lehmer64( kernelTLS().rand_seed );
                         #else
                                 return __xorshift64( kernelTLS.rand_seed );
+                                return __xorshift64( kernelTLS().rand_seed );
                         #endif
+                }
 …
                 static inline unsigned __tls_rand_fwd() {
                         kernelTLS.ready_rng.fwd_seed = (A * kernelTLS.ready_rng.fwd_seed + C) & (M - 1);
                         return kernelTLS.ready_rng.fwd_seed >> D;
+                        kernelTLS().ready_rng.fwd_seed = (A * kernelTLS().ready_rng.fwd_seed + C) & (M - 1);
+                        return kernelTLS().ready_rng.fwd_seed >> D;
+                }
                 static inline unsigned __tls_rand_bck() {
                         unsigned int r = kernelTLS.ready_rng.bck_seed >> D;
                         kernelTLS.ready_rng.bck_seed = AI * (kernelTLS.ready_rng.bck_seed - C) & (M - 1);
+                        unsigned int r = kernelTLS().ready_rng.bck_seed >> D;
+                        kernelTLS().ready_rng.bck_seed = AI * (kernelTLS().ready_rng.bck_seed - C) & (M - 1);
                         return r;
+                }
 …
                 static inline void __tls_rand_advance_bck(void) {
                         kernelTLS.ready_rng.bck_seed = kernelTLS.ready_rng.fwd_seed;
+                        kernelTLS().ready_rng.bck_seed = kernelTLS().ready_rng.fwd_seed;
+                }
+        }
-        #if 0 // def __ARM_ARCH
-                // function prototypes are only really used by these macros on ARM
-                void disable_global_interrupts();
-                void enable_global_interrupts();
-                #define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
-                        disable_global_interrupts(); \
-                        target = kernelTLS.member; \
-                        enable_global_interrupts(); \
-                        target; } )
-                #define TL_SET( member, value ) disable_global_interrupts(); \
-                        kernelTLS.member = value; \
-                        enable_global_interrupts();
-        #else
-                #define TL_GET( member ) kernelTLS.member
-                #define TL_SET( member, value ) kernelTLS.member = value;
-        #endif
         extern void disable_interrupts();
 …
                 extern void park( void );
                 extern void unpark( struct $thread * this );
+                static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
+                static inline struct $thread * active_thread () {
+                        struct $thread * t = publicTLS_get( this_thread );
+                        /* paranoid */ verify( t );
+                        return t;
+                }
                 extern bool force_yield( enum __Preemption_Reason );
 …
                 #if !defined(__CFA_NO_STATISTICS__)
                         static inline struct __stats_t * __tls_stats() {
                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
                                 /* paranoid */ verify( kernelTLS.this_stats );
                                 return kernelTLS.this_stats;
+                                /* paranoid */ verify( ! __preemption_enabled() );
+                                /* paranoid */ verify( kernelTLS().this_stats );
+                                return kernelTLS().this_stats;
+                        }

libcfa/src/concurrency/kernel/startup.cfa

-              r231b18f
+              r54dcab1
 //-----------------------------------------------------------------------------
 // Global state
 thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) @= {
+thread_local struct KernelThreadData __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))) @= {
         NULL,                                                                                           // cannot use 0p
         NULL,
 …
 // Kernel boot procedures
 static void __kernel_startup(void) {
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         __cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
 …
         //initialize the global state variables
         kernelTLS.this_processor = mainProcessor;
         kernelTLS.this_proc_id   = (__processor_id_t*)mainProcessor;
         kernelTLS.this_thread    = mainThread;
+        __cfaabi_tls.this_processor = mainProcessor;
+        __cfaabi_tls.this_proc_id   = (__processor_id_t*)mainProcessor;
+        __cfaabi_tls.this_thread    = mainThread;
         #if !defined( __CFA_NO_STATISTICS__ )
                 kernelTLS.this_stats = (__stats_t *)& storage_mainProcStats;
                 __init_stats( kernelTLS.this_stats );
+                __cfaabi_tls.this_stats = (__stats_t *)& storage_mainProcStats;
+                __init_stats( __cfaabi_tls.this_stats );
         #endif
 …
         // context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
         // mainThread is on the ready queue when this call is made.
         __kernel_first_resume( kernelTLS.this_processor );
+        __kernel_first_resume( __cfaabi_tls.this_processor );
 …
         __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         enable_interrupts( __cfaabi_dbg_ctx );
+        verify( TL_GET( preemption_state.enabled ) );
+        /* paranoid */ verify( __preemption_enabled() );
+}
 …
         mainCluster->io.ctxs = 0p;
         /* paranoid */ verify( TL_GET( preemption_state.enabled ) );
+        /* paranoid */ verify( __preemption_enabled() );
         disable_interrupts();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
 …
         // which is currently here
         __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
         __kernel_last_resume( kernelTLS.this_processor );
+        __kernel_last_resume( __cfaabi_tls.this_processor );
         mainThread->self_cor.state = Halted;
 …
                 __stats_t local_stats;
                 __init_stats( &local_stats );
                 kernelTLS.this_stats = &local_stats;
+                __cfaabi_tls.this_stats = &local_stats;
         #endif
         processor * proc = (processor *) arg;
         kernelTLS.this_processor = proc;
         kernelTLS.this_proc_id   = (__processor_id_t*)proc;
         kernelTLS.this_thread    = 0p;
         kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
+        __cfaabi_tls.this_processor = proc;
+        __cfaabi_tls.this_proc_id   = (__processor_id_t*)proc;
+        __cfaabi_tls.this_thread    = 0p;
+        __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
         // SKULLDUGGERY: We want to create a context for the processor coroutine
         // which is needed for the 2-step context switch. However, there is no reason
 …
         //Set global state
         kernelTLS.this_thread = 0p;
+        __cfaabi_tls.this_thread = 0p;
         //We now have a proper context from which to schedule threads
 …
         $coroutine * dst = get_coroutine(this->runner);
         verify( ! kernelTLS.preemption_state.enabled );
         kernelTLS.this_thread->curr_cor = dst;
+        /* paranoid */ verify( ! __preemption_enabled() );
+        __cfaabi_tls.this_thread->curr_cor = dst;
         __stack_prepare( &dst->stack, 65000 );
         __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         dst->last = &src->self_cor;
 …
         /* paranoid */ verify(src->state == Active);
         verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 …
         $coroutine * dst = get_coroutine(this->runner);
         verify( ! kernelTLS.preemption_state.enabled );
         verify( dst->starter == src );
         verify( dst->context.SP );
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( dst->starter == src );
+        /* paranoid */ verify( dst->context.SP );
         // SKULLDUGGERY in debug the processors check that the
 …
                 P( terminated );
                 verify( kernelTLS.this_processor != &this);
+                /* paranoid */ verify( active_processor() != &this);
+        }
 …
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {
         kernelTLS.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
+        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&verify_fwd_bck_rng);
         unsigned values[10];

libcfa/src/concurrency/kernel_private.hfa

-              r231b18f
+              r54dcab1
 #endif
+;
+extern bool __preemption_enabled();
 //release/wake-up the following resources
 …
 //  creating/destroying queues
 static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
+        /*paranoid*/ verify( kernelTLS.this_proc_id );
+        unsigned iproc = kernelTLS.this_proc_id->id;
+        /*paranoid*/ verify(data[iproc].handle == kernelTLS.this_proc_id);
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        unsigned iproc = kernelTLS().this_proc_id->id;
+        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
         /*paranoid*/ verify(iproc < ready);
 …
 static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
+        /*paranoid*/ verify( kernelTLS.this_proc_id );
+        unsigned iproc = kernelTLS.this_proc_id->id;
+        /*paranoid*/ verify(data[iproc].handle == kernelTLS.this_proc_id);
+        /* paranoid */ verify( ! __preemption_enabled() );
+        /* paranoid */ verify( kernelTLS().this_proc_id );
+        unsigned iproc = kernelTLS().this_proc_id->id;
+        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
         /*paranoid*/ verify(iproc < ready);
         /*paranoid*/ verify(data[iproc].lock);
 …
 #ifdef __CFA_WITH_VERIFY__
         static inline bool ready_schedule_islocked(void) {
+                /*paranoid*/ verify( kernelTLS.this_proc_id );
+                __processor_id_t * proc = kernelTLS.this_proc_id;
+                /* paranoid */ verify( ! __preemption_enabled() );
+                /*paranoid*/ verify( kernelTLS().this_proc_id );
+                __processor_id_t * proc = kernelTLS().this_proc_id;
                 return __scheduler_lock->data[proc->id].owned;
+        }

libcfa/src/concurrency/monitor.cfa

-              r231b18f
+              r54dcab1
 // Enter single monitor
 static void __enter( $monitor * this, const __monitor_group_t & group ) {
+        $thread * thrd = active_thread();
         // Lock the monitor spinlock
         lock( this->lock __cfaabi_dbg_ctx2 );
-        // Interrupts disable inside critical section
-        $thread * thrd = kernelTLS.this_thread;
         __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
 …
                 __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 return;
+        }
 …
         __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
         /* paranoid */ verify( this->lock.lock );
 …
 static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
+        $thread * thrd = active_thread();
         // Lock the monitor spinlock
         lock( this->lock __cfaabi_dbg_ctx2 );
-        // Interrupts disable inside critical section
-        $thread * thrd = kernelTLS.this_thread;
         __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
 …
                 __set_owner( this, thrd );
                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 unlock( this->lock );
 …
                 this->owner = thrd;
                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 unlock( this->lock );
 …
                 // Release the next thread
                 /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 unpark( urgent->owner->waiting_thread );
 …
                 // Some one was waiting for us, enter
                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
+        }
         else {
 …
                 park();
                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 return;
+        }
 …
         lock( this->lock __cfaabi_dbg_ctx2 );
         __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+        __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", active_thread(), this, this->owner);
+        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
         // Leaving a recursion level, decrement the counter
 …
 void __dtor_leave( $monitor * this, bool join ) {
         __cfaabi_dbg_debug_do(
                 if( TL_GET( this_thread ) != this->owner ) {
                         abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
+                if( active_thread() != this->owner ) {
+                        abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, active_thread(), this->owner);
+                }
                 if( this->recursion != 1  && !join ) {
 …
         /* paranoid */ verify( this->lock.lock );
         /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( thrd->state == Halted );
         /* paranoid */ verify( this->recursion == 1 );
 …
         // Unpark the next owner if needed
         /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( thrd->state == Halted );
         unpark( new_owner );
 …
 // Sorts monitors before entering
 void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
         $thread * thrd = TL_GET( this_thread );
+        $thread * thrd = active_thread();
         // Store current array
 …
         // Restore thread context
         TL_GET( this_thread )->monitors = this.prev;
+        active_thread()->monitors = this.prev;
+}
 …
 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
         // optimization
         $thread * thrd = TL_GET( this_thread );
+        $thread * thrd = active_thread();
         // Store current array
 …
         // Restore thread context
         TL_GET( this_thread )->monitors = this.prev;
+        active_thread()->monitors = this.prev;
+}
 …
         // Create the node specific to this wait operation
         wait_ctx( TL_GET( this_thread ), user_info );
+        wait_ctx( active_thread(), user_info );
         // Append the current wait operation to the ones already queued on the condition
 …
         //Some more checking in debug
         __cfaabi_dbg_debug_do(
                 $thread * this_thrd = TL_GET( this_thread );
+                $thread * this_thrd = active_thread();
                 if ( this.monitor_count != this_thrd->monitors.size ) {
                         abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
 …
         // Create the node specific to this wait operation
         wait_ctx_primed( kernelTLS.this_thread, 0 )
+        wait_ctx_primed( active_thread(), 0 )
         //save contexts
 …
                                 // Create the node specific to this wait operation
                                 wait_ctx_primed( kernelTLS.this_thread, 0 );
+                                wait_ctx_primed( active_thread(), 0 );
                                 // Save monitor states
 …
         // Create the node specific to this wait operation
         wait_ctx_primed( kernelTLS.this_thread, 0 );
+        wait_ctx_primed( active_thread(), 0 );
         monitor_save;
 …
         for( __lock_size_t i = 0; i < count; i++) {
                 verify( monitors[i]->owner == kernelTLS.this_thread );
+                verify( monitors[i]->owner == active_thread() );
+        }
 …
 static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
         /* paranoid */ verify ( monitors[0]->lock.lock );
         /* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
+        /* paranoid */ verifyf( monitors[0]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[0]->owner, monitors[0]->recursion, monitors[0] );
         monitors[0]->owner        = owner;
         monitors[0]->recursion    = 1;
         for( __lock_size_t i = 1; i < count; i++ ) {
                 /* paranoid */ verify ( monitors[i]->lock.lock );
                 /* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
+                /* paranoid */ verifyf( monitors[i]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[i]->owner, monitors[i]->recursion, monitors[i] );
                 monitors[i]->owner        = owner;
                 monitors[i]->recursion    = 0;
 …
                 //regardless of if we are ready to baton pass,
                 //we need to set the monitor as in use
                 /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+                /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
                 __set_owner( this,  urgent->owner->waiting_thread );
 …
         // Get the next thread in the entry_queue
         $thread * new_owner = pop_head( this->entry_queue );
         /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
+        /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
         /* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
         __set_owner( this, new_owner );
 …
 static inline void brand_condition( condition & this ) {
         $thread * thrd = TL_GET( this_thread );
+        $thread * thrd = active_thread();
         if( !this.monitors ) {
                 // __cfaabi_dbg_print_safe( "Branding\n" );

libcfa/src/concurrency/preemption.cfa

-              r231b18f
+              r54dcab1
 // Kernel Signal Tools
 //=============================================================================================
 // In a user-level threading system, there are handful of thread-local variables where this problem occurs on the ARM.
 //
+//
 // For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
 // enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
 //
+//
 // For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
 //
+//
 // _Thread_local volatile int interrupts;
 // int main() {
 //     interrupts = 0; // disable interrupts }
 //
+//
 // which generates the following code on the ARM
 //
+//
 // (gdb) disassemble main
 // Dump of assembler code for function main:
 …
 //    0x0000000000000620 <+16>: str     wzr, [x1]
 //    0x0000000000000624 <+20>: ret
 //
+//
 // The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
 // increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
 …
 // the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
 // turning off interrupt on the wrong kernel thread.
 //
+//
 // On the x86, the following code is generated for the same code fragment.
 //
+//
 // (gdb) disassemble main
 // Dump of assembler code for function main:
 //    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
 //    0x000000000040042c <+12>: xor    %eax,%eax
 //    0x000000000040042e <+14>: retq
 //
+//    0x000000000040042e <+14>: retq
+//
 // and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
 // register "fs".
 //
+//
 // Hence, the ARM has base-displacement addressing for the general purpose registers, BUT not for the coprocessor
 // registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
 //
+//
 // Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
 // thread on the same kernel thread.
 //
+//
 // The obvious question is why does ARM use a coprocessor register to store the TLS pointer given that coprocessor
 // registers are second-class registers with respect to the instruction set. One possible answer is that they did not
 …
 // available.
+__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
+//----------
+// Special case for preemption since it is used often: returns the thread-local 'preemption_state.enabled' flag.
+bool __preemption_enabled() {
+        // Emit the assembler label __cfaasm_check_before ahead of the TLS access;
+        // the "memory" clobber is there to keep the compiler from moving the access across the label.
+        asm volatile("__cfaasm_check_before:":::"memory");
+        // access TLS as normal
+        bool enabled = __cfaabi_tls.preemption_state.enabled;
+        // Emit the assembler label __cfaasm_check_after behind the TLS access;
+        // sigHandler_ctxSwitch returns without preempting when the interrupted IP lies between the two labels.
+        asm volatile("__cfaasm_check_after:":::"memory");
+        return enabled;
+}
+//----------
+// Get data from the TLS block: reads one uintptr_t located 'offset' bytes past the address of __cfaabi_tls.
+uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
+uintptr_t __cfatls_get( unsigned long int offset ) {
+        // Emit the assembler label __cfaasm_get_before ahead of the TLS access;
+        // the "memory" clobber is there to keep the compiler from moving the access across the label.
+        asm volatile("__cfaasm_get_before:":::"memory");
+        // access TLS as normal (except for pointer arithmetic); 'offset' is not range-checked here
+        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
+        // Emit the assembler label __cfaasm_get_after behind the TLS access;
+        // sigHandler_ctxSwitch returns without preempting when the interrupted IP lies between the two labels.
+        asm volatile("__cfaasm_get_after:":::"memory");
+        return val;
+}
 extern "C" {
         // Disable interrupts by incrementing the counter
         void disable_interrupts() {
+                with( kernelTLS.preemption_state ) {
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_disable_before:":::"memory");
+                with( __cfaabi_tls.preemption_state ) {
                         #if GCC_VERSION > 50000
                         static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
 …
                         verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
+                }
+                // create a assembler label after
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_disable_after:":::"memory");
+        }
 …
         // If counter reaches 0, execute any pending __cfactx_switch
         void enable_interrupts( __cfaabi_dbg_ctx_param ) {
+                processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
+                // create a assembler label before
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_enable_before:":::"memory");
+                processor   * proc = __cfaabi_tls.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
                 /* paranoid */ verify( proc );
                 with( kernelTLS.preemption_state ){
+                with( __cfaabi_tls.preemption_state ){
                         unsigned short prev = disable_count;
                         disable_count -= 1;
 …
                 // For debugging purposes : keep track of the last person to enable the interrupts
                 __cfaabi_dbg_debug_do( proc->last_enable = caller; )
+                // create a assembler label after
+                // marked as clobber all to avoid movement
+                asm volatile("__cfaasm_enable_after:":::"memory");
+        }
 …
         // Enable interrupts by decrementing the disable counter, but without polling:
         // don't execute any pending __cfactx_switch even if the counter reaches 0
         void enable_interrupts_noPoll() {
+                unsigned short prev = kernelTLS.preemption_state.disable_count;
+                kernelTLS.preemption_state.disable_count -= 1;
+                // Emit the assembler label __cfaasm_nopoll_before ahead of the TLS accesses;
+                // the "memory" clobber is there to keep the compiler from moving accesses across the label.
+                asm volatile("__cfaasm_nopoll_before:":::"memory");
+                unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
+                __cfaabi_tls.preemption_state.disable_count -= 1;
                 verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers, someone enabled interrupts that were already enabled
                 if( prev == 1 ) {
                         #if GCC_VERSION > 50000
                         static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
+                        static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
                         #endif
                         // Set enabled flag to true
                         // should be atomic to avoid preemption in the middle of the operation.
                         // use memory order RELAXED since there are no inter-thread requirements on this variable
                         __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
+                        __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
                         // Signal the compiler that a fence is needed but only for signal handlers
                         __atomic_signal_fence(__ATOMIC_RELEASE);
+                }
+                // Emit the assembler label __cfaasm_nopoll_after behind the TLS accesses;
+                // sigHandler_ctxSwitch returns without preempting when the interrupted IP lies between the two labels.
+                asm volatile("__cfaasm_nopoll_after:":::"memory");
+        }
+}
 …
 static void timeout( $thread * this ) {
         #if !defined( __CFA_NO_STATISTICS__ )
                 kernelTLS.this_stats = this->curr_cluster->stats;
+                kernelTLS().this_stats = this->curr_cluster->stats;
         #endif
         unpark( this );
 …
 static inline bool preemption_ready() {
         // Check if preemption is safe
         bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
+        bool ready = __cfaabi_tls.preemption_state.enabled && ! __cfaabi_tls.preemption_state.in_progress;
         // Adjust the pending flag accordingly
         kernelTLS.this_processor->pending_preemption = !ready;
+        __cfaabi_tls.this_processor->pending_preemption = !ready;
         return ready;
+}
 …
         // Start with preemption disabled until ready
         kernelTLS.preemption_state.enabled = false;
         kernelTLS.preemption_state.disable_count = 1;
+        __cfaabi_tls.preemption_state.enabled = false;
+        __cfaabi_tls.preemption_state.disable_count = 1;
         // Initialize the event kernel
 …
 // Kernel Signal Handlers
 //=============================================================================================
+struct asm_region { // code address range delimited by a pair of assembler labels, as filled in by __cfaasm_label
+        void * before; // address of the __cfaasm_<label>_before symbol (start of the region)
+        void * after;  // address of the __cfaasm_<label>_after symbol (end of the region)
+};
+//-----------------------------------------------------------------------------
+// Some assembly required
+#if defined( __i386 )
+        #define __cfaasm_label( label ) \
+                ({ \
+                        struct asm_region region; \
+                        asm( \
+                                "movl $__cfaasm_" #label "_before, %[vb]\n\t" \
+                                "movl $__cfaasm_" #label "_after , %[va]\n\t" \
+                                 : [vb]"=r"(region.before), [vb]"=r"(region.before) \
+                        ); \
+                        region; \
+                });
+#elif defined( __x86_64 )
+        #ifdef __PIC__
+                #define PLT "@PLT"
+        #else
+                #define PLT ""
+        #endif
+        #define __cfaasm_label( label ) \
+                ({ \
+                        struct asm_region region; \
+                        asm( \
+                                "movq $__cfaasm_" #label "_before" PLT ", %[vb]\n\t" \
+                                "movq $__cfaasm_" #label "_after"  PLT ", %[va]\n\t" \
+                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
+                        ); \
+                        region; \
+                });
+#elif defined( __aarch64__ )
+        #error __cfaasm_label undefined for arm
+#else
+        #error unknown hardware architecture
+#endif
+__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
 // Context switch signal handler
 // Receives SIGUSR1 signal and causes the current thread to yield
 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
+        __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
+        void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
+        __cfaabi_dbg_debug_do( last_interrupt = ip; )
         // SKULLDUGGERY: if a thread creates a processor and the immediately deletes it,
 …
         // before the kernel thread has even started running. When that happens, an interrupt
         // with a null 'this_processor' will be caught, just ignore it.
         if(! kernelTLS.this_processor ) return;
+        if(! __cfaabi_tls.this_processor ) return;
         choose(sfp->si_value.sival_int) {
                 case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
+                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
                 default:
                         abort( "internal error, signal value is %d", sfp->si_value.sival_int );
 …
         if( !preemption_ready() ) { return; }
+        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
+        struct asm_region region;
+        region = __cfaasm_label( get     ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( check   ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( disable ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( enable  ); if( ip >= region.before && ip <= region.after ) return;
+        region = __cfaasm_label( nopoll  ); if( ip >= region.before && ip <= region.after ) return;
+        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
         // Sync flag : prevent recursive calls to the signal handler
         kernelTLS.preemption_state.in_progress = true;
+        __cfaabi_tls.preemption_state.in_progress = true;
         // Clear sighandler mask before context switching.
 …
+        }
-        // TODO: this should go in finish action
         // Clear the in progress flag
         kernelTLS.preemption_state.in_progress = false;
+        __cfaabi_tls.preemption_state.in_progress = false;
         // Preemption can occur here
 …
         id.full_proc = false;
         id.id = doregister(&id);
         kernelTLS.this_proc_id = &id;
+        __cfaabi_tls.this_proc_id = &id;
         // Block sigalrms to control when they arrive
 …
 void __cfaabi_check_preemption() {
         bool ready = kernelTLS.preemption_state.enabled;
+        bool ready = __preemption_enabled();
         if(!ready) { abort("Preemption should be ready"); }
 …
 #ifdef __CFA_WITH_VERIFY__
 bool __cfaabi_dbg_in_kernel() {
         return !kernelTLS.preemption_state.enabled;
+        return !__preemption_enabled();
+}
 #endif

libcfa/src/concurrency/ready_queue.cfa

-              r231b18f
+              r54dcab1
 //  queues or removing them.
 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         // Step 1 : lock global lock
 …
+        }
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         return s;
+}
 void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
         // Step 1 : release local locks
 …
         __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        /* paranoid */ verify( ! __preemption_enabled() );
+}
 …
                 preferred =
                         //*
                         kernelTLS.this_processor ? kernelTLS.this_processor->id * 4 : -1;
+                        kernelTLS().this_processor ? kernelTLS().this_processor->id * 4 : -1;
                         /*/
                         thrd->link.preferred * 4;
 …
                 // Don't bother trying locally too much
                 int local_tries = 8;
                 preferred = kernelTLS.this_processor->id * 4;
+                preferred = kernelTLS().this_processor->id * 4;
         #endif

tools/stat.py

-              r231b18f
+              r54dcab1
 #!/usr/bin/python
+#!/usr/bin/python3
 import sys
 …
                 avg = numpy.mean  (content)
                 std = numpy.std   (content)
                 print "median {0:.1f} avg {1:.1f} stddev {2:.1f}".format( med, avg, std )
+                print("median {0:.1f} avg {1:.1f} stddev {2:.1f}".format( med, avg, std ))

Context Navigation

Changes in / [231b18f:54dcab1]

Legend:

benchmark/readyQ/cycle.cfa

benchmark/readyQ/cycle.go

benchmark/readyQ/rq_bench.hfa

benchmark/readyQ/rq_bench.hpp

libcfa/src/concurrency/io.cfa

libcfa/src/concurrency/io/setup.cfa

libcfa/src/concurrency/kernel.cfa

libcfa/src/concurrency/kernel.hfa

libcfa/src/concurrency/kernel/fwd.hfa

libcfa/src/concurrency/kernel/startup.cfa

libcfa/src/concurrency/kernel_private.hfa

libcfa/src/concurrency/monitor.cfa

libcfa/src/concurrency/preemption.cfa

libcfa/src/concurrency/ready_queue.cfa

tools/stat.py

Download in other formats: