Timestamp:
Jan 7, 2021, 3:27:00 PM
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children:
2b4daf2, 64aeca0
Parents:
3c64c668 (diff), eef8dfb (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into park_unpark

Location:
libcfa/src/concurrency
Files:
20 added
18 edited
1 moved

  • libcfa/src/concurrency/CtxSwitch-arm32.S

    r3c64c668 r58fe85a  
    1         @ 32 bit ARM context switch
    2         @ This function assumes that r9 has no special meaning on the platform it's
    3         @ being built on.
    4         @ If r9 is special, uncomment the following line and it will be left alone
     1        # 32 bit ARM context switch
     2        # This function assumes that r9 has no special meaning on the platform it's
     3        # being built on.
     4        # If r9 is special, uncomment the following line and it will be left alone
    55
    6         @ #define R9_SPECIAL
     6        # #define R9_SPECIAL
    77
    88        #define PTR_BYTE        4
     
    1717
    1818__cfactx_switch:
    19         @ save callee-saved registers: r4-r8, r10, r11, r13(sp) (plus r9 depending on platform specification)
    20         @ I've seen reference to 31 registers on 64-bit, if this is the case, more need to be saved
    21         @ save thread state registers: r14(lr)
    22         @ r12(ip) is intra-procedure-call scratch register, does not need saving between function calls
     19        # save callee-saved registers: r4-r8, r10, r11, r13(sp) (plus r9 depending on platform specification)
      20        # I've seen references to 31 registers on 64-bit; if so, more need to be saved
     21        # save thread state registers: r14(lr)
     22        # r12(ip) is intra-procedure-call scratch register, does not need saving between function calls
    2323
    2424        #ifdef R9_SPECIAL
     
    2828        #endif // R9_SPECIAL
    2929
    30         @ save floating point registers: s16-s31
     30        # save floating point registers: s16-s31
    3131        vstmdb r13!, {s16-s31}
    3232
    33         @ save frame pointer and stack pointer to outgoing datastructure
     33        # save frame pointer and stack pointer to outgoing datastructure
    3434        str sp, [r0, #SP_OFFSET]
    3535        str fp, [r0, #FP_OFFSET]
    3636
    37         @ restore frame pointer and stack pointer from incoming datastructure
     37        # restore frame pointer and stack pointer from incoming datastructure
    3838        ldr fp, [r1, #FP_OFFSET]
    3939        ldr sp, [r1, #SP_OFFSET]
    4040
    41         @ restore floating point registers: s16-s31
     41        # restore floating point registers: s16-s31
    4242        vldm r13!, {s16-s31}
    43         @ restore r14(lr)
    44         @ restore 64-bit extra registers?
    45         @ restore callee-saved registers: r4-r8, r10, r11, r13
     43        # restore r14(lr)
     44        # restore 64-bit extra registers?
     45        # restore callee-saved registers: r4-r8, r10, r11, r13
    4646
    4747        #ifdef R9_SPECIAL
    4848        ldmfd r13!, {r4-r8,r10,r11,r15}
    4949        #else
    50         ldmfd r13!, {r4-r11,r14}    @ loading r14 back into r15 returns
     50        ldmfd r13!, {r4-r11,r14}    # loading r14 back into r15 returns
    5151
    5252        mov r15, r14
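
    Note: the SP_OFFSET/FP_OFFSET constants used by the str/ldr pairs above must mirror the
    field order of __stack_context_t in invoke.h. A minimal C sketch, assuming the structure
    holds SP then FP as the stores above imply, that fails the build if the assembly offsets
    drift from the C layout:

        #include <stddef.h>

        struct __stack_context_t {
                void * SP;
                void * FP;
        };

        #define PTR_BYTE   4                    /* 4 on arm32/i386, 8 on x86_64 */
        #define SP_OFFSET  ( 0 * PTR_BYTE )
        #define FP_OFFSET  ( 1 * PTR_BYTE )

        /* compile-time check that the assembly offsets match the C layout */
        _Static_assert( offsetof(struct __stack_context_t, SP) == SP_OFFSET, "SP offset mismatch" );
        _Static_assert( offsetof(struct __stack_context_t, FP) == FP_OFFSET, "FP offset mismatch" );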
  • libcfa/src/concurrency/CtxSwitch-i386.S

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Dec 6 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Jul 21 22:29:25 2017
    13 // Update Count     : 1
    14 //
    15 // This  library is free  software; you  can redistribute  it and/or  modify it
    16 // under the terms of the GNU Lesser General Public License as published by the
    17 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
    18 // option) any later version.
    19 //
    20 // This library is distributed in the  hope that it will be useful, but WITHOUT
    21 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
    22 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
    23 // for more details.
    24 //
    25 // You should  have received a  copy of the  GNU Lesser General  Public License
    26 // along  with this library.
     12// Last Modified On : Sun Sep  6 18:23:37 2020
     13// Update Count     : 5
    2714//
    2815
    29 // This context switch routine depends on the fact that the stack of a new
    30 // thread has been set up to look like the thread has saved its context in
    31 // the normal manner.
    32 //
    33 // void CtxSwitch( machine_context *from, machine_context *to );
      16// The context switch routine requires the initial stack of a thread to
      17// look like the thread has saved its context in the normal manner.
    3418
    35 // Offsets in the context structure. This needs to be synchronized with the
    36 // high level code a little better.
      19// Offsets must be synchronized with the __stack_context_t in invoke.h.
    3720
    3821#define PTR_BYTE        4
    3922#define SP_OFFSET       ( 0 * PTR_BYTE )
    4023#define FP_OFFSET       ( 1 * PTR_BYTE )
    41 #define PC_OFFSET       ( 2 * PTR_BYTE )
    4224
     25// Context switch between coroutines/tasks.
     26//   void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) ;
     27// Arguments "from" in register 4(%esp), "to" in register 20(%esp)
     28
     29        .file "CtxSwitch-i386.S"
    4330        .text
    4431        .align 2
    45         .globl __cfactx_switch
    46         .type  __cfactx_switch, @function
     32        .global __cfactx_switch
     33        .type __cfactx_switch, @function
    4734__cfactx_switch:
    4835
    4936        // Copy the "from" context argument from the stack to register eax
    50         // Return address is at 0(%esp), with parameters following
     37        // Return address is at 0(%esp), with parameters following.
    5138
    5239        movl 4(%esp),%eax
     
    6350        movl %ebp,FP_OFFSET(%eax)
    6451
    65         // Copy the "to" context argument from the stack to register eax
    66         // Having pushed three words (= 12 bytes) on the stack, the
    67         // argument is now at 8 + 12 = 20(%esp)
     52        // Copy the "to" context argument from the stack to register eax. Having
     53        // pushed 3 words (= 12 bytes) on the stack, the argument is now at
     54        // 8 + 12 = 20(%esp).
    6855
    6956        movl 20(%esp),%eax
     
    8370
    8471        ret
    85         .size  __cfactx_switch, .-__cfactx_switch
     72        .size __cfactx_switch, .-__cfactx_switch
    8673
    8774// Local Variables: //
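
    Note: the 20(%esp) above falls out of the cdecl frame layout: on entry the return address
    is at 0(%esp), "from" at 4(%esp), and "to" at 8(%esp); pushing three callee-saved words
    moves "to" down by 12 bytes to 20(%esp). A small self-checking sketch of that arithmetic:

        #include <stdio.h>

        int main(void) {
                const int word = 4;          /* i386 stack slot size */
                int from_entry = 1 * word;   /* "from" is read before any pushes: 4(%esp) */
                int to_entry   = 2 * word;   /* "to" sits at 8(%esp) on entry */
                int pushed     = 3 * word;   /* three saved registers = 12 bytes */

                /* prints: from at 4(%esp), to at 20(%esp) */
                printf( "from at %d(%%esp), to at %d(%%esp)\n", from_entry, to_entry + pushed );
                return 0;
        }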
  • libcfa/src/concurrency/CtxSwitch-x86_64.S

    r3c64c668 r58fe85a  
    77// CtxSwitch-x86_64.S --
    88//
    9 // Author           : Thierry Delisle
    10 // Created On       : Mon Nov 28 12:27:26 2016
     9// Author           : Peter A. Buhr
     10// Created On       : Mon Aug 10 08:10:26 2020
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Jul 21 22:28:11 2017
    13 // Update Count     : 1
    14 //
    15 // This  library is free  software; you  can redistribute  it and/or  modify it
    16 // under the terms of the GNU Lesser General Public License as published by the
    17 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
    18 // option) any later version.
    19 //
    20 // This library is distributed in the  hope that it will be useful, but WITHOUT
    21 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
    22 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
    23 // for more details.
    24 //
    25 // You should  have received a  copy of the  GNU Lesser General  Public License
    26 // along  with this library.
     12// Last Modified On : Sat Oct 24 14:36:25 2020
     13// Update Count     : 10
    2714//
    2815
    29 // This context switch routine depends on the fact that the stack of a new
    30 // thread has been set up to look like the thread has saved its context in
    31 // the normal manner.
    32 //
    33 // void CtxSwitch( machine_context *from, machine_context *to );
      16// The context switch routine requires the initial stack of a thread to
      17// look like the thread has saved its context in the normal manner.
    3418
    35 // Offsets in the context structure. This needs to be synchronized with the
    36 // high level code a little better.
      19// Offsets must be synchronized with the __stack_context_t in invoke.h.
    3720
    3821#define PTR_BYTE        8
     
    4023#define FP_OFFSET       ( 1 * PTR_BYTE )
    4124
    42 //-----------------------------------------------------------------------------
    43 // Regular context switch routine which enables switching from one context to anouther
     25// Context switch between coroutines/tasks.
     26//   void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) ;
     27// Arguments "from" in register rdi, "to" in register rsi.
     28
     29        .file "CtxSwitch-x86_64.S"
    4430        .text
    4531        .align 2
    46         .globl __cfactx_switch
    47         .type  __cfactx_switch, @function
     32        .global __cfactx_switch
     33        .type __cfactx_switch, @function
    4834__cfactx_switch:
    4935
     
    7763
    7864        ret
    79         .size  __cfactx_switch, .-__cfactx_switch
     65        .size __cfactx_switch, .-__cfactx_switch
    8066
    81 //-----------------------------------------------------------------------------
    82 // Stub used to create new stacks which are ready to be context switched to
     67// Stub to create new stacks which can be context switched to
     68//   void __cfactx_invoke_stub( void );
     69
    8370        .text
    8471        .align 2
    85         .globl __cfactx_invoke_stub
    86         .type    __cfactx_invoke_stub, @function
     72        .global __cfactx_invoke_stub
     73        .type __cfactx_invoke_stub, @function
    8774__cfactx_invoke_stub:
    88         movq %rbx, %rdi
     75        movq %rbx, %rdi                                         // move main and this to first two arguments
    8976        movq %r12, %rsi
    90         jmp *%r13
    91         .size  __cfactx_invoke_stub, .-__cfactx_invoke_stub
     77        jmp *%r13                                                       // jmp to invoke
     78        .size __cfactx_invoke_stub, .-__cfactx_invoke_stub
    9279
    9380// Local Variables: //
    94 // mode: c //
     81// mode: asm //
    9582// tab-width: 4 //
    9683// End: //
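
    Note: __cfactx_invoke_stub works because the System V ABI passes the first two integer
    arguments in rdi and rsi; the seeded stack leaves "main" in rbx and "this" in r12, so two
    moves and an indirect jump turn the saved registers into a normal call. A hypothetical C
    analogue of what the stub achieves (names invented for illustration, not libcfa code):

        /* sketch only: the real stub is the three assembly instructions above */
        typedef void (*invoke_fn)( void (*main_fn)(void *), void * this_arg );

        static void stub_analogue( void * saved_rbx, void * saved_r12, invoke_fn saved_r13 ) {
                /* movq %rbx,%rdi ; movq %r12,%rsi ; jmp *%r13 */
                saved_r13( (void (*)(void *))saved_rbx, saved_r12 );
        }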
  • libcfa/src/concurrency/alarm.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Fri Jun 2 11:31:25 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Jan  5 08:41:36 2020
    13 // Update Count     : 69
     12// Last Modified On : Wed Jun 17 16:11:35 2020
     13// Update Count     : 75
    1414//
    1515
    1616#define __cforall_thread__
    1717
    18 extern "C" {
    1918#include <errno.h>
    2019#include <stdio.h>
     20#include <unistd.h>
    2121#include <string.h>
    22 #include <unistd.h>
    2322#include <sys/time.h>
    24 }
    2523
    2624#include "alarm.hfa"
    27 #include "kernel_private.hfa"
     25#include "kernel/fwd.hfa"
    2826#include "preemption.hfa"
    2927
     
    4745//=============================================================================================
    4846
    49 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ) with( this ) {
     47void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period) with( this ) {
    5048        this.thrd = thrd;
    5149        this.alarm = alarm;
    5250        this.period = period;
    53         next = 0;
    5451        set = false;
    55         kernel_alarm = false;
     52        type = User;
    5653}
    5754
    58 void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period ) with( this ) {
     55void ?{}( alarm_node_t & this, processor * proc, Time alarm, Duration period ) with( this ) {
    5956        this.proc = proc;
    6057        this.alarm = alarm;
    6158        this.period = period;
    62         next = 0;
    6359        set = false;
    64         kernel_alarm = true;
     60        type = Kernel;
     61}
     62void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period ) with( this ) {
     63        this.alarm = alarm;
     64        this.period = period;
     65        this.callback = callback;
     66        set = false;
     67        type = Callback;
    6568}
    6669
     
    7174}
    7275
    73 #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
    74 bool validate( alarm_list_t * this ) {
    75         alarm_node_t ** it = &this->head;
    76         while( (*it) ) {
    77                 it = &(*it)->next;
     76void insert( alarm_list_t * this, alarm_node_t * n ) {
     77        alarm_node_t * it = & (*this)`first;
     78        while( it && (n->alarm > it->alarm) ) {
     79                it = & (*it)`next;
     80        }
     81        if ( it ) {
     82                insert_before( *it, *n );
     83        } else {
     84                insert_last(*this, *n);
    7885        }
    7986
    80         return it == this->tail;
    81 }
    82 #endif
    83 
    84 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
    85         verify( !n->next );
    86         if( p == this->tail ) {
    87                 this->tail = &n->next;
    88         }
    89         else {
    90                 n->next = *p;
    91         }
    92         *p = n;
    93 
    94         verify( validate( this ) );
    95 }
    96 
    97 void insert( alarm_list_t * this, alarm_node_t * n ) {
    98         alarm_node_t ** it = &this->head;
    99         while( (*it) && (n->alarm > (*it)->alarm) ) {
    100                 it = &(*it)->next;
    101         }
    102 
    103         insert_at( this, n, it );
    104 
    105         verify( validate( this ) );
     87        verify( validate( *this ) );
    10688}
    10789
    10890alarm_node_t * pop( alarm_list_t * this ) {
    109         alarm_node_t * head = this->head;
     91        verify( validate( *this ) );
     92        alarm_node_t * head = & (*this)`first;
    11093        if( head ) {
    111                 this->head = head->next;
    112                 if( !head->next ) {
    113                         this->tail = &this->head;
    114                 }
    115                 head->next = 0p;
     94                remove(*head);
    11695        }
    117         verify( validate( this ) );
     96        verify( validate( *this ) );
    11897        return head;
    11998}
    12099
    121 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
    122         verify( it );
    123         verify( (*it) == n );
    124 
    125         (*it) = n->next;
    126         if( !n-> next ) {
    127                 this->tail = it;
    128         }
    129         n->next = 0p;
    130 
    131         verify( validate( this ) );
    132 }
    133 
    134 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
    135         alarm_node_t ** it = &this->head;
    136         while( (*it) && (*it) != n ) {
    137                 it = &(*it)->next;
    138         }
    139 
    140         verify( validate( this ) );
    141 
    142         if( *it ) { remove_at( this, n, it ); }
    143 
    144         verify( validate( this ) );
    145 }
    146 
    147100void register_self( alarm_node_t * this ) {
    148         alarm_list_t * alarms = &event_kernel->alarms;
     101        alarm_list_t & alarms = event_kernel->alarms;
    149102
    150103        disable_interrupts();
     
    152105        {
    153106                verify( validate( alarms ) );
    154                 bool first = !alarms->head;
     107                bool first = ! & alarms`first;
    155108
    156                 insert( alarms, this );
     109                insert( &alarms, this );
    157110                if( first ) {
    158                         __kernel_set_timer( alarms->head->alarm - __kernel_get_time() );
     111                        __kernel_set_timer( alarms`first.alarm - __kernel_get_time() );
    159112                }
    160113        }
     
    168121        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
    169122        {
    170                 verify( validate( &event_kernel->alarms ) );
    171                 remove( &event_kernel->alarms, this );
     123                verify( validate( event_kernel->alarms ) );
     124                remove( *this );
    172125        }
    173126        unlock( event_kernel->lock );
     
    176129}
    177130
     131//=============================================================================================
     132// Utilities
     133//=============================================================================================
     134
     135void sleep( Duration duration ) {
     136        alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s };
     137
     138        register_self( &node );
     139        park();
     140
     141        /* paranoid */ verify( !node.set );
     142        /* paranoid */ verify( & node`next == 0p );
     143        /* paranoid */ verify( & node`prev == 0p );
     144}
     145
    178146// Local Variables: //
    179147// mode: c //
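
    Note: the new sleep builds a one-shot alarm node for the current thread on its own stack,
    registers it, and parks; the preemption handler unparks the thread when the alarm fires.
    A minimal usage sketch, assuming the standard libcfa thread headers expose sleep:

        #include <thread.hfa>

        int main() {
                sleep( 1`s );    // register a one-shot alarm, park(), resume after ~1 second
        }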
  • libcfa/src/concurrency/alarm.hfa

    r3c64c668 r58fe85a  
    2323#include "time.hfa"
    2424
     25#include "containers/list.hfa"
     26
    2527struct $thread;
    2628struct processor;
     
    3739//=============================================================================================
    3840
     41enum alarm_type{ Kernel = 0, User = 1, Callback = 2 };
     42
     43struct alarm_node_t;
     44
     45typedef void (*Alarm_Callback)(alarm_node_t & );
     46
    3947struct alarm_node_t {
    4048        Time alarm;                             // time when alarm goes off
    4149        Duration period;                        // if > 0 => period of alarm
    42         alarm_node_t * next;            // intrusive link list field
     50
     51        DLISTED_MGD_IMPL_IN(alarm_node_t)
    4352
    4453        union {
    45                 $thread * thrd; // thrd who created event
    46                 processor * proc;               // proc who created event
     54                $thread * thrd;                                 // thrd who created event
     55                processor * proc;                               // proc who created event
     56                Alarm_Callback callback;                // callback to handle event
    4757        };
    4858
     4959        bool set                :1;             // whether or not the alarm has been registered
    50         bool kernel_alarm       :1;             // true if this is not a user defined alarm
      60        enum alarm_type type;           // which union member is active: Kernel, User, or Callback alarm
    5161};
    52 
    53 typedef alarm_node_t ** __alarm_it_t;
     62DLISTED_MGD_IMPL_OUT(alarm_node_t)
    5463
    5564void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
    5665void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
     66void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period );
    5767void ^?{}( alarm_node_t & this );
    5868
    59 struct alarm_list_t {
    60         alarm_node_t * head;
    61         __alarm_it_t tail;
    62 };
    63 
    64 static inline void ?{}( alarm_list_t & this ) with( this ) {
    65         head = 0;
    66         tail = &head;
    67 }
     69typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
    6870
    6971void insert( alarm_list_t * this, alarm_node_t * n );
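
    Note: with the move to the managed dlist, insert keeps the list ordered by expiry time:
    walk to the first node with a later alarm and link in front of it, otherwise append. A
    plain-C sketch of the same ordered insert over an intrusive doubly-linked list (types
    hypothetical, not the dlist API):

        struct node { struct node * next, * prev; long alarm; };
        struct list { struct node * head, * tail; };

        static void insert_sorted( struct list * l, struct node * n ) {
                struct node * it = l->head;
                while ( it && n->alarm > it->alarm ) it = it->next;  /* first later alarm */
                if ( it ) {                                          /* insert_before( it, n ) */
                        n->next = it; n->prev = it->prev;
                        if ( it->prev ) it->prev->next = n; else l->head = n;
                        it->prev = n;
                } else {                                             /* insert_last */
                        n->next = 0; n->prev = l->tail;
                        if ( l->tail ) l->tail->next = n; else l->head = n;
                        l->tail = n;
                }
        }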
  • libcfa/src/concurrency/coroutine.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Mon Nov 28 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 12:29:25 2020
    13 // Update Count     : 16
     12// Last Modified On : Tue Dec 15 12:06:04 2020
     13// Update Count     : 23
    1414//
    1515
     
    1818#include "coroutine.hfa"
    1919
    20 extern "C" {
    2120#include <stddef.h>
    2221#include <malloc.h>
     
    2423#include <string.h>
    2524#include <unistd.h>
    26 // use this define to make unwind.h play nice, definetely a hack
    27 #define HIDE_EXPORTS
     25#include <sys/mman.h>                                                                   // mprotect
    2826#include <unwind.h>
    29 #undef HIDE_EXPORTS
    30 #include <sys/mman.h>
    31 }
    3227
    3328#include "kernel_private.hfa"
     29#include "exception.hfa"
     30#include "math.hfa"
     31
     32#define CFA_COROUTINE_USE_MMAP 0
    3433
    3534#define __CFA_INVOKE_PRIVATE__
     
    4746
    4847//-----------------------------------------------------------------------------
     48FORALL_DATA_INSTANCE(CoroutineCancelled, (dtype coroutine_t), (coroutine_t))
     49
     50forall(dtype T)
     51void mark_exception(CoroutineCancelled(T) *) {}
     52
     53forall(dtype T)
     54void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) {
     55        dst->virtual_table = src->virtual_table;
     56        dst->the_coroutine = src->the_coroutine;
     57        dst->the_exception = src->the_exception;
     58}
     59
     60forall(dtype T)
     61const char * msg(CoroutineCancelled(T) *) {
     62        return "CoroutineCancelled(...)";
     63}
     64
     65// This code should not be inlined. It is the error path on resume.
     66forall(dtype T | is_coroutine(T))
     67void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ) {
     68        verify( desc->cancellation );
     69        desc->state = Cancelled;
     70        exception_t * except = __cfaehm_cancellation_exception( desc->cancellation );
     71
      72        // TODO: Remove explicit vtable set once trac#186 is fixed.
      73        CoroutineCancelled(T) cancelled;
      74        cancelled.virtual_table = &get_exception_vtable(&cancelled);
      75        cancelled.the_coroutine = &cor;
      76        cancelled.the_exception = except;
      77        throwResume cancelled;
     78
     79        except->virtual_table->free( except );
     80        free( desc->cancellation );
     81        desc->cancellation = 0p;
     82}
     83
     84//-----------------------------------------------------------------------------
    4985// Global state variables
    5086
    5187// minimum feasible stack size in bytes
    52 #define MinStackSize 1000
     88static const size_t MinStackSize = 1000;
    5389extern size_t __page_size;                              // architecture pagesize HACK, should go in proper runtime singleton
     90extern int __map_prot;
    5491
    5592void __stack_prepare( __stack_info_t * this, size_t create_size );
     93void __stack_clean  ( __stack_info_t * this );
    5694
    5795//-----------------------------------------------------------------------------
     
    74112        bool userStack = ((intptr_t)this.storage & 0x1) != 0;
    75113        if ( ! userStack && this.storage ) {
    76                 __attribute__((may_alias)) intptr_t * istorage = (intptr_t *)&this.storage;
    77                 *istorage &= (intptr_t)-1;
    78 
    79                 void * storage = this.storage->limit;
    80                 __cfaabi_dbg_debug_do(
    81                         storage = (char*)(storage) - __page_size;
    82                         if ( mprotect( storage, __page_size, PROT_READ | PROT_WRITE ) == -1 ) {
    83                                 abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
    84                         }
    85                 );
    86                 __cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
    87                 free( storage );
     114                __stack_clean( &this );
    88115        }
    89116}
     
    101128void ^?{}($coroutine& this) {
    102129        if(this.state != Halted && this.state != Start && this.state != Primed) {
    103                 $coroutine * src = TL_GET( this_thread )->curr_cor;
     130                $coroutine * src = active_coroutine();
    104131                $coroutine * dst = &this;
    105132
     
    134161        assert(__page_size != 0l);
    135162        size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
     163        size = ceiling(size, __page_size);
    136164
    137165        // If we are running debug, we also need to allocate a guardpage to catch stack overflows.
    138166        void * storage;
    139         __cfaabi_dbg_debug_do(
    140                 storage = memalign( __page_size, size + __page_size );
    141         );
    142         __cfaabi_dbg_no_debug_do(
    143                 storage = (void*)malloc(size);
    144         );
    145 
     167        #if CFA_COROUTINE_USE_MMAP
     168                storage = mmap(0p, size + __page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
     169                if(storage == ((void*)-1)) {
     170                        abort( "coroutine stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
     171                }
     172                if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
     173                        abort( "coroutine stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
     174                } // if
     175                storage = (void *)(((intptr_t)storage) + __page_size);
     176        #else
     177                __cfaabi_dbg_debug_do(
     178                        storage = memalign( __page_size, size + __page_size );
     179                );
     180                __cfaabi_dbg_no_debug_do(
     181                        storage = (void*)malloc(size);
     182                );
     183
     184                __cfaabi_dbg_debug_do(
     185                        if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
     186                                abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
     187                        }
     188                        storage = (void *)(((intptr_t)storage) + __page_size);
     189                );
     190        #endif
    146191        __cfaabi_dbg_print_safe("Kernel : Created stack %p of size %zu\n", storage, size);
    147         __cfaabi_dbg_debug_do(
    148                 if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
    149                         abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
    150                 }
    151                 storage = (void *)(((intptr_t)storage) + __page_size);
    152         );
    153192
    154193        verify( ((intptr_t)storage & (libAlign() - 1)) == 0ul );
    155194        return [storage, size];
     195}
     196
     197void __stack_clean  ( __stack_info_t * this ) {
     198        size_t size = ((intptr_t)this->storage->base) - ((intptr_t)this->storage->limit) + sizeof(__stack_t);
     199        void * storage = this->storage->limit;
     200
     201        #if CFA_COROUTINE_USE_MMAP
     202                storage = (void *)(((intptr_t)storage) - __page_size);
     203                if(munmap(storage, size + __page_size) == -1) {
     204                        abort( "coroutine stack destruction : internal error, munmap failure, error(%d) %s.", errno, strerror( errno ) );
     205                }
     206        #else
     207                __cfaabi_dbg_debug_do(
     208                        storage = (char*)(storage) - __page_size;
     209                        if ( mprotect( storage, __page_size, __map_prot ) == -1 ) {
     210                                abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
     211                        }
     212                );
     213
     214                free( storage );
     215        #endif
     216        __cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
    156217}
    157218
     
    175236                size = libFloor(create_size - stack_data_size - diff, libAlign());
    176237        } // if
    177         assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
    178 
    179         this->storage = (__stack_t *)((intptr_t)storage + size);
     238        assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %zd bytes for a stack.", size, MinStackSize );
     239
     240        this->storage = (__stack_t *)((intptr_t)storage + size - sizeof(__stack_t));
    180241        this->storage->limit = storage;
    181         this->storage->base  = (void*)((intptr_t)storage + size);
     242        this->storage->base  = (void*)((intptr_t)storage + size - sizeof(__stack_t));
     243        this->storage->exception_context.top_resume = 0p;
     244        this->storage->exception_context.current_exception = 0p;
    182245        __attribute__((may_alias)) intptr_t * istorage = (intptr_t*)&this->storage;
    183246        *istorage |= userStack ? 0x1 : 0x0;
     
    205268
    206269        struct $coroutine * __cfactx_cor_finish(void) {
    207                 struct $coroutine * cor = kernelTLS.this_thread->curr_cor;
     270                struct $coroutine * cor = active_coroutine();
    208271
    209272                if(cor->state == Primed) {
    210                         suspend();
     273                        __cfactx_suspend();
    211274                }
    212275
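
    Note: the mmap path reserves one extra page below the usable stack and protects it
    PROT_NONE, so an overflow faults immediately instead of corrupting adjacent memory. A
    standalone C sketch of the technique (flags simplified; the code above additionally
    requests PROT_EXEC and reports failures through abort):

        #include <stdio.h>
        #include <stdlib.h>
        #include <unistd.h>
        #include <sys/mman.h>

        static void * alloc_guarded_stack( size_t size ) {
                size_t page = sysconf( _SC_PAGESIZE );
                void * storage = mmap( 0, size + page, PROT_READ | PROT_WRITE,
                                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 );
                if ( storage == MAP_FAILED ) { perror( "mmap" ); exit( 1 ); }
                /* lowest page becomes the guard: any overflow past the limit faults */
                if ( mprotect( storage, page, PROT_NONE ) == -1 ) { perror( "mprotect" ); exit( 1 ); }
                return (char *)storage + page;   /* usable stack starts above the guard */
        }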
  • libcfa/src/concurrency/coroutine.hfa

    r3c64c668 r58fe85a  
    1818#include <assert.h>
    1919#include "invoke.h"
     20#include "../exception.hfa"
     21
     22//-----------------------------------------------------------------------------
     23// Exception thrown from resume when a coroutine stack is cancelled.
     24FORALL_DATA_EXCEPTION(CoroutineCancelled, (dtype coroutine_t), (coroutine_t)) (
     25        coroutine_t * the_coroutine;
     26        exception_t * the_exception;
     27);
     28
     29forall(dtype T)
     30void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src);
     31
     32forall(dtype T)
     33const char * msg(CoroutineCancelled(T) *);
    2034
    2135//-----------------------------------------------------------------------------
     
    2337// Anything that implements this trait can be resumed.
    2438// Anything that is resumed is a coroutine.
    25 trait is_coroutine(dtype T) {
    26       void main(T & this);
    27       $coroutine * get_coroutine(T & this);
     39trait is_coroutine(dtype T | IS_RESUMPTION_EXCEPTION(CoroutineCancelled, (T))) {
     40        void main(T & this);
     41        $coroutine * get_coroutine(T & this);
    2842};
    2943
     
    4660//-----------------------------------------------------------------------------
    4761// Public coroutine API
    48 static inline void suspend(void);
    49 
    50 forall(dtype T | is_coroutine(T))
    51 static inline T & resume(T & cor);
    52 
    5362forall(dtype T | is_coroutine(T))
    5463void prime(T & cor);
    5564
    56 static inline struct $coroutine * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
     65static inline struct $coroutine * active_coroutine() { return active_thread()->curr_cor; }
    5766
    5867//-----------------------------------------------------------------------------
     
    7584static inline void $ctx_switch( $coroutine * src, $coroutine * dst ) __attribute__((nonnull (1, 2))) {
    7685        // set state of current coroutine to inactive
    77         src->state = src->state == Halted ? Halted : Inactive;
     86        src->state = src->state == Halted ? Halted : Blocked;
    7887
    7988        // set new coroutine that task is executing
    80         TL_GET( this_thread )->curr_cor = dst;
     89        active_thread()->curr_cor = dst;
    8190
    8291        // context switch to specified coroutine
     
    93102}
    94103
    95 extern void __stack_prepare   ( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
     104extern void __stack_prepare( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
     105extern void __stack_clean  ( __stack_info_t * this );
     106
    96107
    97108// Suspend implementation inlined for performance
    98 static inline void suspend(void) {
    99         // optimization : read TLS once and reuse it
    100         // Safety note: this is preemption safe since if
    101         // preemption occurs after this line, the pointer
    102         // will also migrate which means this value will
    103         // stay in syn with the TLS
    104         $coroutine * src = TL_GET( this_thread )->curr_cor;
     109extern "C" {
     110        static inline void __cfactx_suspend(void) {
     111                // optimization : read TLS once and reuse it
     112                // Safety note: this is preemption safe since if
     113                // preemption occurs after this line, the pointer
     114                // will also migrate which means this value will
      115                // stay in sync with the TLS
     116                $coroutine * src = active_coroutine();
    105117
    106         assertf( src->last != 0,
    107                 "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
    108                 "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
    109                 src->name, src );
    110         assertf( src->last->state != Halted,
    111                 "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
    112                 "Possible cause is terminated coroutine's main routine has already returned.",
    113                 src->name, src, src->last->name, src->last );
     118                assertf( src->last != 0,
     119                        "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
     120                        "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
     121                        src->name, src );
     122                assertf( src->last->state != Halted,
     123                        "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
     124                        "Possible cause is terminated coroutine's main routine has already returned.",
     125                        src->name, src, src->last->name, src->last );
    114126
    115         $ctx_switch( src, src->last );
     127                $ctx_switch( src, src->last );
     128        }
    116129}
     130
     131forall(dtype T | is_coroutine(T))
     132void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc );
    117133
    118134// Resume implementation inlined for performance
     
    124140        // will also migrate which means this value will
     125141        // stay in sync with the TLS
    126         $coroutine * src = TL_GET( this_thread )->curr_cor;
     142        $coroutine * src = active_coroutine();
    127143        $coroutine * dst = get_coroutine(cor);
    128144
    129145        if( unlikely(dst->context.SP == 0p) ) {
    130                 TL_GET( this_thread )->curr_cor = dst;
    131146                __stack_prepare(&dst->stack, 65000);
    132147                __cfactx_start(main, dst, cor, __cfactx_invoke_coroutine);
    133                 TL_GET( this_thread )->curr_cor = src;
    134148        }
    135149
     
    148162        // always done for performance testing
    149163        $ctx_switch( src, dst );
     164        if ( unlikely(dst->cancellation) ) {
     165                __cfaehm_cancelled_coroutine( cor, dst );
     166        }
    150167
    151168        return cor;
     
    158175        // will also migrate which means this value will
     159176        // stay in sync with the TLS
    160         $coroutine * src = TL_GET( this_thread )->curr_cor;
     177        $coroutine * src = active_coroutine();
    161178
    162179        // not resuming self ?
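
    Note: resume now carries the cancellation check on its exit path. Condensed control flow
    of the inlined resume above (not verbatim; forall constraints elided):

        T & resume( T & cor ) {
                $coroutine * src = active_coroutine();
                $coroutine * dst = get_coroutine( cor );

                if ( dst->context.SP == 0p ) {                  // first resume: no stack yet
                        __stack_prepare( &dst->stack, 65000 );  // allocate and seed the stack
                        __cfactx_start( main, dst, cor, __cfactx_invoke_coroutine );
                }

                $ctx_switch( src, dst );                        // run the coroutine
                if ( dst->cancellation )                        // it ended with a pending exception
                        __cfaehm_cancelled_coroutine( cor, dst );
                return cor;
        }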
  • libcfa/src/concurrency/invoke.c

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Jan 17 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Feb  9 16:37:42 2018
    13 // Update Count     : 5
     12// Last Modified On : Sat Oct 24 14:35:28 2020
     13// Update Count     : 32
    1414//
    1515
     
    109109
    110110        struct FakeStack {
    111             void *fixedRegisters[3];              // fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
    112             void *rturn;                          // where to go on return from uSwitch
    113             void *dummyReturn;                    // fake return compiler would have pushed on call to uInvoke
    114             void *argument[3];                    // for 16-byte ABI, 16-byte alignment starts here
    115             void *padding;                        // padding to force 16-byte alignment, as "base" is 16-byte aligned
     111            void *fixedRegisters[3];                                            // fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
     112            void *rturn;                                                                        // where to go on return from uSwitch
     113            void *dummyReturn;                                                          // fake return compiler would have pushed on call to uInvoke
     114            void *argument[3];                                                          // for 16-byte ABI, 16-byte alignment starts here
     115            void *padding;                                                                      // padding to force 16-byte alignment, as "base" is 16-byte aligned
    116116        };
    117117
     
    122122
    123123        fs->dummyReturn = NULL;
    124         fs->argument[0] = main;     // argument to invoke
    125         fs->argument[1] = this;     // argument to invoke
     124        fs->argument[0] = main;                                                         // argument to invoke
     125        fs->argument[1] = this;                                                         // argument to invoke
    126126        fs->rturn = invoke;
    127127
     
    129129
    130130        struct FakeStack {
    131                 void *fixedRegisters[5];            // fixed registers rbx, r12, r13, r14, r15
    132                 void *rturn;                        // where to go on return from uSwitch
    133                 void *dummyReturn;                  // NULL return address to provide proper alignment
     131                void *fixedRegisters[5];                                                // fixed registers rbx, r12, r13, r14, r15
     132                void *rturn;                                                                    // where to go on return from uSwitch
     133                void *dummyReturn;                                                              // NULL return address to provide proper alignment
    134134        };
    135135
    136136        cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
    137         cor->context.FP = NULL;         // terminate stack with NULL fp
     137        cor->context.FP = NULL;                                                         // terminate stack with NULL fp
    138138
    139139        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
     
    141141        fs->dummyReturn = NULL;
    142142        fs->rturn = __cfactx_invoke_stub;
    143         fs->fixedRegisters[0] = main;
    144         fs->fixedRegisters[1] = this;
     143        fs->fixedRegisters[0] = main;                                           // argument to invoke
     144        fs->fixedRegisters[1] = this;                                           // argument to invoke
    145145        fs->fixedRegisters[2] = invoke;
    146146
    147 #elif defined( __ARM_ARCH )
    148 #error ARM needs to be upgrade to use to parameters like X86/X64 (A.K.A. : I broke this and do not know how to fix it)
     147#elif defined( __ARM_ARCH_32 )
      148#error ARM needs to be upgraded to use two parameters like X86/X64 (A.K.A. : I broke this and do not know how to fix it)
     149        // More details about the error:
     150        // To avoid the thunk problem, I changed the invoke routine to pass the main explicitly
     151        // instead of relying on an assertion. This effectively hoists any required thunk one level
     152        // which was enough to get to global scope in most cases.
     153        // This means that __cfactx_invoke_... now takes two parameters and the FakeStack needs
     154        // to be adjusted as a consequence of that.
     155        // I don't know how to do that for ARM, hence the #error
     156
    149157        struct FakeStack {
    150                 float fpRegs[16];                       // floating point registers
    151                 void *intRegs[9];                       // integer/pointer registers
    152                 void *arg[2];                           // placeholder for this pointer
     158                float fpRegs[16];                                                               // floating point registers
     159                void * intRegs[9];                                                              // integer/pointer registers
     160                void * arg[2];                                                                  // placeholder for this pointer
    153161        };
    154162
     
    162170        fs->arg[1] = invoke;
    163171
     172#elif defined( __ARM_ARCH )
     173        struct FakeStack {
     174                void * intRegs[12];                                                             // x19-x30 integer registers
     175                double fpRegs[8];                                                               // v8-v15 floating point
     176        };
     177
     178        cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
     179        cor->context.FP = NULL;
     180
     181        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
     182
     183        fs->intRegs[0] = main;                                                          // argument to invoke x19 => x0
     184        fs->intRegs[1] = this;                                                          // argument to invoke x20 => x1
     185        fs->intRegs[2] = invoke;
     186        fs->intRegs[11] = __cfactx_invoke_stub;                         // link register x30 => ret moves to pc
    164187#else
     165188        #error unknown hardware architecture
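
    Note: each FakeStack layout works by making a brand-new stack indistinguishable from one
    on which __cfactx_switch has already saved a context: the first switch "restores" the
    seeded registers and returns into __cfactx_invoke_stub. Sketch of the x86_64 case,
    mirroring the structure above:

        /* the first __cfactx_switch pops fixedRegisters into rbx,r12,r13,r14,r15,
         * then ret pops rturn into rip, landing in __cfactx_invoke_stub */
        struct FakeStack {
                void * fixedRegisters[5];   /* rbx = main, r12 = this, r13 = invoke, ... */
                void * rturn;               /* __cfactx_invoke_stub */
                void * dummyReturn;         /* NULL, keeps 16-byte alignment */
        };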
  • libcfa/src/concurrency/invoke.h

    r3c64c668 r58fe85a  
    1717#include "bits/defs.hfa"
    1818#include "bits/locks.hfa"
     19#include "kernel/fwd.hfa"
    1920
    2021#ifdef __cforall
     
    2627#define _INVOKE_H_
    2728
    28 #ifdef __ARM_ARCH
    29         // function prototypes are only really used by these macros on ARM
    30         void disable_global_interrupts();
    31         void enable_global_interrupts();
    32 
    33         #define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
    34                 disable_global_interrupts(); \
    35                 target = kernelTLS.member; \
    36                 enable_global_interrupts(); \
    37                 target; } )
    38         #define TL_SET( member, value ) disable_global_interrupts(); \
    39                 kernelTLS.member = value; \
    40                 enable_global_interrupts();
    41 #else
    42         #define TL_GET( member ) kernelTLS.member
    43         #define TL_SET( member, value ) kernelTLS.member = value;
    44 #endif
    45 
    46         #ifdef __cforall
    47         extern "Cforall" {
    48                 extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
    49                         struct $thread    * volatile this_thread;
    50                         struct processor      * volatile this_processor;
    51 
    52                         struct {
    53                                 volatile unsigned short disable_count;
    54                                 volatile bool enabled;
    55                                 volatile bool in_progress;
    56                         } preemption_state;
    57 
    58                         uint32_t rand_seed;
    59                 } kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
    60         }
    61         #endif
     29        struct __cfaehm_try_resume_node;
     30        struct __cfaehm_base_exception_t;
     31        struct exception_context_t {
     32                struct __cfaehm_try_resume_node * top_resume;
     33                struct __cfaehm_base_exception_t * current_exception;
     34        };
    6235
    6336        struct __stack_context_t {
     
    8558                // base of stack
    8659                void * base;
     60
     61                // Information for exception handling.
     62                struct exception_context_t exception_context;
    8763        };
    8864
     
    9268        };
    9369
    94         enum coroutine_state { Halted, Start, Primed, Inactive, Active, Rerun };
    95         enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };
     70        enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active, Cancelled, Halting };
    9671
    9772        struct $coroutine {
     
    10681
    10782                // current execution status for coroutine
    108                 enum coroutine_state state;
     83                enum __Coroutine_State state;
    10984
    11085                // first coroutine to resume this one
     
    11893
    11994        };
     95        // Wrapper for gdb
     96        struct cfathread_coroutine_t { struct $coroutine debug; };
     97
     98        static inline struct __stack_t * __get_stack( struct $coroutine * cor ) {
     99                return (struct __stack_t*)(((uintptr_t)cor->stack.storage) & ((uintptr_t)-2));
     100        }
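
    Note: __get_stack strips a tag: bit 0 of stack.storage records whether the stack is
    user-supplied, which is safe because the storage pointer is at least 2-byte aligned;
    masking with (uintptr_t)-2 recovers the real pointer. A tiny C sketch of the scheme:

        #include <stdint.h>

        static inline void * untag( void * storage )         { return (void *)((uintptr_t)storage & ~(uintptr_t)1); }
        static inline int    is_user_stack( void * storage ) { return (int)((uintptr_t)storage & 1); }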
    120101
    121102        // struct which calls the monitor is accepting
     
    150131                struct __condition_node_t * dtor_node;
    151132        };
     133        // Wrapper for gdb
     134        struct cfathread_monitor_t { struct $monitor debug; };
    152135
    153136        struct __monitor_group_t {
     
    157140                // last function that acquired monitors
    158141                fptr_t func;
     142        };
     143
      144        // Linked list fields
      145        // intrusive link field for threads
     146        struct __thread_desc_link {
     147                struct $thread * next;
     148                struct $thread * prev;
     149                volatile unsigned long long ts;
     150                int preferred;
    159151        };
    160152
     
    165157
    166158                // current execution status for coroutine
    167                 volatile int state;
    168                 enum __Preemption_Reason preempted;
     159                // Possible values are:
     160                //    - TICKET_BLOCKED (-1) thread is blocked
     161                //    - TICKET_RUNNING ( 0) thread is running
     162                //    - TICKET_UNBLOCK ( 1) thread should ignore next block
     163                volatile int ticket;
     164                enum __Coroutine_State state:8;
     165                enum __Preemption_Reason preempted:8;
    169166
     170167                //SKULLDUGGERY: errno is not saved in the thread data structure because returnToKernel appears to be the only function that requires saving and restoring it
     168
     169                // pointer to the cluster on which the thread is running
     170                struct cluster * curr_cluster;
     171
      172        // Linked list fields
      173        // intrusive link field for threads
     174                struct __thread_desc_link link;
    171175
    172176                // coroutine body used to store context
     
    182186                struct $monitor *  self_mon_p;
    183187
    184                 // pointer to the cluster on which the thread is running
    185                 struct cluster * curr_cluster;
    186 
    187188                // monitors currently held by this thread
    188189                struct __monitor_group_t monitors;
    189190
    190                 // Link lists fields
    191                 // instrusive link field for threads
    192                 struct $thread * next;
     191                // used to put threads on user data structures
     192                struct {
     193                        struct $thread * next;
     194                        struct $thread * back;
     195                } seqable;
    193196
    194197                struct {
     
    196199                        struct $thread * prev;
    197200                } node;
    198         };
     201
     202                #if defined( __CFA_WITH_VERIFY__ )
     203                        void * canary;
     204                #endif
     205        };
     206        // Wrapper for gdb
     207        struct cfathread_thread_t { struct $thread debug; };
     208
     209        #ifdef __CFA_DEBUG__
     210                void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]);
     211        #else
     212                #define __cfaabi_dbg_record_thrd(x, y, z)
     213        #endif
    199214
    200215        #ifdef __cforall
    201216        extern "Cforall" {
     217
    202218                static inline $thread *& get_next( $thread & this ) __attribute__((const)) {
    203                         return this.next;
     219                        return this.link.next;
    204220                }
    205221
    206222                static inline [$thread *&, $thread *& ] __get( $thread & this ) __attribute__((const)) {
    207223                        return this.node.[next, prev];
     224                }
     225
     226                static inline $thread *& Back( $thread * this ) __attribute__((const)) {
     227                        return this->seqable.back;
     228                }
     229
     230                static inline $thread *& Next( $thread * this ) __attribute__((const)) {
     231                        return this->seqable.next;
     232                }
     233
     234                static inline bool listed( $thread * this ) {
     235                        return this->seqable.next != 0p;
    208236                }
    209237
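
    Note: the ticket field replaces the old state-based handshake for park/unpark: an unpark
    that races ahead of the park leaves TICKET_UNBLOCK behind, so the next park returns
    immediately instead of sleeping. A hedged sketch of the idea using GCC atomics
    (block_self/wake are hypothetical scheduler hooks, not libcfa's implementation):

        #define TICKET_BLOCKED (-1)   /* thread is blocked          */
        #define TICKET_RUNNING ( 0)   /* thread is running          */
        #define TICKET_UNBLOCK ( 1)   /* next park should not block */

        extern void block_self( void );   /* hypothetical: yield to the scheduler */
        extern void wake( void );         /* hypothetical: make the thread runnable */

        void park_sketch( volatile int * ticket ) {
                int old = __atomic_fetch_sub( ticket, 1, __ATOMIC_SEQ_CST );
                if ( old == TICKET_UNBLOCK ) return;   /* consumed a pending unpark */
                block_self();                          /* ticket is now BLOCKED: sleep */
        }

        void unpark_sketch( volatile int * ticket ) {
                int old = __atomic_fetch_add( ticket, 1, __ATOMIC_SEQ_CST );
                if ( old == TICKET_BLOCKED ) wake();   /* target was asleep: wake it */
                /* old == TICKET_RUNNING: ticket now UNBLOCK, next park returns early */
        }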
  • libcfa/src/concurrency/kernel.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Jan 17 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 13:03:15 2020
    13 // Update Count     : 58
     12// Last Modified On : Mon Aug 31 07:08:20 2020
     13// Update Count     : 71
    1414//
    1515
    1616#define __cforall_thread__
     17// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    1718
    1819//C Includes
    19 #include <stddef.h>
    2020#include <errno.h>
    21 #include <string.h>
    22 extern "C" {
    2321#include <stdio.h>
    24 #include <fenv.h>
    25 #include <sys/resource.h>
    2622#include <signal.h>
    2723#include <unistd.h>
    28 #include <limits.h>                                                                             // PTHREAD_STACK_MIN
    29 #include <sys/mman.h>                                                                   // mprotect
    30 }
    3124
    3225//CFA Includes
    33 #include "time.hfa"
    3426#include "kernel_private.hfa"
    3527#include "preemption.hfa"
    36 #include "startup.hfa"
    3728
    3829//Private includes
     
    4031#include "invoke.h"
    4132
     33
    4234//-----------------------------------------------------------------------------
    4335// Some assembly required
    4436#if defined( __i386 )
    45         #define CtxGet( ctx )        \
    46                 __asm__ volatile (     \
    47                         "movl %%esp,%0\n"\
    48                         "movl %%ebp,%1\n"\
    49                         : "=rm" (ctx.SP),\
    50                                 "=rm" (ctx.FP) \
    51                 )
    52 
    5337        // mxcr : SSE Status and Control bits (control bits are preserved across function calls)
    5438        // fcw  : X87 FPU control word (preserved across function calls)
     
    7256
    7357#elif defined( __x86_64 )
    74         #define CtxGet( ctx )        \
    75                 __asm__ volatile (     \
    76                         "movq %%rsp,%0\n"\
    77                         "movq %%rbp,%1\n"\
    78                         : "=rm" (ctx.SP),\
    79                                 "=rm" (ctx.FP) \
    80                 )
    81 
    8258        #define __x87_store         \
    8359                uint32_t __mxcr;      \
     
    9874                )
    9975
    100 
    101 #elif defined( __ARM_ARCH )
    102 #define CtxGet( ctx ) __asm__ ( \
    103                 "mov %0,%%sp\n"   \
    104                 "mov %1,%%r11\n"   \
    105         : "=rm" (ctx.SP), "=rm" (ctx.FP) )
     76#elif defined( __arm__ )
     77        #define __x87_store
     78        #define __x87_load
     79
     80#elif defined( __aarch64__ )
     81        #define __x87_store              \
     82                uint32_t __fpcntl[2];    \
     83                __asm__ volatile (    \
     84                        "mrs x9, FPCR\n" \
     85                        "mrs x10, FPSR\n"  \
     86                        "stp x9, x10, %0\n"  \
     87                        : "=m" (__fpcntl) : : "x9", "x10" \
     88                )
     89
     90        #define __x87_load         \
     91                __asm__ volatile (    \
     92                        "ldp x9, x10, %0\n"  \
     93                        "msr FPSR, x10\n"  \
     94                        "msr FPCR, x9\n" \
     95                : "=m" (__fpcntl) : : "x9", "x10" \
     96                )
     97
    10698#else
    107         #error unknown hardware architecture
     99        #error unsupported hardware architecture
    108100#endif
    109101
     102extern $thread * mainThread;
     103extern processor * mainProcessor;
     104
    110105//-----------------------------------------------------------------------------
    111 //Start and stop routine for the kernel, declared first to make sure they run first
    112 static void __kernel_startup (void) __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
    113 static void __kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
    114 
    115 //-----------------------------------------------------------------------------
    116 // Kernel storage
    117 KERNEL_STORAGE(cluster,         mainCluster);
    118 KERNEL_STORAGE(processor,       mainProcessor);
    119 KERNEL_STORAGE($thread, mainThread);
    120 KERNEL_STORAGE(__stack_t,       mainThreadCtx);
    121 
    122 cluster     * mainCluster;
    123 processor   * mainProcessor;
    124 $thread * mainThread;
    125 
    126 extern "C" {
    127         struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
    128 }
    129 
    130 size_t __page_size = 0;
    131 
    132 //-----------------------------------------------------------------------------
    133 // Global state
    134 thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) = {
    135         NULL,                                                                                           // cannot use 0p
    136         NULL,
    137         { 1, false, false },
     138         6u // this should be seeded better, but due to a bug calling rdtsc doesn't work
    139 };
    140 
    141 //-----------------------------------------------------------------------------
    142 // Struct to steal stack
    143 struct current_stack_info_t {
    144         __stack_t * storage;                                                            // pointer to stack object
    145         void * base;                                                                            // base of stack
    146         void * limit;                                                                           // stack grows towards stack limit
    147         void * context;                                                                         // address of cfa_context_t
    148 };
    149 
    150 void ?{}( current_stack_info_t & this ) {
    151         __stack_context_t ctx;
    152         CtxGet( ctx );
    153         this.base = ctx.FP;
    154 
    155         rlimit r;
    156         getrlimit( RLIMIT_STACK, &r);
    157         size_t size = r.rlim_cur;
    158 
    159         this.limit = (void *)(((intptr_t)this.base) - size);
    160         this.context = &storage_mainThreadCtx;
    161 }
    162 
    163 //-----------------------------------------------------------------------------
    164 // Main thread construction
    165 
    166 void ?{}( $coroutine & this, current_stack_info_t * info) with( this ) {
    167         stack.storage = info->storage;
    168         with(*stack.storage) {
    169                 limit     = info->limit;
    170                 base      = info->base;
    171         }
    172         __attribute__((may_alias)) intptr_t * istorage = (intptr_t*) &stack.storage;
    173         *istorage |= 0x1;
    174         name = "Main Thread";
    175         state = Start;
    176         starter = 0p;
    177         last = 0p;
    178         cancellation = 0p;
    179 }
    180 
    181 void ?{}( $thread & this, current_stack_info_t * info) with( this ) {
    182         state = Start;
    183         self_cor{ info };
    184         curr_cor = &self_cor;
    185         curr_cluster = mainCluster;
    186         self_mon.owner = &this;
    187         self_mon.recursion = 1;
    188         self_mon_p = &self_mon;
    189         next = 0p;
    190 
    191         node.next = 0p;
    192         node.prev = 0p;
    193         doregister(curr_cluster, this);
    194 
    195         monitors{ &self_mon_p, 1, (fptr_t)0 };
    196 }
    197 
    198 //-----------------------------------------------------------------------------
    199 // Processor coroutine
    200 void ?{}(processorCtx_t & this) {
    201 
    202 }
    203 
    204 // Construct the processor context of non-main processors
    205 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info) {
    206         (this.__cor){ info };
    207         this.proc = proc;
    208 }
    209 
    210 static void * __invoke_processor(void * arg);
    211 
    212 void ?{}(processor & this, const char name[], cluster & cltr) with( this ) {
    213         this.name = name;
    214         this.cltr = &cltr;
    215         terminated{ 0 };
    216         destroyer = 0p;
    217         do_terminate = false;
    218         preemption_alarm = 0p;
    219         pending_preemption = false;
    220         runner.proc = &this;
    221 
    222         idleLock{};
    223 
    224         __cfaabi_dbg_print_safe("Kernel : Starting core %p\n", &this);
    225 
    226         this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
    227 
    228         __cfaabi_dbg_print_safe("Kernel : core %p started\n", &this);
    229 }
    230 
    231 void ^?{}(processor & this) with( this ){
    232         if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
    233                 __cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);
    234 
    235                 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
    236                 wake( &this );
    237 
    238                 P( terminated );
    239                 verify( kernelTLS.this_processor != &this);
    240         }
    241 
    242         pthread_join( kernel_thread, 0p );
    243         free( this.stack );
    244 }
    245 
    246 void ?{}(cluster & this, const char name[], Duration preemption_rate) with( this ) {
    247         this.name = name;
    248         this.preemption_rate = preemption_rate;
    249         ready_queue{};
    250         ready_queue_lock{};
    251 
    252         procs{ __get };
    253         idles{ __get };
    254         threads{ __get };
    255 
    256         doregister(this);
    257 }
    258 
    259 void ^?{}(cluster & this) {
    260         unregister(this);
    261 }
     106// Kernel Scheduling logic
     107static $thread * __next_thread(cluster * this);
     108static $thread * __next_thread_slow(cluster * this);
     109static void __run_thread(processor * this, $thread * dst);
     110static void __wake_one(cluster * cltr);
     111
     112static void push  (__cluster_idles & idles, processor & proc);
     113static void remove(__cluster_idles & idles, processor & proc);
     114static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
     115
    262116
    263117//=============================================================================================
    264118// Kernel Scheduling logic
    265119//=============================================================================================
    266 static $thread * __next_thread(cluster * this);
    267 static void __run_thread(processor * this, $thread * dst);
    268 static void __halt(processor * this);
    269 
    270120//Main of the processor contexts
    271121void main(processorCtx_t & runner) {
     272122        // Because of a bug, we couldn't initialize the seed on construction
    273123        // Do it here
    274         kernelTLS.rand_seed ^= rdtscl();
     124        __cfaabi_tls.rand_seed ^= rdtscl();
     125        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
     126        __tls_rand_advance_bck();
    275127
    276128        processor * this = runner.proc;
    277129        verify(this);
    278130
    279         __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
    280 
    281         doregister(this->cltr, this);
     131        __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
     132        #if !defined(__CFA_NO_STATISTICS__)
     133                if( this->print_halts ) {
     134                        __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this);
     135                }
     136        #endif
    282137
    283138        {
     
    285140                preemption_scope scope = { this };
    286141
    287                 __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
     142                __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
    288143
    289144                $thread * readyThread = 0p;
    290                 for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
     145                MAIN_LOOP:
     146                for() {
     147                        // Try to get the next thread
    291148                        readyThread = __next_thread( this->cltr );
    292149
    293                         if(readyThread) {
    294                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    295                                 /* paranoid */ verifyf( readyThread->state == Inactive || readyThread->state == Start || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
    296                                 /* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
    297 
    298                                 __run_thread(this, readyThread);
    299 
    300                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    301 
    302                                 spin_count = 0;
    303                         } else {
    304                                 // spin(this, &spin_count);
    305                                 __halt(this);
     150                        if( !readyThread ) {
     151                                readyThread = __next_thread_slow( this->cltr );
    306152                        }
    307                 }
    308 
    309                 __cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
    310         }
    311 
    312         unregister(this->cltr, this);
     153
     154                        HALT:
     155                        if( !readyThread ) {
     156                                // Don't block if we are done
     157                                if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     158
     159                                #if !defined(__CFA_NO_STATISTICS__)
     160                                        __tls_stats()->ready.sleep.halts++;
     161                                #endif
     162
     163                                // Push self to idle stack
     164                                push(this->cltr->idles, * this);
     165
     166                                // Confirm the ready-queue is empty
     167                                readyThread = __next_thread_slow( this->cltr );
     168                                if( readyThread ) {
     169                                        // A thread was found, cancel the halt
     170                                        remove(this->cltr->idles, * this);
     171
     172                                        #if !defined(__CFA_NO_STATISTICS__)
     173                                                __tls_stats()->ready.sleep.cancels++;
     174                                        #endif
     175
      176                                        // continue the main loop
     177                                        break HALT;
     178                                }
     179
     180                                #if !defined(__CFA_NO_STATISTICS__)
     181                                        if(this->print_halts) {
     182                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
     183                                        }
     184                                #endif
     185
     186                                wait( this->idle );
     187
     188                                #if !defined(__CFA_NO_STATISTICS__)
     189                                        if(this->print_halts) {
     190                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
     191                                        }
     192                                #endif
     193
     194                                // We were woken up, remove self from idle
     195                                remove(this->cltr->idles, * this);
     196
     197                                // DON'T just proceed, start looking again
     198                                continue MAIN_LOOP;
     199                        }
     200
     201                        /* paranoid */ verify( readyThread );
     202
      203                // We found a thread, run it
     204                        __run_thread(this, readyThread);
     205
     206                        // Are we done?
     207                        if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     208                }
     209
     210                __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
     211        }
    313212
    314213        V( this->terminated );
    315214
    316         __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
     215        if(this == mainProcessor) {
     216                // HACK : the coroutine context switch expects this_thread to be set
      217                // and it makes sense for it to be set in all other cases except here
     218                // fake it
     219                __cfaabi_tls.this_thread = mainThread;
     220        }
     221
     222        __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
    317223}
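
The HALT path above is the classic sleeper/waker handshake: publish yourself as idle first, re-check the queue second, and only then block, so a thread pushed between the check and the sleep can never be lost. A minimal sketch of the same protocol in plain C (try_pop, publish_idle, retract_idle and proc_t are stand-ins, not the runtime's API):

        #include <semaphore.h>

        typedef struct {
                sem_t idle;                            // per-processor sleep semaphore
        } proc_t;

        extern void * try_pop( void );                 // hypothetical: 0 when queue empty
        extern void publish_idle( proc_t * );          // hypothetical: push self to idle list
        extern void retract_idle( proc_t * );          // hypothetical: remove self from it

        static void * next_or_sleep( proc_t * this ) {
                void * work = try_pop();
                if( work ) return work;

                publish_idle( this );                  // step 1: become visible to wakers
                work = try_pop();                      // step 2: re-confirm emptiness
                if( work ) {
                        retract_idle( this );          // raced with a push: cancel the halt
                        return work;
                }
                sem_wait( &this->idle );               // step 3: block until a waker posts
                retract_idle( this );                  // woken: go look again
                return 0;
        }

Note that the main loop above deliberately continues after waking instead of assuming a thread is available; a wakeup only means "look again".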
    318224
     
    324230// from the processor coroutine to the target thread
    325231static void __run_thread(processor * this, $thread * thrd_dst) {
     232        /* paranoid */ verify( ! __preemption_enabled() );
     233        /* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
     234        /* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
     235        __builtin_prefetch( thrd_dst->context.SP );
     236
    326237        $coroutine * proc_cor = get_coroutine(this->runner);
    327 
    328         // Update global state
    329         kernelTLS.this_thread = thrd_dst;
    330238
    331239        // set state of processor coroutine to inactive
    332240        verify(proc_cor->state == Active);
    333         proc_cor->state = Inactive;
     241        proc_cor->state = Blocked;
    334242
    335243        // Actually run the thread
    336244        RUNNING:  while(true) {
    337                 if(unlikely(thrd_dst->preempted)) {
    338                         thrd_dst->preempted = __NO_PREEMPTION;
    339                         verify(thrd_dst->state == Active || thrd_dst->state == Rerun);
    340                 } else {
    341                         verify(thrd_dst->state == Start || thrd_dst->state == Primed || thrd_dst->state == Inactive);
    342                         thrd_dst->state = Active;
    343                 }
    344 
    345                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     245                thrd_dst->preempted = __NO_PREEMPTION;
     246                thrd_dst->state = Active;
     247
     248                // Update global state
     249                kernelTLS().this_thread = thrd_dst;
     250
     251                /* paranoid */ verify( ! __preemption_enabled() );
     252                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
     253                /* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
     254                /* paranoid */ verify( thrd_dst->context.SP );
     255                /* paranoid */ verify( thrd_dst->state != Halted );
     256                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
     257                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
     258                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
     259
     260
    346261
    347262                // set context switch to the thread that the processor is executing
    348                 verify( thrd_dst->context.SP );
    349263                __cfactx_switch( &proc_cor->context, &thrd_dst->context );
    350264                // when __cfactx_switch returns we are back in the processor coroutine
    351265
    352                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    353 
     266                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
     267                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
     268                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
     269                /* paranoid */ verify( thrd_dst->context.SP );
     270                /* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
     271                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
     272                /* paranoid */ verify( ! __preemption_enabled() );
     273
     274                // Reset global state
     275                kernelTLS().this_thread = 0p;
    354276
    355277                // We just finished running a thread, there are a few things that could have happened.
     356278                // 1 - Regular case : the thread has blocked and no one has scheduled it yet.
    357279                // 2 - Racy case    : the thread has blocked but someone has already tried to schedule it.
    358                 // 3 - Polite Racy case : the thread has blocked, someone has already tried to schedule it, but the thread is nice and wants to go through the ready-queue any way
    359280                // 4 - Preempted
    360281                // In case 1, we may have won a race so we can't write to the state again.
    361282                // In case 2, we lost the race so we now own the thread.
    362                 // In case 3, we lost the race but can just reschedule the thread.
    363283
    364284                if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
     
    368288                }
    369289
     290                if(unlikely(thrd_dst->state == Halting)) {
     291                        // The thread has halted, it should never be scheduled/run again
     292                        // finish the thread
     293                        __thread_finish( thrd_dst );
     294                        break RUNNING;
     295                }
     296
     297                /* paranoid */ verify( thrd_dst->state == Active );
     298                thrd_dst->state = Blocked;
     299
    370300                // set state of processor coroutine to active and the thread to inactive
    371                 static_assert(sizeof(thrd_dst->state) == sizeof(int));
    372                 enum coroutine_state old_state = __atomic_exchange_n(&thrd_dst->state, Inactive, __ATOMIC_SEQ_CST);
    373                 switch(old_state) {
    374                         case Halted:
    375                                 // The thread has halted, it should never be scheduled/run again, leave it back to Halted and move on
    376                                 thrd_dst->state = Halted;
    377 
     378                                 // We may need to wake someone up here
    379                                 unpark( this->destroyer );
    380                                 this->destroyer = 0p;
    381                                 break RUNNING;
    382                         case Active:
     301                int old_ticket = __atomic_fetch_sub(&thrd_dst->ticket, 1, __ATOMIC_SEQ_CST);
     302                switch(old_ticket) {
     303                        case TICKET_RUNNING:
    383304                                // This is case 1, the regular case, nothing more is needed
    384305                                break RUNNING;
    385                         case Rerun:
     306                        case TICKET_UNBLOCK:
    386307                                // This is case 2, the racy case, someone tried to run this thread before it finished blocking
    387308                                // In this case, just run it again.
     
    389310                        default:
     390311                                // This makes no sense, something is wrong, abort
    391                                 abort("Finished running a thread that was Inactive/Start/Primed %d\n", old_state);
     312                                abort();
    392313                }
    393314        }
     
    395316        // Just before returning to the processor, set the processor coroutine to active
    396317        proc_cor->state = Active;
     318
     319        /* paranoid */ verify( ! __preemption_enabled() );
    397320}
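
The ticket arithmetic above resolves the park/unpark race with a single atomic counter: the parking side decrements, the waking side increments, and whichever side moves the ticket second learns it lost the race and takes responsibility for the thread. A sketch with assumed encodings TICKET_BLOCKED = 0, TICKET_RUNNING = 1, TICKET_UNBLOCK = 2 (the real values live in the runtime headers):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdlib.h>

        enum { TICKET_BLOCKED = 0, TICKET_RUNNING = 1, TICKET_UNBLOCK = 2 };

        // runner side: called once the thread has stopped running
        static bool stay_blocked( atomic_int * ticket ) {
                int old = atomic_fetch_sub( ticket, 1 );
                if( old == TICKET_RUNNING ) return true;   // case 1: park won, leave it blocked
                if( old == TICKET_UNBLOCK ) return false;  // case 2: unpark already came, rerun it
                abort();                                   // corrupted ticket
        }

        // waker side: unpark
        static bool must_schedule( atomic_int * ticket ) {
                int old = atomic_fetch_add( ticket, 1 );
                if( old == TICKET_RUNNING ) return false;  // thread not blocked yet; it will see
                                                           // UNBLOCK when it tries to block
                if( old == TICKET_BLOCKED ) return true;   // it is blocked; the waker now owns it
                abort();                                   // double unpark
        }

Because each side uses one atomic read-modify-write, exactly one of them observes the other's move, which is what makes cases 1 and 2 mutually exclusive.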
    398321
    399322// KERNEL_ONLY
    400323void returnToKernel() {
    401         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    402         $coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
    403         $thread * thrd_src = kernelTLS.this_thread;
     324        /* paranoid */ verify( ! __preemption_enabled() );
     325        $coroutine * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
     326        $thread * thrd_src = kernelTLS().this_thread;
     327
     328        #if !defined(__CFA_NO_STATISTICS__)
     329                struct processor * last_proc = kernelTLS().this_processor;
     330        #endif
    404331
    405332        // Run the thread on this processor
     
    409336                        __x87_store;
    410337                #endif
    411                 verify( proc_cor->context.SP );
     338                /* paranoid */ verify( proc_cor->context.SP );
     339                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_src->canary );
    412340                __cfactx_switch( &thrd_src->context, &proc_cor->context );
     341                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_src->canary );
    413342                #if defined( __i386 ) || defined( __x86_64 )
    414343                        __x87_load;
     
    417346        }
    418347
    419         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    420 }
    421 
    422 // KERNEL_ONLY
    423 // Context invoker for processors
    424 // This is the entry point for processors (kernel threads)
    425 // It effectively constructs a coroutine by stealing the pthread stack
    426 static void * __invoke_processor(void * arg) {
    427         processor * proc = (processor *) arg;
    428         kernelTLS.this_processor = proc;
    429         kernelTLS.this_thread    = 0p;
    430         kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
    431         // SKULLDUGGERY: We want to create a context for the processor coroutine
    432         // which is needed for the 2-step context switch. However, there is no reason
     433         // to waste the perfectly valid stack created by pthread.
    434         current_stack_info_t info;
    435         __stack_t ctx;
    436         info.storage = &ctx;
    437         (proc->runner){ proc, &info };
    438 
    439         __cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
    440 
    441         //Set global state
    442         kernelTLS.this_thread = 0p;
    443 
    444         //We now have a proper context from which to schedule threads
    445         __cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
    446 
    447         // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
     448         // resume it to start it as we normally would; it would just context switch
    449         // back to here. Instead directly call the main since we already are on the
    450         // appropriate stack.
    451         get_coroutine(proc->runner)->state = Active;
    452         main( proc->runner );
    453         get_coroutine(proc->runner)->state = Halted;
    454 
    455         // Main routine of the core returned, the core is now fully terminated
    456         __cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
    457 
    458         return 0p;
    459 }
    460 
    461 static void Abort( int ret, const char func[] ) {
    462         if ( ret ) {                                                                            // pthread routines return errno values
    463                 abort( "%s : internal error, error(%d) %s.", func, ret, strerror( ret ) );
    464         } // if
    465 } // Abort
    466 
    467 void * __create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
    468         pthread_attr_t attr;
    469 
    470         Abort( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
    471 
    472         size_t stacksize;
    473         // default stack size, normally defined by shell limit
    474         Abort( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
    475         assert( stacksize >= PTHREAD_STACK_MIN );
    476 
    477         void * stack;
    478         __cfaabi_dbg_debug_do(
    479                 stack = memalign( __page_size, stacksize + __page_size );
     480                 // pthread has no mechanism to create the guard page in a user-supplied stack.
    481                 if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
    482                         abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
    483                 } // if
    484         );
    485         __cfaabi_dbg_no_debug_do(
    486                 stack = malloc( stacksize );
    487         );
    488 
    489         Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
    490 
    491         Abort( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
    492         return stack;
    493 }
    494 
    495 // KERNEL_ONLY
    496 static void __kernel_first_resume( processor * this ) {
    497         $thread * src = mainThread;
    498         $coroutine * dst = get_coroutine(this->runner);
    499 
    500         verify( ! kernelTLS.preemption_state.enabled );
    501 
    502         kernelTLS.this_thread->curr_cor = dst;
    503         __stack_prepare( &dst->stack, 65000 );
    504         __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
    505 
    506         verify( ! kernelTLS.preemption_state.enabled );
    507 
    508         dst->last = &src->self_cor;
    509         dst->starter = dst->starter ? dst->starter : &src->self_cor;
    510 
    511         // set state of current coroutine to inactive
    512         src->state = src->state == Halted ? Halted : Inactive;
    513 
    514         // context switch to specified coroutine
    515         verify( dst->context.SP );
    516         __cfactx_switch( &src->context, &dst->context );
    517         // when __cfactx_switch returns we are back in the src coroutine
    518 
    519         mainThread->curr_cor = &mainThread->self_cor;
    520 
    521         // set state of new coroutine to active
    522         src->state = Active;
    523 
    524         verify( ! kernelTLS.preemption_state.enabled );
    525 }
    526 
    527 // KERNEL_ONLY
    528 static void __kernel_last_resume( processor * this ) {
    529         $coroutine * src = &mainThread->self_cor;
    530         $coroutine * dst = get_coroutine(this->runner);
    531 
    532         verify( ! kernelTLS.preemption_state.enabled );
    533         verify( dst->starter == src );
    534         verify( dst->context.SP );
    535 
    536         // context switch to the processor
    537         __cfactx_switch( &src->context, &dst->context );
     348        #if !defined(__CFA_NO_STATISTICS__)
     349                if(last_proc != kernelTLS().this_processor) {
     350                        __tls_stats()->ready.threads.migration++;
     351                }
     352        #endif
     353
     354        /* paranoid */ verify( ! __preemption_enabled() );
     355        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
     356        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
    538357}
    539358
     
    541360// Scheduler routines
    542361// KERNEL ONLY
    543 void __schedule_thread( $thread * thrd ) with( *thrd->curr_cluster ) {
    544         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     362void __schedule_thread( $thread * thrd ) {
     363        /* paranoid */ verify( ! __preemption_enabled() );
     364        /* paranoid */ verify( kernelTLS().this_proc_id );
     365        /* paranoid */ verify( thrd );
     366        /* paranoid */ verify( thrd->state != Halted );
     367        /* paranoid */ verify( thrd->curr_cluster );
    545368        /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
    546         /* paranoid */ if( thrd->state == Inactive || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
    547                           "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
    548         /* paranoid */ if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active || thrd->state == Rerun,
    549                           "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
     369        /* paranoid */  if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
     370                                        "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
     371        /* paranoid */  if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active,
     372                                        "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
    550373        /* paranoid */ #endif
    551         /* paranoid */ verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
    552 
    553         lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
    554         bool was_empty = !(ready_queue != 0);
    555         append( ready_queue, thrd );
    556         unlock( ready_queue_lock );
    557 
    558         if(was_empty) {
    559                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    560                 if(idles) {
    561                         wake_fast(idles.head);
    562                 }
    563                 unlock    (proc_list_lock);
    564         }
    565         else if( struct processor * idle = idles.head ) {
    566                 wake_fast(idle);
    567         }
    568 
    569         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     374        /* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
     375        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     376
     377
     378        if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
     379
     380        ready_schedule_lock();
     381                // Dereference the thread now because once we push it, there is not guaranteed it's still valid.
     382                struct cluster * cl = thrd->curr_cluster;
     383
     384                // push the thread to the cluster ready-queue
     385                push( cl, thrd );
     386
     387                // variable thrd is no longer safe to use
     388
      389                // wake the cluster using the saved variable.
     390                __wake_one( cl );
     391        ready_schedule_unlock();
     392
     393        /* paranoid */ verify( ! __preemption_enabled() );
    570394}
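
The ordering inside the locked region above matters: every field of thrd that the scheduler still needs must be read before the push, because another processor can pop and run the thread the instant it lands in the ready-queue, after which it may block, migrate, or exit. A sketch of that discipline, with hypothetical push/wake helpers:

        struct cluster;
        struct thread_desc { struct cluster * curr_cluster; /* ... */ };

        extern void push( struct cluster *, struct thread_desc * );  // hypothetical
        extern void wake_one( struct cluster * );                    // hypothetical

        static void schedule( struct thread_desc * thrd ) {
                struct cluster * cl = thrd->curr_cluster;  // read BEFORE publishing
                push( cl, thrd );                          // thrd escapes here
                // touching thrd->anything past this point is a use-after-publish race
                wake_one( cl );                            // so wake with the saved pointer
        }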
    571395
    572396// KERNEL ONLY
    573 static $thread * __next_thread(cluster * this) with( *this ) {
    574         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    575 
    576         lock( ready_queue_lock __cfaabi_dbg_ctx2 );
    577         $thread * head = pop_head( ready_queue );
    578         unlock( ready_queue_lock );
    579 
    580         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    581         return head;
     397static inline $thread * __next_thread(cluster * this) with( *this ) {
     398        /* paranoid */ verify( ! __preemption_enabled() );
     399        /* paranoid */ verify( kernelTLS().this_proc_id );
     400
     401        ready_schedule_lock();
     402                $thread * thrd = pop( this );
     403        ready_schedule_unlock();
     404
     405        /* paranoid */ verify( kernelTLS().this_proc_id );
     406        /* paranoid */ verify( ! __preemption_enabled() );
     407        return thrd;
     408}
     409
     410// KERNEL ONLY
     411static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
     412        /* paranoid */ verify( ! __preemption_enabled() );
     413        /* paranoid */ verify( kernelTLS().this_proc_id );
     414
     415        ready_schedule_lock();
     416                $thread * thrd = pop_slow( this );
     417        ready_schedule_unlock();
     418
     419        /* paranoid */ verify( kernelTLS().this_proc_id );
     420        /* paranoid */ verify( ! __preemption_enabled() );
     421        return thrd;
    582422}
    583423
     
    585425        if( !thrd ) return;
    586426
    587         disable_interrupts();
    588         static_assert(sizeof(thrd->state) == sizeof(int));
    589         enum coroutine_state old_state = __atomic_exchange_n(&thrd->state, Rerun, __ATOMIC_SEQ_CST);
    590         switch(old_state) {
    591                 case Active:
     427        int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
     428        switch(old_ticket) {
     429                case TICKET_RUNNING:
    592430                        // Wake won the race, the thread will reschedule/rerun itself
    593431                        break;
    594                 case Inactive:
     432                case TICKET_BLOCKED:
    595433                        /* paranoid */ verify( ! thrd->preempted != __NO_PREEMPTION );
    596 
    597                         // Wake lost the race,
    598                         thrd->state = Inactive;
    599                         __schedule_thread( thrd );
     434                        /* paranoid */ verify( thrd->state == Blocked );
     435
     436                        {
     437                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
     438                                bool full = publicTLS_get(this_proc_id)->full_proc;
     439                                if(full) disable_interrupts();
     440
     441                                /* paranoid */ verify( ! __preemption_enabled() );
     442
     443                                // Wake lost the race,
     444                                __schedule_thread( thrd );
     445
     446                                /* paranoid */ verify( ! __preemption_enabled() );
     447
     448                                if(full) enable_interrupts( __cfaabi_dbg_ctx );
     449                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
     450                        }
     451
    600452                        break;
    601                 case Rerun:
    602                         abort("More than one thread attempted to schedule thread %p\n", thrd);
    603                         break;
    604                 case Halted:
    605                 case Start:
    606                 case Primed:
    607453                default:
     608454                        // This makes no sense, something is wrong, abort
    609                         abort();
    610         }
     455                        abort("Thread %p (%s) has mismatch park/unpark\n", thrd, thrd->self_cor.name);
     456        }
     457}
     458
     459void park( void ) {
     460        /* paranoid */ verify( __preemption_enabled() );
     461        disable_interrupts();
     462        /* paranoid */ verify( ! __preemption_enabled() );
     463        /* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION );
     464
     465        returnToKernel();
     466
     467        /* paranoid */ verify( ! __preemption_enabled() );
    611468        enable_interrupts( __cfaabi_dbg_ctx );
    612 }
    613 
    614 void park( void ) {
    615         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    616         disable_interrupts();
    617         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    618         /* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
    619 
    620         returnToKernel();
    621 
    622         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    623         enable_interrupts( __cfaabi_dbg_ctx );
    624         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    625 
    626 }
    627 
    628 // KERNEL ONLY
    629 void __leave_thread() {
    630         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    631         returnToKernel();
    632         abort();
     469        /* paranoid */ verify( __preemption_enabled() );
     470
     471}
     472
     473extern "C" {
     474        // Leave the thread monitor
     475        // last routine called by a thread.
     476        // Should never return
     477        void __cfactx_thrd_leave() {
     478                $thread * thrd = active_thread();
     479                $monitor * this = &thrd->self_mon;
     480
     481                // Lock the monitor now
     482                lock( this->lock __cfaabi_dbg_ctx2 );
     483
     484                disable_interrupts();
     485
     486                /* paranoid */ verify( ! __preemption_enabled() );
     487                /* paranoid */ verify( thrd->state == Active );
     488                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     489                /* paranoid */ verify( kernelTLS().this_thread == thrd );
     490                /* paranoid */ verify( thrd->context.SP );
     491                /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) > ((uintptr_t)__get_stack(thrd->curr_cor)->limit), "ERROR : $thread %p has been corrupted.\n StackPointer too large.\n", thrd );
     492                /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) < ((uintptr_t)__get_stack(thrd->curr_cor)->base ), "ERROR : $thread %p has been corrupted.\n StackPointer too small.\n", thrd );
     493
     494                thrd->state = Halting;
     495                if( TICKET_RUNNING != thrd->ticket ) { abort( "Thread terminated with pending unpark" ); }
     496                if( thrd != this->owner ) { abort( "Thread internal monitor has incorrect owner" ); }
     497                if( this->recursion != 1) { abort( "Thread internal monitor has unbalanced recursion" ); }
     498
     499                // Leave the thread
     500                returnToKernel();
     501
     502                // Control flow should never reach here!
     503                abort();
     504        }
    633505}
    634506
    635507// KERNEL ONLY
    636508bool force_yield( __Preemption_Reason reason ) {
    637         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     509        /* paranoid */ verify( __preemption_enabled() );
    638510        disable_interrupts();
    639         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    640 
    641         $thread * thrd = kernelTLS.this_thread;
    642         /* paranoid */ verify(thrd->state == Active || thrd->state == Rerun);
     511        /* paranoid */ verify( ! __preemption_enabled() );
     512
     513        $thread * thrd = kernelTLS().this_thread;
     514        /* paranoid */ verify(thrd->state == Active);
    643515
    644516        // SKULLDUGGERY: It is possible that we are preempting this thread just before
     
    647519        // If that is the case, abandon the preemption.
    648520        bool preempted = false;
    649         if(thrd->next == 0p) {
     521        if(thrd->link.next == 0p) {
    650522                preempted = true;
    651523                thrd->preempted = reason;
     
    653525        }
    654526
    655         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     527        /* paranoid */ verify( ! __preemption_enabled() );
    656528        enable_interrupts_noPoll();
    657         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     529        /* paranoid */ verify( __preemption_enabled() );
    658530
    659531        return preempted;
     
    661533
    662534//=============================================================================================
    663 // Kernel Setup logic
     535// Kernel Idle Sleep
    664536//=============================================================================================
    665 //-----------------------------------------------------------------------------
    666 // Kernel boot procedures
    667 static void __kernel_startup(void) {
    668         verify( ! kernelTLS.preemption_state.enabled );
    669         __cfaabi_dbg_print_safe("Kernel : Starting\n");
    670 
    671         __page_size = sysconf( _SC_PAGESIZE );
    672 
    673         __cfa_dbg_global_clusters.list{ __get };
    674         __cfa_dbg_global_clusters.lock{};
    675 
    676         // Initialize the main cluster
    677         mainCluster = (cluster *)&storage_mainCluster;
    678         (*mainCluster){"Main Cluster"};
    679 
    680         __cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
    681 
    682         // Start by initializing the main thread
    683         // SKULLDUGGERY: the mainThread steals the process main thread
    684         // which will then be scheduled by the mainProcessor normally
    685         mainThread = ($thread *)&storage_mainThread;
    686         current_stack_info_t info;
    687         info.storage = (__stack_t*)&storage_mainThreadCtx;
    688         (*mainThread){ &info };
    689 
    690         __cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
    691 
    692 
    693 
    694         // Construct the processor context of the main processor
    695         void ?{}(processorCtx_t & this, processor * proc) {
    696                 (this.__cor){ "Processor" };
    697                 this.__cor.starter = 0p;
    698                 this.proc = proc;
    699         }
    700 
    701         void ?{}(processor & this) with( this ) {
    702                 name = "Main Processor";
    703                 cltr = mainCluster;
    704                 terminated{ 0 };
    705                 do_terminate = false;
    706                 preemption_alarm = 0p;
    707                 pending_preemption = false;
    708                 kernel_thread = pthread_self();
    709 
    710                 runner{ &this };
    711                 __cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
    712         }
    713 
    714         // Initialize the main processor and the main processor ctx
    715         // (the coroutine that contains the processing control flow)
    716         mainProcessor = (processor *)&storage_mainProcessor;
    717         (*mainProcessor){};
    718 
    719         //initialize the global state variables
    720         kernelTLS.this_processor = mainProcessor;
    721         kernelTLS.this_thread    = mainThread;
    722 
    723         // Enable preemption
    724         kernel_start_preemption();
    725 
    726         // Add the main thread to the ready queue
    727         // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
    728         __schedule_thread(mainThread);
    729 
    730         // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
    731         // context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
    732         // mainThread is on the ready queue when this call is made.
    733         __kernel_first_resume( kernelTLS.this_processor );
    734 
    735 
    736 
    737         // THE SYSTEM IS NOW COMPLETELY RUNNING
    738         __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
    739 
    740         verify( ! kernelTLS.preemption_state.enabled );
     537// Wake a thread from the front if there are any
     538static void __wake_one(cluster * this) {
     539        /* paranoid */ verify( ! __preemption_enabled() );
     540        /* paranoid */ verify( ready_schedule_islocked() );
     541
     542        // Check if there is a sleeping processor
     543        processor * p;
     544        unsigned idle;
     545        unsigned total;
     546        [idle, total, p] = query(this->idles);
     547
     548        // If no one is sleeping, we are done
     549        if( idle == 0 ) return;
     550
     551        // We found a processor, wake it up
     552        post( p->idle );
     553
     554        #if !defined(__CFA_NO_STATISTICS__)
     555                __tls_stats()->ready.sleep.wakes++;
     556        #endif
     557
     558        /* paranoid */ verify( ready_schedule_islocked() );
     559        /* paranoid */ verify( ! __preemption_enabled() );
     560
     561        return;
     562}
     563
      564// Unconditionally wake a thread
     565void __wake_proc(processor * this) {
     566        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
     567
     568        disable_interrupts();
     569                /* paranoid */ verify( ! __preemption_enabled() );
     570                post( this->idle );
    741571        enable_interrupts( __cfaabi_dbg_ctx );
    742         verify( TL_GET( preemption_state.enabled ) );
    743 }
    744 
    745 static void __kernel_shutdown(void) {
    746         __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
    747 
    748         verify( TL_GET( preemption_state.enabled ) );
    749         disable_interrupts();
    750         verify( ! kernelTLS.preemption_state.enabled );
    751 
     752         // SKULLDUGGERY: Notify the mainProcessor it needs to terminate.
     753         // When its coroutine terminates, it returns control to the mainThread
    754         // which is currently here
    755         __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
    756         __kernel_last_resume( kernelTLS.this_processor );
    757         mainThread->self_cor.state = Halted;
    758 
    759         // THE SYSTEM IS NOW COMPLETELY STOPPED
    760 
    761         // Disable preemption
    762         kernel_stop_preemption();
    763 
    764         // Destroy the main processor and its context in reverse order of construction
    765         // These were manually constructed so we need manually destroy them
    766         ^(mainProcessor->runner){};
    767         ^(mainProcessor){};
    768 
    769         // Final step, destroy the main thread since it is no longer needed
     770         // Since we provided a stack to this task it will not destroy anything
    771         ^(mainThread){};
    772 
    773         ^(__cfa_dbg_global_clusters.list){};
    774         ^(__cfa_dbg_global_clusters.lock){};
    775 
    776         __cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
    777 }
    778 
    779 //=============================================================================================
    780 // Kernel Quiescing
    781 //=============================================================================================
    782 static void __halt(processor * this) with( *this ) {
    783         // verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
    784 
    785         with( *cltr ) {
    786                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    787                 remove    (procs, *this);
    788                 push_front(idles, *this);
    789                 unlock    (proc_list_lock);
    790         }
    791 
    792         __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
    793 
    794         wait( idleLock );
    795 
    796         __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
    797 
    798         with( *cltr ) {
    799                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    800                 remove    (idles, *this);
    801                 push_front(procs, *this);
    802                 unlock    (proc_list_lock);
     572}
     573
     574static void push  (__cluster_idles & this, processor & proc) {
     575        /* paranoid */ verify( ! __preemption_enabled() );
     576        lock( this );
     577                this.idle++;
     578                /* paranoid */ verify( this.idle <= this.total );
     579
     580                insert_first(this.list, proc);
     581        unlock( this );
     582        /* paranoid */ verify( ! __preemption_enabled() );
     583}
     584
     585static void remove(__cluster_idles & this, processor & proc) {
     586        /* paranoid */ verify( ! __preemption_enabled() );
     587        lock( this );
     588                this.idle--;
     589                /* paranoid */ verify( this.idle >= 0 );
     590
     591                remove(proc);
     592        unlock( this );
     593        /* paranoid */ verify( ! __preemption_enabled() );
     594}
     595
     596static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
     597        for() {
     598                uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
     599                if( 1 == (l % 2) ) { Pause(); continue; }
     600                unsigned idle    = this.idle;
     601                unsigned total   = this.total;
     602                processor * proc = &this.list`first;
     603                // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
     604                asm volatile("": : :"memory");
     605                if(l != __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST)) { Pause(); continue; }
     606                return [idle, total, proc];
    803607        }
    804608}
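
query() above is the read side of a seqlock: the lock word is odd while a writer is inside, and a reader retries until it observes the same even value before and after copying the payload. A generic sketch of the reader, assuming a matching writer that increments the counter once on entry and once on exit:

        #include <stdatomic.h>
        #include <stdint.h>

        typedef struct {
                atomic_uint_fast64_t seq;   // even: stable, odd: write in progress
                unsigned idle, total;       // protected payload
        } idles_t;

        static void read_idles( idles_t * this, unsigned * idle, unsigned * total ) {
                for(;;) {
                        uint_fast64_t l = atomic_load( &this->seq );
                        if( l % 2 == 1 ) continue;                      // writer active, retry
                        unsigned i = this->idle;                        // speculative copies
                        unsigned t = this->total;
                        atomic_thread_fence( memory_order_acquire );    // order copies vs. re-check
                        if( l != atomic_load( &this->seq ) ) continue;  // a writer raced us, retry
                        *idle = i; *total = t;                          // copies are consistent
                        return;
                }
        }

The asm volatile memory clobber in the original plays the role of this fence at the compiler level, which is all the accompanying comment claims is needed.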
     
    814618        // the globalAbort flag is true.
    815619        lock( kernel_abort_lock __cfaabi_dbg_ctx2 );
     620
     621        // disable interrupts, it no longer makes sense to try to interrupt this processor
     622        disable_interrupts();
    816623
    817624        // first task to abort ?
     
    831638        }
    832639
    833         return kernelTLS.this_thread;
     640        return __cfaabi_tls.this_thread;
    834641}
    835642
    836643void kernel_abort_msg( void * kernel_data, char * abort_text, int abort_text_size ) {
    837         $thread * thrd = kernel_data;
     644        $thread * thrd = ( $thread * ) kernel_data;
    838645
    839646        if(thrd) {
     
    856663
    857664int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
    858         return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
     665        return get_coroutine(kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2;
    859666}
    860667
     
    883690void ^?{}(semaphore & this) {}
    884691
    885 void P(semaphore & this) with( this ){
     692bool P(semaphore & this) with( this ){
    886693        lock( lock __cfaabi_dbg_ctx2 );
    887694        count -= 1;
    888695        if ( count < 0 ) {
    889696                // queue current task
    890                 append( waiting, kernelTLS.this_thread );
     697                append( waiting, active_thread() );
    891698
    892699                // atomically release spin lock and block
    893700                unlock( lock );
    894701                park();
     702                return true;
    895703        }
    896704        else {
    897705            unlock( lock );
    898         }
    899 }
    900 
    901 void V(semaphore & this) with( this ) {
     706            return false;
     707        }
     708}
     709
     710bool V(semaphore & this) with( this ) {
    902711        $thread * thrd = 0p;
    903712        lock( lock __cfaabi_dbg_ctx2 );
     
    912721        // make new owner
    913722        unpark( thrd );
    914 }
    915 
    916 //-----------------------------------------------------------------------------
    917 // Global Queues
    918 void doregister( cluster     & cltr ) {
    919         lock      ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    920         push_front( __cfa_dbg_global_clusters.list, cltr );
    921         unlock    ( __cfa_dbg_global_clusters.lock );
    922 }
    923 
    924 void unregister( cluster     & cltr ) {
    925         lock  ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    926         remove( __cfa_dbg_global_clusters.list, cltr );
    927         unlock( __cfa_dbg_global_clusters.lock );
    928 }
    929 
    930 void doregister( cluster * cltr, $thread & thrd ) {
    931         lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    932         cltr->nthreads += 1;
    933         push_front(cltr->threads, thrd);
    934         unlock    (cltr->thread_list_lock);
    935 }
    936 
    937 void unregister( cluster * cltr, $thread & thrd ) {
    938         lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    939         remove(cltr->threads, thrd );
    940         cltr->nthreads -= 1;
    941         unlock(cltr->thread_list_lock);
    942 }
    943 
    944 void doregister( cluster * cltr, processor * proc ) {
    945         lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    946         cltr->nprocessors += 1;
    947         push_front(cltr->procs, *proc);
    948         unlock    (cltr->proc_list_lock);
    949 }
    950 
    951 void unregister( cluster * cltr, processor * proc ) {
    952         lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    953         remove(cltr->procs, *proc );
    954         cltr->nprocessors -= 1;
    955         unlock(cltr->proc_list_lock);
     723
     724        return thrd != 0p;
     725}
     726
     727bool V(semaphore & this, unsigned diff) with( this ) {
     728        $thread * thrd = 0p;
     729        lock( lock __cfaabi_dbg_ctx2 );
      730        int release = min(-count, (int)diff);
     731        count += diff;
     732        for(release) {
      733                unpark( thrd = pop_head( waiting ) );
     734        }
     735
     736        unlock( lock );
     737
     738        return thrd != 0p;
    956739}
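
For comparison, the counted V above written as a stand-alone C semaphore over a mutex and condition variable; a sketch only, assuming the same "negative count = number of waiters" convention the CFA version uses:

        #include <pthread.h>
        #include <stdbool.h>

        typedef struct {
                pthread_mutex_t lock;
                pthread_cond_t  cond;
                int count;                  // < 0 : -count threads are waiting
        } csem_t;

        static bool V_n( csem_t * this, unsigned diff ) {
                pthread_mutex_lock( &this->lock );
                int waiting = this->count < 0 ? -this->count : 0;
                int release = waiting < (int)diff ? waiting : (int)diff;  // min(waiters, diff)
                this->count += (int)diff;
                for( int i = 0; i < release; i += 1 )
                        pthread_cond_signal( &this->cond );   // one waiter per granted permit
                pthread_mutex_unlock( &this->lock );
                return release > 0;                           // did anyone actually wake?
        }

The min is the important part: granting diff permits can wake at most diff waiters, and never more than are actually queued.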
    957740
     
    960743__cfaabi_dbg_debug_do(
    961744        extern "C" {
    962                 void __cfaabi_dbg_record(__spinlock_t & this, const char prev_name[]) {
     745                void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
    963746                        this.prev_name = prev_name;
    964                         this.prev_thrd = kernelTLS.this_thread;
     747                        this.prev_thrd = kernelTLS().this_thread;
    965748                }
    966749        }
     
    972755        return true;
    973756}
     757
     758//-----------------------------------------------------------------------------
     759// Statistics
     760#if !defined(__CFA_NO_STATISTICS__)
     761        void print_halts( processor & this ) {
     762                this.print_halts = true;
     763        }
     764
     765        void print_stats_now( cluster & this, int flags ) {
     766                __print_stats( this.stats, this.print_stats, "Cluster", this.name, (void*)&this );
     767        }
     768
     769        extern int __print_alarm_stats;
     770        void print_alarm_stats() {
     771                __print_alarm_stats = -1;
     772        }
     773#endif
    974774// Local Variables: //
    975775// mode: c //
  • libcfa/src/concurrency/kernel.hfa

    r3c64c668 r58fe85a  
    1616#pragma once
    1717
    18 #include <stdbool.h>
    19 
    2018#include "invoke.h"
    2119#include "time_t.hfa"
    2220#include "coroutine.hfa"
    2321
     22#include "containers/list.hfa"
     23
    2424extern "C" {
    25 #include <pthread.h>
    26 #include <semaphore.h>
     25        #include <bits/pthreadtypes.h>
     26        #include <linux/types.h>
    2727}
    2828
     
    3737void  ?{}(semaphore & this, int count = 1);
    3838void ^?{}(semaphore & this);
    39 void   P (semaphore & this);
    40 void   V (semaphore & this);
     39bool   P (semaphore & this);
     40bool   V (semaphore & this);
     41bool   V (semaphore & this, unsigned count);
    4142
    4243
     
    4546extern struct cluster * mainCluster;
    4647
    47 // Processor
     48// Processor id, required for scheduling threads
     49struct __processor_id_t {
     50        unsigned id:24;
     51        bool full_proc:1;
     52
     53        #if !defined(__CFA_NO_STATISTICS__)
     54                struct __stats_t * stats;
     55        #endif
     56};
     57
    4858coroutine processorCtx_t {
    4959        struct processor * proc;
     
    5161
    5262// Wrapper around kernel threads
    53 struct processor {
     63struct __attribute__((aligned(128))) processor {
    5464        // Main state
     65        inline __processor_id_t;
     66
     67        // Cluster from which to get threads
     68        struct cluster * cltr;
     69
      70        // Set to true to notify the processor that it should terminate
     71        volatile bool do_terminate;
     72
     5674        // Coroutine ctx that keeps the state of the processor
    5674        struct processorCtx_t runner;
    5775
    58         // Cluster from which to get threads
    59         struct cluster * cltr;
    60 
    6176        // Name of the processor
    6277        const char * name;
     
    6479        // Handle to pthreads
    6580        pthread_t kernel_thread;
    66 
    67         // RunThread data
     68         // Action to do after a thread is run
    69         $thread * destroyer;
    7081
    7182        // Preemption data
     
    7687        bool pending_preemption;
    7788
    78         // Idle lock
    79         __bin_sem_t idleLock;
    80 
    81         // Termination
    82         // Set to true to notify the processor should terminate
    83         volatile bool do_terminate;
    84 
    85         // Termination synchronisation
     89        // Idle lock (kernel semaphore)
     90        __bin_sem_t idle;
     91
     92        // Termination synchronisation (user semaphore)
    8693        semaphore terminated;
    8794
     
    9097
    9198        // Link lists fields
    92         struct __dbg_node_proc {
    93                 struct processor * next;
    94                 struct processor * prev;
    95         } node;
     99        DLISTED_MGD_IMPL_IN(processor)
     100
     101        #if !defined(__CFA_NO_STATISTICS__)
     102                int print_stats;
     103                bool print_halts;
     104        #endif
    96105
    97106#ifdef __CFA_DEBUG__
     
    108117static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }
    109118
    110 static inline [processor *&, processor *& ] __get( processor & this ) __attribute__((const)) { return this.node.[next, prev]; }
     119DLISTED_MGD_IMPL_OUT(processor)
     120
     121//-----------------------------------------------------------------------------
     122// I/O
     123struct __io_data;
     124
     125// IO poller user-thread
     126// Not using the "thread" keyword because we want to control
     127// more carefully when to start/stop it
     128struct $io_ctx_thread {
     129        struct __io_data * ring;
     130        single_sem sem;
     131        volatile bool done;
     132        $thread self;
     133};
     134
     135
     136struct io_context {
     137        $io_ctx_thread thrd;
     138};
     139
     140struct io_context_params {
     141        int num_entries;
     142        int num_ready;
     143        int submit_aff;
     144        bool eager_submits:1;
     145        bool poller_submits:1;
     146        bool poll_submit:1;
     147        bool poll_complete:1;
     148};
     149
     150void  ?{}(io_context_params & this);
     151
     152void  ?{}(io_context & this, struct cluster & cl);
     153void  ?{}(io_context & this, struct cluster & cl, const io_context_params & params);
     154void ^?{}(io_context & this);
     155
     156struct io_cancellation {
     157        __u64 target;
     158};
     159
     160static inline void  ?{}(io_cancellation & this) { this.target = -1u; }
     161static inline void ^?{}(io_cancellation &) {}
     162bool cancel(io_cancellation & this);
     163
     164//-----------------------------------------------------------------------------
     165// Cluster Tools
     166
     167// Intrusives lanes which are used by the relaxed ready queue
     168struct __attribute__((aligned(128))) __intrusive_lane_t;
     169void  ?{}(__intrusive_lane_t & this);
     170void ^?{}(__intrusive_lane_t & this);
     171
      172// Counter used for whether or not the lanes are all empty (SNZI: scalable non-zero indicator)
     173struct __attribute__((aligned(128))) __snzi_node_t;
     174struct __snzi_t {
     175        unsigned mask;
     176        int root;
     177        __snzi_node_t * nodes;
     178};
     179
     180void  ?{}( __snzi_t & this, unsigned depth );
     181void ^?{}( __snzi_t & this );
     182
      183// TODO: adjust cache size to ARCHITECTURE
     184// Structure holding the relaxed ready queue
     185struct __ready_queue_t {
     186        // Data tracking how many/which lanes are used
     187        // Aligned to 128 for cache locality
     188        __snzi_t snzi;
     189
     190        // Data tracking the actual lanes
      191        // On a separate cacheline from the used struct since
     192        // used can change on each push/pop but this data
     193        // only changes on shrink/grow
     194        struct {
      195                // Array of lanes
     196                __intrusive_lane_t * volatile data;
     197
     198                // Number of lanes (empty or not)
     199                volatile size_t count;
     200        } lanes;
     201};
     202
     203void  ?{}(__ready_queue_t & this);
     204void ^?{}(__ready_queue_t & this);
     205
     206// Idle Sleep
     207struct __cluster_idles {
     208        // Spin lock protecting the queue
     209        volatile uint64_t lock;
     210
     211        // Total number of processors
     212        unsigned total;
     213
     214        // Total number of idle processors
     215        unsigned idle;
     216
     217        // List of idle processors
     218        dlist(processor, processor) list;
     219};
    111220
    112221//-----------------------------------------------------------------------------
    113222// Cluster
    114 struct cluster {
    115         // Ready queue locks
    116         __spinlock_t ready_queue_lock;
    117 
     223struct __attribute__((aligned(128))) cluster {
    118224        // Ready queue for threads
    119         __queue_t($thread) ready_queue;
     225        __ready_queue_t ready_queue;
    120226
    121227        // Name of the cluster
     
    125231        Duration preemption_rate;
    126232
    127         // List of processors
    128         __spinlock_t proc_list_lock;
    129         __dllist_t(struct processor) procs;
    130         __dllist_t(struct processor) idles;
    131         unsigned int nprocessors;
     233        // List of idle processors
     234        __cluster_idles idles;
    132235
    133236        // List of threads
     
    141244                cluster * prev;
    142245        } node;
     246
     247        struct {
     248                io_context * ctxs;
     249                unsigned cnt;
     250        } io;
     251
     252        #if !defined(__CFA_NO_STATISTICS__)
     253                struct __stats_t * stats;
     254                int print_stats;
     255        #endif
    143256};
    144257extern Duration default_preemption();
    145258
    146 void ?{} (cluster & this, const char name[], Duration preemption_rate);
     259void ?{} (cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params);
    147260void ^?{}(cluster & this);
    148261
    149 static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
    150 static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
    151 static inline void ?{} (cluster & this, const char name[])        { this{name, default_preemption()}; }
     262static inline void ?{} (cluster & this)                                            { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), 1, default_params}; }
     263static inline void ?{} (cluster & this, Duration preemption_rate)                  { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, 1, default_params}; }
     264static inline void ?{} (cluster & this, const char name[])                         { io_context_params default_params;    this{name, default_preemption(), 1, default_params}; }
     265static inline void ?{} (cluster & this, unsigned num_io)                           { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), num_io, default_params}; }
     266static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io) { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, num_io, default_params}; }
     267static inline void ?{} (cluster & this, const char name[], unsigned num_io)        { io_context_params default_params;    this{name, default_preemption(), num_io, default_params}; }
     268static inline void ?{} (cluster & this, const io_context_params & io_params)                                            { this{"Anonymous Cluster", default_preemption(), 1, io_params}; }
     269static inline void ?{} (cluster & this, Duration preemption_rate, const io_context_params & io_params)                  { this{"Anonymous Cluster", preemption_rate, 1, io_params}; }
     270static inline void ?{} (cluster & this, const char name[], const io_context_params & io_params)                         { this{name, default_preemption(), 1, io_params}; }
     271static inline void ?{} (cluster & this, unsigned num_io, const io_context_params & io_params)                           { this{"Anonymous Cluster", default_preemption(), num_io, io_params}; }
     272static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io, const io_context_params & io_params) { this{"Anonymous Cluster", preemption_rate, num_io, io_params}; }
     273static inline void ?{} (cluster & this, const char name[], unsigned num_io, const io_context_params & io_params)        { this{name, default_preemption(), num_io, io_params}; }
    152274
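All of these forwarding constructors funnel into the four-argument form above; a hedged construction sketch (the cluster name and field values are illustrative, only the identifiers come from this header):

	io_context_params params;                  // defaulted by ?{}(io_context_params &)
	params.num_entries = 256;                  // illustrative ring size
	cluster cl = { "MyCluster", 2, params };   // name, num_io, io parameters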
    153275static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
    154276
    155 static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
    156 static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
     277static inline struct processor * active_processor() { return publicTLS_get( this_processor ); } // UNSAFE
     278static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
     279
     280#if !defined(__CFA_NO_STATISTICS__)
     281        void print_stats_now( cluster & this, int flags );
     282
     283        static inline void print_stats_at_exit( cluster & this, int flags ) {
     284                this.print_stats |= flags;
     285        }
     286
     287        static inline void print_stats_at_exit( processor & this, int flags ) {
     288                this.print_stats |= flags;
     289        }
     290
     291        void print_halts( processor & this );
     292#endif
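A hedged sketch of these statistics hooks (the flag value and cluster name are illustrative):

	cluster cl = { "StatsDemo" };
	print_stats_at_exit( cl, 1 );   // accumulate and print when cl is destroyed
	print_stats_now( cl, 1 );       // print a snapshot immediately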
    157293
    158294// Local Variables: //
  • libcfa/src/concurrency/kernel_private.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Mon Feb 13 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Nov 30 19:25:02 2019
    13 // Update Count     : 8
     12// Last Modified On : Wed Aug 12 08:21:33 2020
     13// Update Count     : 9
    1414//
    1515
     
    2020
    2121#include "alarm.hfa"
    22 
     22#include "stats.hfa"
    2323
    2424//-----------------------------------------------------------------------------
    2525// Scheduler
     26
     27struct __attribute__((aligned(128))) __scheduler_lock_id_t;
    2628
    2729extern "C" {
     
    3133}
    3234
    33 void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
    34 
    35 //Block current thread and release/wake-up the following resources
    36 void __leave_thread() __attribute__((noreturn));
     35void __schedule_thread( $thread * )
     36#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
     37        __attribute__((nonnull (1)))
     38#endif
     39;
     40
     41extern bool __preemption_enabled();
     42
     43//release/wake-up the following resources
     44void __thread_finish( $thread * thrd );
    3745
    3846//-----------------------------------------------------------------------------
     
    4149
    4250void * __create_pthread( pthread_t *, void * (*)(void *), void * );
    43 
    44 static inline void wake_fast(processor * this) {
    45         __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
    46         post( this->idleLock );
    47 }
    48 
    49 static inline void wake(processor * this) {
    50         disable_interrupts();
    51         wake_fast(this);
    52         enable_interrupts( __cfaabi_dbg_ctx );
    53 }
    54 
    55 struct event_kernel_t {
    56         alarm_list_t alarms;
    57         __spinlock_t lock;
    58 };
    59 
    60 extern event_kernel_t * event_kernel;
    61 
    62 struct __cfa_kernel_preemption_state_t {
    63         bool enabled;
    64         bool in_progress;
    65         unsigned short disable_count;
    66 };
    67 
    68 extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
     51void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
     52
     53
     54
     55extern cluster * mainCluster;
    6956
    7057//-----------------------------------------------------------------------------
     
    7966)
    8067
     68#define TICKET_BLOCKED (-1) // thread is blocked
     69#define TICKET_RUNNING ( 0) // thread is running
     70#define TICKET_UNBLOCK ( 1) // thread should ignore next block
     71
    8172//-----------------------------------------------------------------------------
    8273// Utils
    83 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
    84 
    85 static inline uint32_t __tls_rand() {
    86         kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
    87         kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
    88         kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
    89         return kernelTLS.rand_seed;
    90 }
    91 
    92 
    93 void doregister( struct cluster & cltr );
    94 void unregister( struct cluster & cltr );
    95 
    9674void doregister( struct cluster * cltr, struct $thread & thrd );
    9775void unregister( struct cluster * cltr, struct $thread & thrd );
    9876
    99 void doregister( struct cluster * cltr, struct processor * proc );
    100 void unregister( struct cluster * cltr, struct processor * proc );
     77//-----------------------------------------------------------------------------
     78// I/O
     79void ^?{}(io_context & this, bool );
     80
     81//=======================================================================
     82// Cluster lock API
     83//=======================================================================
      84// Cells used by the reader-writer lock
      85// while not generic, it relies only on an opaque pointer
     86struct __attribute__((aligned(128))) __scheduler_lock_id_t {
     87        // Spin lock used as the underlying lock
     88        volatile bool lock;
     89
     90        // Handle pointing to the proc owning this cell
     91        // Used for allocating cells and debugging
     92        __processor_id_t * volatile handle;
     93
     94        #ifdef __CFA_WITH_VERIFY__
     95                // Debug, check if this is owned for reading
     96                bool owned;
     97        #endif
     98};
     99
     100static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
     101
     102// Lock-Free registering/unregistering of threads
     103// Register a processor to a given cluster and get its unique id in return
     104unsigned doregister( struct __processor_id_t * proc );
     105
     106// Unregister a processor from a given cluster using its id, getting back the original pointer
     107void     unregister( struct __processor_id_t * proc );
     108
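A hedged sketch of the registration lifecycle (the wrapper functions are hypothetical; the cast works because processor inlines __processor_id_t):

	void register_proc( processor & this ) {
		this.id = doregister( (__processor_id_t *)&this );   // unique slot id
	}
	void deregister_proc( processor & this ) {
		unregister( (__processor_id_t *)&this );             // release the slot
	}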
     109//-----------------------------------------------------------------------
     110// Cluster idle lock/unlock
     111static inline void lock(__cluster_idles & this) {
     112        for() {
     113                uint64_t l = this.lock;
     114                if(
     115                        (0 == (l % 2))
     116                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
     117                ) return;
     118                Pause();
     119        }
     120}
     121
     122static inline void unlock(__cluster_idles & this) {
     123        /* paranoid */ verify( 1 == (this.lock % 2) );
     124        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
     125}
     126
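The idle lock encodes its state in the counter's parity: an even value is unlocked and an odd value is locked, so release is a single atomic increment. A hedged usage sketch around the idle list (the variable names and the list operation are illustrative):

	lock( cltr->idles );                      // spins until even -> odd succeeds
	// ... push or pop an idle processor on cltr->idles.list ...
	unlock( cltr->idles );                    // odd -> even, one atomic add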
     127//=======================================================================
     128// Reader-writer lock implementation
     129// Concurrent with doregister/unregister,
     130//    i.e., threads can be added at any point during or between the entry/exit
     131
     132//-----------------------------------------------------------------------
     133// simple spinlock underlying the RWLock
     134// Blocking acquire
     135static inline void __atomic_acquire(volatile bool * ll) {
     136        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
     137                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
     138                        Pause();
     139        }
     140        /* paranoid */ verify(*ll);
     141}
     142
     143// Non-Blocking acquire
     144static inline bool __atomic_try_acquire(volatile bool * ll) {
     145        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
     146}
     147
     148// Release
     149static inline void __atomic_unlock(volatile bool * ll) {
     150        /* paranoid */ verify(*ll);
     151        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
     152}
     153
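These three helpers form a test-and-test-and-set spinlock over a single bool; a minimal usage sketch (the flag is illustrative):

	static volatile bool flag = false;
	__atomic_acquire( &flag );              // blocking: spin-read, then exchange
	// ... critical section ...
	__atomic_unlock( &flag );
	if( __atomic_try_acquire( &flag ) ) {   // non-blocking variant
		__atomic_unlock( &flag );
	}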
     154//-----------------------------------------------------------------------
     155// Reader-Writer lock protecting the ready-queues
      156// while this lock is mostly generic, some aspects
      157// have been hard-coded for the ready-queue, for
      158// simplicity and performance
     159struct __scheduler_RWLock_t {
     160        // total cachelines allocated
     161        unsigned int max;
     162
     163        // cachelines currently in use
     164        volatile unsigned int alloc;
     165
      166        // cachelines ready to iterate over
     167        // (!= to alloc when thread is in second half of doregister)
     168        volatile unsigned int ready;
     169
     170        // writer lock
     171        volatile bool lock;
     172
     173        // data pointer
     174        __scheduler_lock_id_t * data;
     175};
     176
     177void  ?{}(__scheduler_RWLock_t & this);
     178void ^?{}(__scheduler_RWLock_t & this);
     179
     180extern __scheduler_RWLock_t * __scheduler_lock;
     181
     182//-----------------------------------------------------------------------
     183// Reader side : acquire when using the ready queue to schedule but not
     184//  creating/destroying queues
     185static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
     186        /* paranoid */ verify( ! __preemption_enabled() );
     187        /* paranoid */ verify( kernelTLS().this_proc_id );
     188
     189        unsigned iproc = kernelTLS().this_proc_id->id;
     190        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     191        /*paranoid*/ verify(iproc < ready);
     192
     193        // Step 1 : make sure no writer are in the middle of the critical section
     194        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
     195                Pause();
     196
     197        // Fence needed because we don't want to start trying to acquire the lock
     198        // before we read a false.
     199        // Not needed on x86
     200        // std::atomic_thread_fence(std::memory_order_seq_cst);
     201
     202        // Step 2 : acquire our local lock
     203        __atomic_acquire( &data[iproc].lock );
     204        /*paranoid*/ verify(data[iproc].lock);
     205
     206        #ifdef __CFA_WITH_VERIFY__
     207                // Debug, check if this is owned for reading
     208                data[iproc].owned = true;
     209        #endif
     210}
     211
     212static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
     213        /* paranoid */ verify( ! __preemption_enabled() );
     214        /* paranoid */ verify( kernelTLS().this_proc_id );
     215
     216        unsigned iproc = kernelTLS().this_proc_id->id;
     217        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     218        /*paranoid*/ verify(iproc < ready);
     219        /*paranoid*/ verify(data[iproc].lock);
     220        /*paranoid*/ verify(data[iproc].owned);
     221        #ifdef __CFA_WITH_VERIFY__
     222                // Debug, check if this is owned for reading
     223                data[iproc].owned = false;
     224        #endif
     225        __atomic_unlock(&data[iproc].lock);
     226}
     227
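Reader-side acquisition must sit inside a non-preemptible region, as the paranoid verifies insist; a hedged sketch of the expected bracketing (the queue operation is illustrative):

	disable_interrupts();
	ready_schedule_lock();
	push( cltr, thrd );             // any scheduling-path ready-queue operation
	ready_schedule_unlock();
	enable_interrupts( __cfaabi_dbg_ctx );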
     228#ifdef __CFA_WITH_VERIFY__
     229        static inline bool ready_schedule_islocked(void) {
     230                /* paranoid */ verify( ! __preemption_enabled() );
     231                /*paranoid*/ verify( kernelTLS().this_proc_id );
     232                __processor_id_t * proc = kernelTLS().this_proc_id;
     233                return __scheduler_lock->data[proc->id].owned;
     234        }
     235
     236        static inline bool ready_mutate_islocked() {
     237                return __scheduler_lock->lock;
     238        }
     239#endif
     240
     241//-----------------------------------------------------------------------
     242// Writer side : acquire when changing the ready queue, e.g. adding more
     243//  queues or removing them.
     244uint_fast32_t ready_mutate_lock( void );
     245
     246void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
     247
     248//=======================================================================
     249// Ready-Queue API
     250//-----------------------------------------------------------------------
      251// query the ready queue of a cluster
      252// returns true if it contains threads, false otherwise
     253__attribute__((hot)) bool query(struct cluster * cltr);
     254
     255//-----------------------------------------------------------------------
     256// push thread onto a ready queue for a cluster
     257// returns true if the list was previously empty, false otherwise
     258__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
     259
     260//-----------------------------------------------------------------------
     261// pop thread from the ready queue of a cluster
     262// returns 0p if empty
     263// May return 0p spuriously
     264__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
     265
     266//-----------------------------------------------------------------------
     267// pop thread from the ready queue of a cluster
     268// returns 0p if empty
     269// guaranteed to find any threads added before this call
     270__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
     271
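A hedged sketch of the push/pop contract, showing how a caller copes with pop's permitted spurious failures:

	if( push( cltr, thrd ) ) {
		// queue was previously empty: an idle processor may need waking
	}
	$thread * t = pop( cltr );              // fast path, may fail spuriously
	if( 0p == t ) t = pop_slow( cltr );     // slow path, finds earlier pushes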
     272//-----------------------------------------------------------------------
     273// remove thread from the ready queue of a cluster
      274// returns whether the thread was found and removed
     275bool remove_head(struct cluster * cltr, struct $thread * thrd);
     276
     277//-----------------------------------------------------------------------
     278// Increase the width of the ready queue (number of lanes) by 4
     279void ready_queue_grow  (struct cluster * cltr, int target);
     280
     281//-----------------------------------------------------------------------
     282// Decrease the width of the ready queue (number of lanes) by 4
     283void ready_queue_shrink(struct cluster * cltr, int target);
     284
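Resizing is a structural change, so it presumably runs under the writer side of the scheduler RWLock declared above; a hedged sketch (the target value is illustrative):

	uint_fast32_t last = ready_mutate_lock();   // exclude all readers
	ready_queue_grow( cltr, 4 );                // e.g. four more lanes
	ready_mutate_unlock( last );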
    101285
    102286// Local Variables: //
  • libcfa/src/concurrency/monitor.cfa

    r3c64c668 r58fe85a  
    8282// Enter single monitor
    8383static void __enter( $monitor * this, const __monitor_group_t & group ) {
     84        $thread * thrd = active_thread();
     85
    8486        // Lock the monitor spinlock
    8587        lock( this->lock __cfaabi_dbg_ctx2 );
    86         // Interrupts disable inside critical section
    87         $thread * thrd = kernelTLS.this_thread;
    8888
    8989        __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
    9090
    91         if( !this->owner ) {
     91        if( unlikely(0 != (0x1 & (uintptr_t)this->owner)) ) {
     92                abort( "Attempt by thread \"%.256s\" (%p) to access joined monitor %p.", thrd->self_cor.name, thrd, this );
     93        }
     94        else if( !this->owner ) {
    9295                // No one has the monitor, just take it
    9396                __set_owner( this, thrd );
     
    114117
    115118                // Some one else has the monitor, wait in line for it
    116                 /* paranoid */ verify( thrd->next == 0p );
     119                /* paranoid */ verify( thrd->link.next == 0p );
    117120                append( this->entry_queue, thrd );
    118                 /* paranoid */ verify( thrd->next == 1p );
     121                /* paranoid */ verify( thrd->link.next == 1p );
    119122
    120123                unlock( this->lock );
     
    123126                __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    124127
    125                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     128                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    126129                return;
    127130        }
     
    129132        __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    130133
    131         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     134        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    132135        /* paranoid */ verify( this->lock.lock );
    133136
     
    137140}
    138141
    139 static void __dtor_enter( $monitor * this, fptr_t func ) {
     142static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
     143        $thread * thrd = active_thread();
     144        #if defined( __CFA_WITH_VERIFY__ )
     145                bool is_thrd = this == &thrd->self_mon;
     146        #endif
     147
    140148        // Lock the monitor spinlock
    141149        lock( this->lock __cfaabi_dbg_ctx2 );
    142         // Interrupts disable inside critical section
    143         $thread * thrd = kernelTLS.this_thread;
    144150
    145151        __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
     
    152158                __set_owner( this, thrd );
    153159
    154                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     160                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     161                /* paranoid */ verify( !is_thrd || thrd->state == Halted || thrd->state == Cancelled );
    155162
    156163                unlock( this->lock );
    157164                return;
    158165        }
    159         else if( this->owner == thrd) {
     166        else if( this->owner == thrd && !join) {
     160167                // We already have the monitor... but we're about to destroy it, so the nesting will fail
    161168                // Abort!
    162169                abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
    163170        }
      171        // SKULLDUGGERY: join acts as a dtor, so it would normally trigger the above check,
      172        // because join does not release the monitor after it has executed.
      173        // To avoid that, it sets the owner to the special value thrd | 1p before exiting.
     174        else if( this->owner == ($thread*)(1 | (uintptr_t)thrd) ) {
     175                // restore the owner and just return
     176                __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
     177
     178                // No one has the monitor, just take it
     179                __set_owner( this, thrd );
     180
     181                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     182                /* paranoid */ verify( !is_thrd || thrd->state == Halted || thrd->state == Cancelled );
     183
     184                unlock( this->lock );
     185                return;
     186        }
     187
      188        // The monitor is busy; if this is a thread and the thread owns itself, it had better be active
     189        /* paranoid */ verify( !is_thrd || this->owner != thrd || (thrd->state != Halted && thrd->state != Cancelled) );
    164190
    165191        __lock_size_t count = 1;
     
    183209
    184210                // Release the next thread
    185                 /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     211                /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    186212                unpark( urgent->owner->waiting_thread );
    187213
     
    190216
     191217        // Someone was waiting for us, enter
    192                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     218                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     219
     220                __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
     221                return;
    193222        }
    194223        else {
     
    199228
     200229                // Someone else has the monitor, wait in line for it
    201                 /* paranoid */ verify( thrd->next == 0p );
     230                /* paranoid */ verify( thrd->link.next == 0p );
    202231                append( this->entry_queue, thrd );
    203                 /* paranoid */ verify( thrd->next == 1p );
     232                /* paranoid */ verify( thrd->link.next == 1p );
    204233                unlock( this->lock );
    205234
     
    207236                park();
    208237
    209                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     238                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    210239                return;
    211240        }
    212 
    213         __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
    214 
    215241}
    216242
     
    220246        lock( this->lock __cfaabi_dbg_ctx2 );
    221247
    222         __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
    223 
    224         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     248        __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", active_thread(), this, this->owner);
     249
     250        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    225251
    226252        // Leaving a recursion level, decrement the counter
     
    251277
    252278// Leave single monitor for the last time
    253 void __dtor_leave( $monitor * this ) {
     279void __dtor_leave( $monitor * this, bool join ) {
    254280        __cfaabi_dbg_debug_do(
    255                 if( TL_GET( this_thread ) != this->owner ) {
    256                         abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
     281                if( active_thread() != this->owner ) {
     282                        abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, active_thread(), this->owner);
    257283                }
    258                 if( this->recursion != 1 ) {
     284                if( this->recursion != 1  && !join ) {
    259285                        abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    260286                }
    261287        )
    262 }
    263 
    264 extern "C" {
    265         // Leave the thread monitor
    266         // last routine called by a thread.
    267         // Should never return
    268         void __cfactx_thrd_leave() {
    269                 $thread * thrd = TL_GET( this_thread );
    270                 $monitor * this = &thrd->self_mon;
    271 
    272                 // Lock the monitor now
    273                 lock( this->lock __cfaabi_dbg_ctx2 );
    274 
    275                 disable_interrupts();
    276 
    277                 thrd->state = Halted;
    278 
    279                 /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
    280 
    281                 // Leaving a recursion level, decrement the counter
    282                 this->recursion -= 1;
    283 
    284                 // If we haven't left the last level of recursion
    285                 // it must mean there is an error
    286                 if( this->recursion != 0) { abort( "Thread internal monitor has unbalanced recursion" ); }
    287 
    288                 // Fetch the next thread, can be null
    289                 $thread * new_owner = next_thread( this );
    290 
    291                 // Release the monitor lock
    292                 unlock( this->lock );
    293 
    294                 // Unpark the next owner if needed
    295                 /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
    296                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    297                 /* paranoid */ verify( ! kernelTLS.this_processor->destroyer );
    298                 /* paranoid */ verify( thrd->state == Halted );
    299 
    300                 kernelTLS.this_processor->destroyer = new_owner;
    301 
    302                 // Leave the thread
    303                 __leave_thread();
    304 
    305                 // Control flow should never reach here!
    306         }
     288
     289        this->owner = ($thread*)(1 | (uintptr_t)this->owner);
     290}
     291
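The join/destroy handshake relies on pointer tagging: $thread objects are aligned, so bit 0 of the owner pointer is always free; __dtor_leave sets it to mark the monitor as left-by-a-joiner, and __dtor_enter tests and strips it. A minimal sketch of the tag arithmetic:

	$thread * thrd   = active_thread();
	$thread * tagged = ($thread *)(1 | (uintptr_t)thrd);               // set low bit
	bool      marked = 0 != (0x1 & (uintptr_t)tagged);                 // test low bit
	$thread * plain  = ($thread *)((uintptr_t)tagged & ~(uintptr_t)1); // strip it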
     292void __thread_finish( $thread * thrd ) {
     293        $monitor * this = &thrd->self_mon;
     294
     295        // Lock the monitor now
     296        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     297        /* paranoid */ verify( this->lock.lock );
     298        /* paranoid */ verify( thrd->context.SP );
     299        /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) > ((uintptr_t)__get_stack(thrd->curr_cor)->limit), "ERROR : $thread %p has been corrupted.\n StackPointer too large.\n", thrd );
     300        /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) < ((uintptr_t)__get_stack(thrd->curr_cor)->base ), "ERROR : $thread %p has been corrupted.\n StackPointer too small.\n", thrd );
     301        /* paranoid */ verify( ! __preemption_enabled() );
     302
     303        /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
     304        /* paranoid */ verify( thrd->state == Halting );
     305        /* paranoid */ verify( this->recursion == 1 );
     306
     307        // Leaving a recursion level, decrement the counter
     308        this->recursion -= 1;
     309        this->owner = 0p;
     310
     311        // Fetch the next thread, can be null
     312        $thread * new_owner = next_thread( this );
     313
     314        // Mark the state as fully halted
     315        thrd->state = Halted;
     316
     317        // Release the monitor lock
     318        unlock( this->lock );
     319
     320        // Unpark the next owner if needed
     321        /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     322        /* paranoid */ verify( ! __preemption_enabled() );
     323        /* paranoid */ verify( thrd->state == Halted );
     324        unpark( new_owner );
    307325}
    308326
     
    326344// Sorts monitors before entering
    327345void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
    328         $thread * thrd = TL_GET( this_thread );
     346        $thread * thrd = active_thread();
    329347
    330348        // Store current array
     
    361379
    362380        // Restore thread context
    363         TL_GET( this_thread )->monitors = this.prev;
     381        active_thread()->monitors = this.prev;
    364382}
    365383
    366384// Ctor for monitor guard
    367385// Sorts monitors before entering
    368 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func ) {
     386void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
    369387        // optimization
    370         $thread * thrd = TL_GET( this_thread );
     388        $thread * thrd = active_thread();
    371389
    372390        // Store current array
     
    376394        this.prev = thrd->monitors;
    377395
     396        // Save whether we are in a join or not
     397        this.join = join;
     398
    378399        // Update thread context (needed for conditions)
    379400        (thrd->monitors){m, 1, func};
    380401
    381         __dtor_enter( this.m, func );
     402        __dtor_enter( this.m, func, join );
    382403}
    383404
     
    385406void ^?{}( monitor_dtor_guard_t & this ) {
    386407        // Leave the monitors in order
    387         __dtor_leave( this.m );
     408        __dtor_leave( this.m, this.join );
    388409
    389410        // Restore thread context
    390         TL_GET( this_thread )->monitors = this.prev;
     411        active_thread()->monitors = this.prev;
    391412}
    392413
     
    428449
    429450        // Create the node specific to this wait operation
    430         wait_ctx( TL_GET( this_thread ), user_info );
     451        wait_ctx( active_thread(), user_info );
    431452
    432453        // Append the current wait operation to the ones already queued on the condition
     
    479500        //Some more checking in debug
    480501        __cfaabi_dbg_debug_do(
    481                 $thread * this_thrd = TL_GET( this_thread );
     502                $thread * this_thrd = active_thread();
    482503                if ( this.monitor_count != this_thrd->monitors.size ) {
    483504                        abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
     
    527548
    528549        // Create the node specific to this wait operation
    529         wait_ctx_primed( kernelTLS.this_thread, 0 )
     550        wait_ctx_primed( active_thread(), 0 )
    530551
    531552        //save contexts
     
    534555        //Find the thread to run
    535556        $thread * signallee = pop_head( this.blocked )->waiting_thread;
    536         /* paranoid */ verify( signallee->next == 0p );
    537557        __set_owner( monitors, count, signallee );
    538558
     
    627647
    628648                                // Create the node specific to this wait operation
    629                                 wait_ctx_primed( kernelTLS.this_thread, 0 );
     649                                wait_ctx_primed( active_thread(), 0 );
    630650
    631651                                // Save monitor states
     
    679699
    680700        // Create the node specific to this wait operation
    681         wait_ctx_primed( kernelTLS.this_thread, 0 );
     701        wait_ctx_primed( active_thread(), 0 );
    682702
    683703        monitor_save;
     
    685705
    686706        for( __lock_size_t i = 0; i < count; i++) {
    687                 verify( monitors[i]->owner == kernelTLS.this_thread );
     707                verify( monitors[i]->owner == active_thread() );
    688708        }
    689709
     
    721741static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
    722742        /* paranoid */ verify ( monitors[0]->lock.lock );
    723         /* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
     743        /* paranoid */ verifyf( monitors[0]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[0]->owner, monitors[0]->recursion, monitors[0] );
    724744        monitors[0]->owner        = owner;
    725745        monitors[0]->recursion    = 1;
    726746        for( __lock_size_t i = 1; i < count; i++ ) {
    727747                /* paranoid */ verify ( monitors[i]->lock.lock );
    728                 /* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
     748                /* paranoid */ verifyf( monitors[i]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[i]->owner, monitors[i]->recursion, monitors[i] );
    729749                monitors[i]->owner        = owner;
    730750                monitors[i]->recursion    = 0;
     
    752772                //regardless of if we are ready to baton pass,
    753773                //we need to set the monitor as in use
    754                 /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     774                /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    755775                __set_owner( this,  urgent->owner->waiting_thread );
    756776
     
    761781        // Get the next thread in the entry_queue
    762782        $thread * new_owner = pop_head( this->entry_queue );
    763         /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    764         /* paranoid */ verify( !new_owner || new_owner->next == 0p );
     783        /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     784        /* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
    765785        __set_owner( this, new_owner );
    766786
     
    884904        }
    885905
    886         __cfaabi_dbg_print_safe( "Kernel :  Running %i (%p)\n", ready2run, ready2run ? node->waiting_thread : 0p );
      906        __cfaabi_dbg_print_safe( "Kernel :  Running %i (%p)\n", ready2run, ready2run ? (thread*)node->waiting_thread : (thread*)0p );
    887907        return ready2run ? node->waiting_thread : 0p;
    888908}
    889909
    890910static inline void brand_condition( condition & this ) {
    891         $thread * thrd = TL_GET( this_thread );
     911        $thread * thrd = active_thread();
    892912        if( !this.monitors ) {
    893913                // __cfaabi_dbg_print_safe( "Branding\n" );
     
    908928        // For each thread in the entry-queue
    909929        for(    $thread ** thrd_it = &entry_queue.head;
    910                 *thrd_it != 1p;
    911                 thrd_it = &(*thrd_it)->next
     930                (*thrd_it) != 1p;
     931                thrd_it = &(*thrd_it)->link.next
    912932        ) {
    913933                // For each acceptable check if it matches
  • libcfa/src/concurrency/monitor.hfa

    r3c64c668 r58fe85a  
    5353        $monitor *    m;
    5454        __monitor_group_t prev;
     55        bool join;
    5556};
    5657
    57 void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)() );
     58void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)(), bool join );
    5859void ^?{}( monitor_dtor_guard_t & this );
    5960
     
    131132
    132133              void wait        ( condition & this, uintptr_t user_info = 0 );
     134static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
    133135              bool signal      ( condition & this );
    134136              bool signal_block( condition & this );
    135 static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
     137static inline bool signal_all  ( condition & this ) { bool ret = false; while(!is_empty(this)) { ret = signal(this) || ret; } return ret; }
    136138         uintptr_t front       ( condition & this );
    137139
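A hedged usage sketch of the condition interface inside a monitor (the Buffer monitor and its functions are illustrative):

	monitor Buffer { condition nonEmpty; int count; };
	void take( Buffer & mutex b ) {
		while( b.count == 0 ) wait( b.nonEmpty );
		b.count -= 1;
	}
	void put_all( Buffer & mutex b, int n ) {
		b.count += n;
		signal_all( b.nonEmpty );       // wake every waiter at once
	}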
  • libcfa/src/concurrency/mutex.cfa

    r3c64c668 r58fe85a  
    3030        this.lock{};
    3131        this.blocked_threads{};
     32        this.is_locked = false;
    3233}
    3334
     
    3940        lock( lock __cfaabi_dbg_ctx2 );
    4041        if( is_locked ) {
    41                 append( blocked_threads, kernelTLS.this_thread );
     42                append( blocked_threads, active_thread() );
    4243                unlock( lock );
    4344                park();
     
    8586        lock( lock __cfaabi_dbg_ctx2 );
    8687        if( owner == 0p ) {
    87                 owner = kernelTLS.this_thread;
     88                owner = active_thread();
    8889                recursion_count = 1;
    8990                unlock( lock );
    9091        }
    91         else if( owner == kernelTLS.this_thread ) {
     92        else if( owner == active_thread() ) {
    9293                recursion_count++;
    9394                unlock( lock );
    9495        }
    9596        else {
    96                 append( blocked_threads, kernelTLS.this_thread );
     97                append( blocked_threads, active_thread() );
    9798                unlock( lock );
    9899                park();
     
    104105        lock( lock __cfaabi_dbg_ctx2 );
    105106        if( owner == 0p ) {
    106                 owner = kernelTLS.this_thread;
     107                owner = active_thread();
    107108                recursion_count = 1;
    108109                ret = true;
    109110        }
    110         else if( owner == kernelTLS.this_thread ) {
     111        else if( owner == active_thread() ) {
    111112                recursion_count++;
    112113                ret = true;
     
    158159void wait(condition_variable & this) {
    159160        lock( this.lock __cfaabi_dbg_ctx2 );
    160         append( this.blocked_threads, kernelTLS.this_thread );
     161        append( this.blocked_threads, active_thread() );
    161162        unlock( this.lock );
    162163        park();
     
    166167void wait(condition_variable & this, L & l) {
    167168        lock( this.lock __cfaabi_dbg_ctx2 );
    168         append( this.blocked_threads, kernelTLS.this_thread );
     169        append( this.blocked_threads, active_thread() );
    169170        unlock(l);
    170171        unlock(this.lock);
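A hedged sketch of the recursive-ownership behaviour implemented above (the lock's type name is assumed from this translation unit's header):

	recursive_mutex_lock m;
	lock( m );      // owner = active_thread(), recursion_count = 1
	lock( m );      // same owner: recursion_count = 2, no blocking
	unlock( m );    // recursion_count back to 1, still owned
	unlock( m );    // count reaches 0: a blocked thread, if any, is woken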
  • libcfa/src/concurrency/preemption.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Dec  5 16:34:05 2019
    13 // Update Count     : 43
     12// Last Modified On : Fri Nov  6 07:42:13 2020
     13// Update Count     : 54
    1414//
    1515
     
    1919#include <assert.h>
    2020
    21 extern "C" {
    2221#include <errno.h>
    2322#include <stdio.h>
     
    2524#include <unistd.h>
    2625#include <limits.h>                                                                             // PTHREAD_STACK_MIN
    27 }
    2826
    2927#include "bits/signal.hfa"
     28#include "kernel_private.hfa"
    3029
    3130#if !defined(__CFA_DEFAULT_PREEMPTION__)
     
    4342// FwdDeclarations : Signal handlers
    4443static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
     44static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
    4545static void sigHandler_segv     ( __CFA_SIGPARMS__ );
    4646static void sigHandler_ill      ( __CFA_SIGPARMS__ );
     
    5656#elif defined( __x86_64 )
    5757#define CFA_REG_IP gregs[REG_RIP]
    58 #elif defined( __ARM_ARCH )
     58#elif defined( __arm__ )
    5959#define CFA_REG_IP arm_pc
     60#elif defined( __aarch64__ )
     61#define CFA_REG_IP pc
    6062#else
    61 #error unknown hardware architecture
     63#error unsupported hardware architecture
    6264#endif
    6365
     
    8385// Get next expired node
    8486static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    85         if( !alarms->head ) return 0p;                                          // If no alarms return null
    86         if( alarms->head->alarm >= currtime ) return 0p;        // If alarms head not expired return null
     87        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
     88        if( (*alarms)`first.alarm >= currtime ) return 0p;      // If alarms head not expired return null
    8789        return pop(alarms);                                                                     // Otherwise just pop head
    8890}
    8991
    9092// Tick one frame of the Discrete Event Simulation for alarms
    91 static void tick_preemption() {
     93static void tick_preemption(void) {
    9294        alarm_node_t * node = 0p;                                                       // Used in the while loop but cannot be declared in the while condition
    9395        alarm_list_t * alarms = &event_kernel->alarms;          // Local copy for ease of reading
     
    9799        while( node = get_expired( alarms, currtime ) ) {
    98100                // __cfaabi_dbg_print_buffer_decl( " KERNEL: preemption tick.\n" );
     101                Duration period = node->period;
     102                if( period == 0) {
     103                        node->set = false;                  // Node is one-shot, just mark it as not pending
     104                }
    99105
    100106                // Check if this is a kernel
    101                 if( node->kernel_alarm ) {
     107                if( node->type == Kernel ) {
    102108                        preempt( node->proc );
    103109                }
     110                else if( node->type == User ) {
     111                        timeout( node->thrd );
     112                }
    104113                else {
    105                         timeout( node->thrd );
     114                        node->callback(*node);
    106115                }
    107116
    108117                // Check if this is a periodic alarm
    109                 Duration period = node->period;
    110118                if( period > 0 ) {
    111119                        // __cfaabi_dbg_print_buffer_local( " KERNEL: alarm period is %lu.\n", period.tv );
     
    113121                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    114122                }
    115                 else {
    116                         node->set = false;                  // Node is one-shot, just mark it as not pending
    117                 }
    118123        }
    119124
    120125        // If there are still alarms pending, reset the timer
    121         if( alarms->head ) {
    122                 __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
    123                 Duration delta = alarms->head->alarm - currtime;
    124                 Duration caped = max(delta, 50`us);
     126        if( & (*alarms)`first ) {
     127                __cfadbg_print_buffer_decl(preemption, " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
     128                Duration delta = (*alarms)`first.alarm - currtime;
     129                Duration capped = max(delta, 50`us);
    125130                // itimerval tim  = { caped };
    126131                // __cfaabi_dbg_print_buffer_local( "    Values are %lu, %lu, %lu %lu.\n", delta.tv, caped.tv, tim.it_value.tv_sec, tim.it_value.tv_usec);
    127132
    128                 __kernel_set_timer( caped );
     133                __kernel_set_timer( capped );
    129134        }
    130135}
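// A minimal sketch of the tick logic above in plain C (simplified stand-in types;
// the real code dispatches to preempt/timeout/callback by alarm type):
//
//     #include <stdbool.h>
//     typedef long Time;
//     struct node { struct node * next; Time deadline, period; bool set; void (*fire)(struct node *); };
//
//     static struct node * pop_expired( struct node ** head, Time now ) {
//         if( !*head || (*head)->deadline >= now ) return 0;      // head not expired: nothing to do
//         struct node * n = *head; *head = n->next; return n;     // otherwise pop the earliest deadline
//     }
//
//     static void insert_sorted( struct node ** head, struct node * n ) {
//         while( *head && (*head)->deadline <= n->deadline ) head = &(*head)->next;
//         n->next = *head; *head = n;                             // keep the list ordered by deadline
//     }
//
//     static void tick( struct node ** head, Time now ) {
//         for( struct node * n; (n = pop_expired( head, now )) != 0; ) {
//             if( n->period == 0 ) n->set = false;                // one-shot: no longer pending
//             n->fire( n );                                       // stand-in for preempt/timeout/callback
//             if( n->period > 0 ) {                               // periodic: schedule the next trigger
//                 n->deadline = now + n->period;
//                 insert_sorted( head, n );
//             }
//         }
//     }                                                           // caller re-arms the timer from the new head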
     
    158163// Kernel Signal Tools
    159164//=============================================================================================
    160 
    161 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
      165// In a user-level threading system, there are a handful of thread-local variables for which the following problem occurs on the ARM.
     166//
     167// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
     168// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
     169//
      170// For example, this code fragment sets the state of the "interrupts" variable in thread-local memory.
     171//
     172// _Thread_local volatile int interrupts;
     173// int main() {
      174//     interrupts = 0; /* disable interrupts */ }
     175//
     176// which generates the following code on the ARM
     177//
     178// (gdb) disassemble main
     179// Dump of assembler code for function main:
     180//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
     181//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
     182//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
     183//    0x000000000000061c <+12>: add     x1, x1, #0x10
     184//    0x0000000000000620 <+16>: str     wzr, [x1]
     185//    0x0000000000000624 <+20>: ret
     186//
     187// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
      188// increase the TLS pointer by the displacement (offset) 0x10, which is the location in the TLS of variable
      189// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
      190// TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, the thread can be preempted at the
      191// user level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
      192// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
      193// turning off interrupts on the wrong kernel thread.
     194//
     195// On the x86, the following code is generated for the same code fragment.
     196//
     197// (gdb) disassemble main
     198// Dump of assembler code for function main:
     199//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
     200//    0x000000000040042c <+12>: xor    %eax,%eax
     201//    0x000000000040042e <+14>: retq
     202//
      203// and base-displacement addressing is used to atomically reset variable "interrupts" relative to the TLS pointer in
      204// register "fs".
     205//
      206// Hence, the ARM has base-displacement addressing for the general-purpose registers, BUT not for the coprocessor
      207// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
     208//
     209// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
     210// thread on the same kernel thread.
     211//
      212// The obvious question is why the ARM uses a coprocessor register to store the TLS pointer given that coprocessor
      213// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
     214// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
     215// available.
     216
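// Restating the race as a sketch (hypothetical helper name, C-like pseudocode):
//
//     int * p = tls_address_of( interrupts );   // address computed while running on kernel thread N
//     /* user-level preemption here; the user thread resumes on kernel thread M */
//     *p = 0;                                   // store still targets kernel thread N's TLS
//
// Any TLS write the compiler splits into "compute address, then store" is vulnerable, which is
// why the TLS accesses below are bracketed with assembler labels so the signal handler can refuse
// to preempt anywhere inside them.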
     217//-----------------------------------------------------------------------------
     218// Some assembly required
     219#define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)
     220
     221//----------
      222// special case for the preemption flag since it is accessed often
     223bool __preemption_enabled() {
      224        // create an assembler label before
      225        // marked as clobbering memory so the compiler cannot move the access across it
     226        __cfaasm_label(check, before);
     227
     228        // access tls as normal
     229        bool enabled = __cfaabi_tls.preemption_state.enabled;
     230
      231        // create an assembler label after
      232        // marked as clobbering memory so the compiler cannot move the access across it
     233        __cfaasm_label(check, after);
     234        return enabled;
     235}
     236
     237struct asm_region {
     238        void * before;
     239        void * after;
     240};
     241
     242static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
     243        return ip >= region.before && ip <= region.after;
     244}
     245
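// Usage: a function that touches TLS brackets the access between __cfaasm_label(name, before)
// and __cfaasm_label(name, after), as __preemption_enabled does above. The signal handler later
// materializes each [before, after] pair as an asm_region and uses __cfaasm_in to decline
// preemption when the interrupted instruction pointer falls inside any such region (see
// preemption_ready below).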
     246
     247//----------
     248// Get data from the TLS block
     249// struct asm_region __cfaasm_get;
      250uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); // no inline: the global before/after labels must exist exactly once
     251uintptr_t __cfatls_get( unsigned long int offset ) {
      252        // create an assembler label before
      253        // marked as clobbering memory so the compiler cannot move the access across it
     254        __cfaasm_label(get, before);
     255
     256        // access tls as normal (except for pointer arithmetic)
     257        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
     258
      259        // create an assembler label after
      260        // marked as clobbering memory so the compiler cannot move the access across it
     261        __cfaasm_label(get, after);
     262        return val;
     263}
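// Usage sketch (gcc builtins assumed; the field choice is illustrative):
//
//     uintptr_t raw = __cfatls_get( __builtin_offsetof( __typeof__(__cfaabi_tls), preemption_state ) );
//
// Because of the noinline attribute, every caller goes through the single labelled region above.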
    162264
    163265extern "C" {
    164266        // Disable interrupts by incrementing the counter
    165267        void disable_interrupts() {
    166                 with( kernelTLS.preemption_state ) {
      268                // create an assembler label before
      269                // marked as clobbering memory so the compiler cannot move the access across it
     270                __cfaasm_label(dsable, before);
     271
     272                with( __cfaabi_tls.preemption_state ) {
    167273                        #if GCC_VERSION > 50000
    168274                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     
    181287                        verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
    182288                }
     289
      290                // create an assembler label after
      291                // marked as clobbering memory so the compiler cannot move the access across it
     292                __cfaasm_label(dsable, after);
     293
    183294        }
    184295
     
    186297        // If counter reaches 0, execute any pending __cfactx_switch
    187298        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
    188                 processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
    189 
    190                 with( kernelTLS.preemption_state ){
     299                // Cache the processor now since interrupts can start happening after the atomic store
     300                processor   * proc = __cfaabi_tls.this_processor;
     301                /* paranoid */ verify( proc );
     302
     303                with( __cfaabi_tls.preemption_state ){
    191304                        unsigned short prev = disable_count;
    192305                        disable_count -= 1;
    193                         verify( prev != 0u );                     // If this triggers someone is enabled already enabled interruptsverify( prev != 0u );
     306
      307                        // If this triggers, someone is enabling interrupts that were never disabled
     308                        /* paranoid */ verify( prev != 0u );
    194309
     195310                        // Check if we need to preempt the thread because an interrupt was missed
    196311                        if( prev == 1 ) {
    197312                                #if GCC_VERSION > 50000
    198                                 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     313                                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
    199314                                #endif
    200315
     
    220335        // Don't execute any pending __cfactx_switch even if counter reaches 0
    221336        void enable_interrupts_noPoll() {
    222                 unsigned short prev = kernelTLS.preemption_state.disable_count;
    223                 kernelTLS.preemption_state.disable_count -= 1;
    224                 verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
     337                unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
     338                __cfaabi_tls.preemption_state.disable_count -= 1;
      339                // If this triggers, someone is enabling interrupts that were never disabled
     340                /* paranoid */ verifyf( prev != 0u, "Incremented from %u\n", prev );
    225341                if( prev == 1 ) {
    226342                        #if GCC_VERSION > 50000
    227                         static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
     343                                static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
    228344                        #endif
    229345                        // Set enabled flag to true
    230346                        // should be atomic to avoid preemption in the middle of the operation.
     231347                        // use memory order RELAXED since there are no inter-thread requirements on this variable
    232                         __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
     348                        __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
    233349
    234350                        // Signal the compiler that a fence is needed but only for signal handlers
     
    237353        }
    238354}
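// Nesting sketch: disable/enable pairs count, so interrupts are truly re-enabled (and any
// missed preemption polled) only at the outermost enable.
//
//     disable_interrupts();                   // disable_count 0 -> 1, enabled = false
//     disable_interrupts();                   // disable_count 1 -> 2
//     enable_interrupts( __cfaabi_dbg_ctx );  // disable_count 2 -> 1, still disabled
//     enable_interrupts( __cfaabi_dbg_ctx );  // disable_count 1 -> 0, enabled = true, pending preemption handled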
     355
     356//-----------------------------------------------------------------------------
     357// Kernel Signal Debug
     358void __cfaabi_check_preemption() {
     359        bool ready = __preemption_enabled();
     360        if(!ready) { abort("Preemption should be ready"); }
     361
     362        __cfaasm_label(debug, before);
     363
     364                sigset_t oldset;
     365                int ret;
     366                ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
     367                if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
     368
     369                ret = sigismember(&oldset, SIGUSR1);
     370                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     371                if(ret == 1) { abort("ERROR SIGUSR1 is disabled"); }
     372
     373                ret = sigismember(&oldset, SIGALRM);
     374                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     375                if(ret == 0) { abort("ERROR SIGALRM is enabled"); }
     376
     377                ret = sigismember(&oldset, SIGTERM);
     378                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     379                if(ret == 1) { abort("ERROR SIGTERM is disabled"); }
     380
     381        __cfaasm_label(debug, after);
     382}
     383
     384#ifdef __CFA_WITH_VERIFY__
     385bool __cfaabi_dbg_in_kernel() {
     386        return !__preemption_enabled();
     387}
     388#endif
     389
     390#undef __cfaasm_label
     391
     392//-----------------------------------------------------------------------------
     393// Signal handling
    239394
    240395// sigprocmask wrapper : unblock a single signal
     
    256411
    257412        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
    258             abort( "internal error, pthread_sigmask" );
     413                abort( "internal error, pthread_sigmask" );
    259414        }
    260415}
     
    268423// reserved for future use
    269424static void timeout( $thread * this ) {
    270         //TODO : implement waking threads
    271 }
     425        unpark( this );
     426}
     427
     428//-----------------------------------------------------------------------------
     429// Some assembly required
     430#if defined( __i386 )
     431        #ifdef __PIC__
     432                #define RELOC_PRELUDE( label ) \
     433                        "calll   .Lcfaasm_prelude_" #label "$pb\n\t" \
     434                        ".Lcfaasm_prelude_" #label "$pb:\n\t" \
     435                        "popl    %%eax\n\t" \
     436                        ".Lcfaasm_prelude_" #label "_end:\n\t" \
     437                        "addl    $_GLOBAL_OFFSET_TABLE_+(.Lcfaasm_prelude_" #label "_end-.Lcfaasm_prelude_" #label "$pb), %%eax\n\t"
     438                #define RELOC_PREFIX ""
     439                #define RELOC_SUFFIX "@GOT(%%eax)"
     440        #else
     441                #define RELOC_PREFIX "$"
     442                #define RELOC_SUFFIX ""
     443        #endif
     444        #define __cfaasm_label( label ) struct asm_region label = \
     445                ({ \
     446                        struct asm_region region; \
     447                        asm( \
     448                                RELOC_PRELUDE( label ) \
     449                                "movl " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     450                                "movl " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     451                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     452                        ); \
     453                        region; \
     454                });
     455#elif defined( __x86_64 )
     456        #ifdef __PIC__
     457                #define RELOC_PREFIX ""
     458                #define RELOC_SUFFIX "@GOTPCREL(%%rip)"
     459        #else
     460                #define RELOC_PREFIX "$"
     461                #define RELOC_SUFFIX ""
     462        #endif
     463        #define __cfaasm_label( label ) struct asm_region label = \
     464                ({ \
     465                        struct asm_region region; \
     466                        asm( \
     467                                "movq " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     468                                "movq " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     469                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     470                        ); \
     471                        region; \
     472                });
     473#elif defined( __aarch64__ )
     474        #ifdef __PIC__
     475                // Note that this works only for gcc
     476                #define __cfaasm_label( label ) struct asm_region label = \
     477                ({ \
     478                        struct asm_region region; \
     479                        asm( \
     480                                "adrp %[vb], _GLOBAL_OFFSET_TABLE_"                              "\n\t" \
     481                                "ldr  %[vb], [%[vb], #:gotpage_lo15:__cfaasm_" #label "_before]" "\n\t" \
     482                                "adrp %[va], _GLOBAL_OFFSET_TABLE_"                              "\n\t" \
     483                                "ldr  %[va], [%[va], #:gotpage_lo15:__cfaasm_" #label "_after]"  "\n\t" \
     484                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     485                        ); \
     486                        region; \
     487                });
     488        #else
     489                #error this is not the right thing to do
     490                /*
     491                #define __cfaasm_label( label ) struct asm_region label = \
     492                ({ \
     493                        struct asm_region region; \
     494                        asm( \
     495                                "adrp %[vb], __cfaasm_" #label "_before"              "\n\t" \
     496                                "add  %[vb], %[vb], :lo12:__cfaasm_" #label "_before" "\n\t" \
     497                                "adrp %[va], :got:__cfaasm_" #label "_after"          "\n\t" \
     498                                "add  %[va], %[va], :lo12:__cfaasm_" #label "_after"  "\n\t" \
     499                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     500                        ); \
     501                        region; \
     502                });
     503                */
     504        #endif
     505#else
     506        #error unknown hardware architecture
     507#endif
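// Under PIC the label addresses are not link-time constants, so they are loaded through the GOT
// (e.g. "movq __cfaasm_get_before@GOTPCREL(%rip), %reg" on x86-64); non-PIC builds can load them
// as immediates ("movq $__cfaasm_get_before, %reg").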
    272508
    273509// KERNEL ONLY
     
    275511// If true  : preemption is safe
    276512// If false : preemption is unsafe and marked as pending
    277 static inline bool preemption_ready() {
     513static inline bool preemption_ready( void * ip ) {
      514        // Get all the regions in which it is not safe to preempt
     515        __cfaasm_label( get    );
     516        __cfaasm_label( check  );
     517        __cfaasm_label( dsable );
     518        __cfaasm_label( debug  );
     519
    278520        // Check if preemption is safe
    279         bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
    280 
     521        bool ready = true;
     522        if( __cfaasm_in( ip, get    ) ) { ready = false; goto EXIT; };
     523        if( __cfaasm_in( ip, check  ) ) { ready = false; goto EXIT; };
     524        if( __cfaasm_in( ip, dsable ) ) { ready = false; goto EXIT; };
     525        if( __cfaasm_in( ip, debug  ) ) { ready = false; goto EXIT; };
     526        if( !__cfaabi_tls.preemption_state.enabled) { ready = false; goto EXIT; };
     527        if( __cfaabi_tls.preemption_state.in_progress ) { ready = false; goto EXIT; };
     528
     529EXIT:
    281530        // Adjust the pending flag accordingly
    282         kernelTLS.this_processor->pending_preemption = !ready;
     531        __cfaabi_tls.this_processor->pending_preemption = !ready;
    283532        return ready;
    284533}
     
    290539// Startup routine to activate preemption
    291540// Called from kernel_startup
    292 void kernel_start_preemption() {
     541void __kernel_alarm_startup() {
    293542        __cfaabi_dbg_print_safe( "Kernel : Starting preemption\n" );
    294543
    295544        // Start with preemption disabled until ready
    296         kernelTLS.preemption_state.enabled = false;
    297         kernelTLS.preemption_state.disable_count = 1;
     545        __cfaabi_tls.preemption_state.enabled = false;
     546        __cfaabi_tls.preemption_state.disable_count = 1;
    298547
    299548        // Initialize the event kernel
     
    303552        // Setup proper signal handlers
    304553        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
     554        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
    305555
    306556        signal_block( SIGALRM );
     
    311561// Shutdown routine to deactivate preemption
    312562// Called from kernel_shutdown
    313 void kernel_stop_preemption() {
     563void __kernel_alarm_shutdown() {
    314564        __cfaabi_dbg_print_safe( "Kernel : Preemption stopping\n" );
    315565
     
    325575        // Wait for the preemption thread to finish
    326576
    327         pthread_join( alarm_thread, 0p );
    328         free( alarm_stack );
     577        __destroy_pthread( alarm_thread, alarm_stack, 0p );
    329578
    330579        // Preemption is now fully stopped
     
    352601// Kernel Signal Handlers
    353602//=============================================================================================
     603__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
    354604
    355605// Context switch signal handler
    356606// Receives SIGUSR1 signal and causes the current thread to yield
    357607static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
    358         __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
     608        void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
     609        __cfaabi_dbg_debug_do( last_interrupt = ip; )
    359610
     360611        // SKULLDUGGERY: if a thread creates a processor and then immediately deletes it,
    361612        // the interrupt that is supposed to force the kernel thread to preempt might arrive
    362         // before the kernel thread has even started running. When that happens an iterrupt
    363         // we a null 'this_processor' will be caught, just ignore it.
    364         if(! kernelTLS.this_processor ) return;
     613        // before the kernel thread has even started running. When that happens, an interrupt
     614        // with a null 'this_processor' will be caught, just ignore it.
     615        if(! __cfaabi_tls.this_processor ) return;
    365616
    366617        choose(sfp->si_value.sival_int) {
    367618                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
    368                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
     619                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
    369620                default:
    370621                        abort( "internal error, signal value is %d", sfp->si_value.sival_int );
     
    372623
    373624        // Check if it is safe to preempt here
    374         if( !preemption_ready() ) { return; }
    375 
    376         __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
     625        if( !preemption_ready( ip ) ) { return; }
     626
     627        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
    377628
    378629        // Sync flag : prevent recursive calls to the signal handler
    379         kernelTLS.preemption_state.in_progress = true;
     630        __cfaabi_tls.preemption_state.in_progress = true;
    380631
    381632        // Clear sighandler mask before context switching.
     
    387638        }
    388639
    389         // TODO: this should go in finish action
    390640        // Clear the in progress flag
    391         kernelTLS.preemption_state.in_progress = false;
     641        __cfaabi_tls.preemption_state.in_progress = false;
    392642
    393643        // Preemption can occur here
     
    395645        force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
    396646}
     647
     648static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
     649        abort("SIGALRM should never reach the signal handler");
     650}
     651
     652#if !defined(__CFA_NO_STATISTICS__)
     653        int __print_alarm_stats = 0;
     654#endif
    397655
    398656// Main of the alarm thread
     399657// Waits on SIGALRM and sends SIGUSR1 to whoever needs it
    400658static void * alarm_loop( __attribute__((unused)) void * args ) {
     659        __processor_id_t id;
     660        id.full_proc = false;
     661        id.id = doregister(&id);
     662        __cfaabi_tls.this_proc_id = &id;
     663
     664        #if !defined(__CFA_NO_STATISTICS__)
     665                struct __stats_t local_stats;
     666                __cfaabi_tls.this_stats = &local_stats;
     667                __init_stats( &local_stats );
     668        #endif
     669
     401670        // Block SIGALRM signals to control when they arrive
    402671        sigset_t mask;
     
    456725EXIT:
    457726        __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
     727        unregister(&id);
     728
     729        #if !defined(__CFA_NO_STATISTICS__)
     730                if( 0 != __print_alarm_stats ) {
     731                        __print_stats( &local_stats, __print_alarm_stats, "Alarm", "Thread", 0p );
     732                }
     733        #endif
    458734        return 0p;
    459735}
    460 
    461 //=============================================================================================
    462 // Kernel Signal Debug
    463 //=============================================================================================
    464 
    465 void __cfaabi_check_preemption() {
    466         bool ready = kernelTLS.preemption_state.enabled;
    467         if(!ready) { abort("Preemption should be ready"); }
    468 
    469         sigset_t oldset;
    470         int ret;
    471         ret = pthread_sigmask(0, 0p, &oldset);
    472         if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
    473 
    474         ret = sigismember(&oldset, SIGUSR1);
    475         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    476         if(ret == 1) { abort("ERROR SIGUSR1 is disabled"); }
    477 
    478         ret = sigismember(&oldset, SIGALRM);
    479         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    480         if(ret == 0) { abort("ERROR SIGALRM is enabled"); }
    481 
    482         ret = sigismember(&oldset, SIGTERM);
    483         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    484         if(ret == 1) { abort("ERROR SIGTERM is disabled"); }
    485 }
    486 
    487 #ifdef __CFA_WITH_VERIFY__
    488 bool __cfaabi_dbg_in_kernel() {
    489         return !kernelTLS.preemption_state.enabled;
    490 }
    491 #endif
    492736
    493737// Local Variables: //
  • libcfa/src/concurrency/preemption.hfa

    r3c64c668 r58fe85a  
    1616#pragma once
    1717
     18#include "bits/locks.hfa"
    1819#include "alarm.hfa"
    19 #include "kernel_private.hfa"
    2020
    21 void kernel_start_preemption();
    22 void kernel_stop_preemption();
     21struct event_kernel_t {
     22        alarm_list_t alarms;
     23        __spinlock_t lock;
     24};
     25
     26extern event_kernel_t * event_kernel;
     27
    2328void update_preemption( processor * this, Duration duration );
    2429
  • libcfa/src/concurrency/thread.cfa

    r3c64c668 r58fe85a  
    1919
    2020#include "kernel_private.hfa"
     21#include "exception.hfa"
    2122
    2223#define __CFA_INVOKE_PRIVATE__
     
    2829        context{ 0p, 0p };
    2930        self_cor{ name, storage, storageSize };
     31        ticket = TICKET_RUNNING;
    3032        state = Start;
    3133        preempted = __NO_PREEMPTION;
     
    3537        self_mon_p = &self_mon;
    3638        curr_cluster = &cl;
    37         next = 0p;
     39        link.next = 0p;
     40        link.prev = 0p;
     41        link.preferred = -1;
     42        #if defined( __CFA_WITH_VERIFY__ )
     43                canary = 0x0D15EA5E0D15EA5Ep;
     44        #endif
     45
     46        seqable.next = 0p;
     47        seqable.back = 0p;
    3848
    3949        node.next = 0p;
     
    4555
    4656void ^?{}($thread& this) with( this ) {
     57        #if defined( __CFA_WITH_VERIFY__ )
     58                canary = 0xDEADDEADDEADDEADp;
     59        #endif
    4760        unregister(curr_cluster, this);
    4861        ^self_cor{};
     62}
     63
     64FORALL_DATA_INSTANCE(ThreadCancelled, (dtype thread_t), (thread_t))
     65
     66forall(dtype T)
     67void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src) {
     68        dst->virtual_table = src->virtual_table;
     69        dst->the_thread = src->the_thread;
     70        dst->the_exception = src->the_exception;
     71}
     72
     73forall(dtype T)
     74const char * msg(ThreadCancelled(T) *) {
     75        return "ThreadCancelled";
     76}
     77
     78forall(dtype T)
     79static void default_thread_cancel_handler(ThreadCancelled(T) & ) {
     80        abort( "Unhandled thread cancellation.\n" );
     81}
     82
     83forall(dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)))
     84void ?{}( thread_dtor_guard_t & this,
     85                T & thrd, void(*defaultResumptionHandler)(ThreadCancelled(T) &)) {
     86        $monitor * m = get_monitor(thrd);
     87        $thread * desc = get_thread(thrd);
     88
     89        // Setup the monitor guard
     90        void (*dtor)(T& mutex this) = ^?{};
     91        bool join = defaultResumptionHandler != (void(*)(ThreadCancelled(T)&))0;
     92        (this.mg){&m, (void(*)())dtor, join};
     93
     94
     95        /* paranoid */ verifyf( Halted == desc->state || Cancelled == desc->state, "Expected thread to be Halted or Cancelled, was %d\n", (int)desc->state );
     96
     97        // After the guard set-up and any wait, check for cancellation.
     98        struct _Unwind_Exception * cancellation = desc->self_cor.cancellation;
     99        if ( likely( 0p == cancellation ) ) {
     100                return;
     101        } else if ( Cancelled == desc->state ) {
     102                return;
     103        }
     104        desc->state = Cancelled;
     105        if (!join) {
     106                defaultResumptionHandler = default_thread_cancel_handler;
     107        }
     108
     109        ThreadCancelled(T) except;
      110        // TODO: Remove explicit vtable set once trac#186 is fixed.
     111        except.virtual_table = &get_exception_vtable(&except);
     112        except.the_thread = &thrd;
     113        except.the_exception = __cfaehm_cancellation_exception( cancellation );
     114        throwResume except;
     115
     116        except.the_exception->virtual_table->free( except.the_exception );
     117        free( cancellation );
     118        desc->self_cor.cancellation = 0p;
     119}
     120
     121void ^?{}( thread_dtor_guard_t & this ) {
     122        ^(this.mg){};
    49123}
    50124
     
    59133
    60134        this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
    61         verify( this_thrd->context.SP );
     135        /* paranoid */ verify( this_thrd->context.SP );
    62136
    63         __schedule_thread(this_thrd);
     137        __schedule_thread( this_thrd );
    64138        enable_interrupts( __cfaabi_dbg_ctx );
    65139}
     
    84158}
    85159
     160//-----------------------------------------------------------------------------
     161forall(dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)))
     162T & join( T & this ) {
     163        thread_dtor_guard_t guard = { this, defaultResumptionHandler };
     164        return this;
     165}
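// Usage sketch (hypothetical Worker type): a thread runs from its declaration, and join blocks
// until it halts, resuming ThreadCancelled at the joiner if the thread was cancelled.
//
//     thread Worker { int result; };
//     void main( Worker & this ) { this.result = 42; }
//     int main() {
//             Worker w;            // thread starts running here
//             join( w );           // block until w's main returns
//             sout | w.result;
//     }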
     166
     167uint64_t thread_rand() {
     168        disable_interrupts();
     169        uint64_t ret = __tls_rand();
     170        enable_interrupts( __cfaabi_dbg_ctx );
     171        return ret;
     172}
     173
    86174// Local Variables: //
    87175// mode: c //
  • libcfa/src/concurrency/thread.hfa

    r3c64c668 r58fe85a  
    2222#include "kernel.hfa"
    2323#include "monitor.hfa"
     24#include "exception.hfa"
    2425
    2526//-----------------------------------------------------------------------------
    2627// thread trait
    2728trait is_thread(dtype T) {
    28       void ^?{}(T& mutex this);
    29       void main(T& this);
    30       $thread* get_thread(T& this);
     29        void ^?{}(T& mutex this);
     30        void main(T& this);
     31        $thread* get_thread(T& this);
    3132};
     33
     34FORALL_DATA_EXCEPTION(ThreadCancelled, (dtype thread_t), (thread_t)) (
     35        thread_t * the_thread;
     36        exception_t * the_exception;
     37);
     38
     39forall(dtype T)
     40void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src);
     41
     42forall(dtype T)
     43const char * msg(ThreadCancelled(T) *);
    3244
    3345// define that satisfies the trait without using the thread keyword
     
    6678static inline void ?{}($thread & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
    6779
     80struct thread_dtor_guard_t {
     81        monitor_dtor_guard_t mg;
     82};
     83
     84forall( dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) )
     85void ?{}( thread_dtor_guard_t & this, T & thrd, void(*)(ThreadCancelled(T) &) );
     86void ^?{}( thread_dtor_guard_t & this );
     87
    6888//-----------------------------------------------------------------------------
    6989// thread runner
     
    82102forall( dtype T | sized(T) | is_thread(T) )
    83103void ^?{}( scoped(T)& this );
    84 
    85 //-----------------------------------------------------------------------------
    86 // Thread getters
    87 static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
    88104
    89105//-----------------------------------------------------------------------------
     
    106122bool force_yield( enum __Preemption_Reason );
    107123
    108 static inline void yield() {
    109         force_yield(__MANUAL_PREEMPTION);
    110 }
     124//----------
     125// sleep: force thread to block and be rescheduled after Duration duration
     126void sleep( Duration duration );
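// For example, sleep( 10`ms ) blocks the calling thread for at least ten milliseconds
// (same duration-literal syntax as the 50`us cap used in preemption.cfa).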
    111127
    112 // Yield: yield N times
    113 static inline void yield( unsigned times ) {
    114         for( times ) {
    115                 yield();
    116         }
    117 }
     128//----------
     129// join
     130forall( dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) )
     131T & join( T & this );
    118132
    119133// Local Variables: //