Ignore:
Timestamp:
Jan 7, 2021, 2:55:57 PM (5 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children:
58fe85a
Parents:
bdfc032 (diff), 44e37ef (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into dkobets-vector

Location:
libcfa/src/concurrency
Files:
21 added
1 deleted
19 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/CtxSwitch-i386.S

    rbdfc032 reef8dfb  
    1010// Created On       : Tue Dec 6 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Jul 21 22:29:25 2017
    13 // Update Count     : 1
    14 //
    15 // This  library is free  software; you  can redistribute  it and/or  modify it
    16 // under the terms of the GNU Lesser General Public License as published by the
    17 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
    18 // option) any later version.
    19 //
    20 // This library is distributed in the  hope that it will be useful, but WITHOUT
    21 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
    22 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
    23 // for more details.
    24 //
    25 // You should  have received a  copy of the  GNU Lesser General  Public License
    26 // along  with this library.
     12// Last Modified On : Sun Sep  6 18:23:37 2020
     13// Update Count     : 5
    2714//
    2815
    29 // This context switch routine depends on the fact that the stack of a new
    30 // thread has been set up to look like the thread has saved its context in
    31 // the normal manner.
    32 //
    33 // void CtxSwitch( machine_context *from, machine_context *to );
     16// The context switch routine requires the initial the stack of a thread to
     17// look like the thread has saved its context in the normal manner.
    3418
    35 // Offsets in the context structure. This needs to be synchronized with the
    36 // high level code a little better.
     19// Offsets must synchronized with the __stack_context_t in invoke.h.
    3720
    3821#define PTR_BYTE        4
    3922#define SP_OFFSET       ( 0 * PTR_BYTE )
    4023#define FP_OFFSET       ( 1 * PTR_BYTE )
    41 #define PC_OFFSET       ( 2 * PTR_BYTE )
    4224
     25// Context switch between coroutines/tasks.
     26//   void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) ;
     27// Arguments "from" in register 4(%esp), "to" in register 20(%esp)
     28
     29        .file "CtxSwitch-i386.S"
    4330        .text
    4431        .align 2
    45         .globl CtxSwitch
    46         .type  CtxSwitch, @function
    47 CtxSwitch:
     32        .global __cfactx_switch
     33        .type __cfactx_switch, @function
     34__cfactx_switch:
    4835
    4936        // Copy the "from" context argument from the stack to register eax
    50         // Return address is at 0(%esp), with parameters following
     37        // Return address is at 0(%esp), with parameters following.
    5138
    5239        movl 4(%esp),%eax
     
    6350        movl %ebp,FP_OFFSET(%eax)
    6451
    65         // Copy the "to" context argument from the stack to register eax
    66         // Having pushed three words (= 12 bytes) on the stack, the
    67         // argument is now at 8 + 12 = 20(%esp)
     52        // Copy the "to" context argument from the stack to register eax. Having
     53        // pushed 3 words (= 12 bytes) on the stack, the argument is now at
     54        // 8 + 12 = 20(%esp).
    6855
    6956        movl 20(%esp),%eax
     
    8370
    8471        ret
    85         .size  CtxSwitch, .-CtxSwitch
     72        .size __cfactx_switch, .-__cfactx_switch
    8673
    8774// Local Variables: //
  • libcfa/src/concurrency/CtxSwitch-x86_64.S

    rbdfc032 reef8dfb  
    77// CtxSwitch-x86_64.S --
    88//
    9 // Author           : Thierry Delisle
    10 // Created On       : Mon Nov 28 12:27:26 2016
     9// Author           : Peter A. Buhr
     10// Created On       : Mon Aug 10 08:10:26 2020
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Jul 21 22:28:11 2017
    13 // Update Count     : 1
    14 //
    15 // This  library is free  software; you  can redistribute  it and/or  modify it
    16 // under the terms of the GNU Lesser General Public License as published by the
    17 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
    18 // option) any later version.
    19 //
    20 // This library is distributed in the  hope that it will be useful, but WITHOUT
    21 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
    22 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
    23 // for more details.
    24 //
    25 // You should  have received a  copy of the  GNU Lesser General  Public License
    26 // along  with this library.
     12// Last Modified On : Sat Oct 24 14:36:25 2020
     13// Update Count     : 10
    2714//
    2815
    29 // This context switch routine depends on the fact that the stack of a new
    30 // thread has been set up to look like the thread has saved its context in
    31 // the normal manner.
    32 //
    33 // void CtxSwitch( machine_context *from, machine_context *to );
     16// The context switch routine requires the initial the stack of a thread to
     17// look like the thread has saved its context in the normal manner.
    3418
    35 // Offsets in the context structure. This needs to be synchronized with the
    36 // high level code a little better.
     19// Offsets must synchronized with the __stack_context_t in invoke.h.
    3720
    3821#define PTR_BYTE        8
     
    4023#define FP_OFFSET       ( 1 * PTR_BYTE )
    4124
    42 //-----------------------------------------------------------------------------
    43 // Regular context switch routine which enables switching from one context to anouther
     25// Context switch between coroutines/tasks.
     26//   void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) ;
     27// Arguments "from" in register rdi, "to" in register rsi.
     28
     29        .file "CtxSwitch-x86_64.S"
    4430        .text
    4531        .align 2
    46         .globl CtxSwitch
    47         .type  CtxSwitch, @function
    48 CtxSwitch:
     32        .global __cfactx_switch
     33        .type __cfactx_switch, @function
     34__cfactx_switch:
    4935
    5036        // Save volatile registers on the stack.
     
    7763
    7864        ret
    79         .size  CtxSwitch, .-CtxSwitch
     65        .size __cfactx_switch, .-__cfactx_switch
    8066
    81 //-----------------------------------------------------------------------------
    82 // Stub used to create new stacks which are ready to be context switched to
     67// Stub to create new stacks which can be context switched to
     68//   void __cfactx_invoke_stub( void );
     69
    8370        .text
    8471        .align 2
    85         .globl CtxInvokeStub
    86         .type    CtxInvokeStub, @function
    87 CtxInvokeStub:
    88         movq %rbx, %rdi
     72        .global __cfactx_invoke_stub
     73        .type __cfactx_invoke_stub, @function
     74__cfactx_invoke_stub:
     75        movq %rbx, %rdi                                         // move main and this to first two arguments
    8976        movq %r12, %rsi
    90         jmp *%r13
    91         .size  CtxInvokeStub, .-CtxInvokeStub
     77        jmp *%r13                                                       // jmp to invoke
     78        .size __cfactx_invoke_stub, .-__cfactx_invoke_stub
    9279
    9380// Local Variables: //
    94 // mode: c //
     81// mode: asm //
    9582// tab-width: 4 //
    9683// End: //
  • libcfa/src/concurrency/alarm.cfa

    rbdfc032 reef8dfb  
    1010// Created On       : Fri Jun 2 11:31:25 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Jan  5 08:41:36 2020
    13 // Update Count     : 69
     12// Last Modified On : Wed Jun 17 16:11:35 2020
     13// Update Count     : 75
    1414//
    1515
    1616#define __cforall_thread__
    1717
    18 extern "C" {
    1918#include <errno.h>
    2019#include <stdio.h>
     20#include <unistd.h>
    2121#include <string.h>
    22 #include <unistd.h>
    2322#include <sys/time.h>
    24 }
    2523
    2624#include "alarm.hfa"
    27 #include "kernel_private.hfa"
     25#include "kernel/fwd.hfa"
    2826#include "preemption.hfa"
    2927
     
    4745//=============================================================================================
    4846
    49 void ?{}( alarm_node_t & this, thread_desc * thrd, Time alarm, Duration period ) with( this ) {
     47void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period) with( this ) {
    5048        this.thrd = thrd;
    5149        this.alarm = alarm;
    5250        this.period = period;
    53         next = 0;
    5451        set = false;
    55         kernel_alarm = false;
     52        type = User;
    5653}
    5754
    58 void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period ) with( this ) {
     55void ?{}( alarm_node_t & this, processor * proc, Time alarm, Duration period ) with( this ) {
    5956        this.proc = proc;
    6057        this.alarm = alarm;
    6158        this.period = period;
    62         next = 0;
    6359        set = false;
    64         kernel_alarm = true;
     60        type = Kernel;
     61}
     62void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period ) with( this ) {
     63        this.alarm = alarm;
     64        this.period = period;
     65        this.callback = callback;
     66        set = false;
     67        type = Callback;
    6568}
    6669
     
    7174}
    7275
    73 #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
    74 bool validate( alarm_list_t * this ) {
    75         alarm_node_t ** it = &this->head;
    76         while( (*it) ) {
    77                 it = &(*it)->next;
     76void insert( alarm_list_t * this, alarm_node_t * n ) {
     77        alarm_node_t * it = & (*this)`first;
     78        while( it && (n->alarm > it->alarm) ) {
     79                it = & (*it)`next;
     80        }
     81        if ( it ) {
     82                insert_before( *it, *n );
     83        } else {
     84                insert_last(*this, *n);
    7885        }
    7986
    80         return it == this->tail;
    81 }
    82 #endif
    83 
    84 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
    85         verify( !n->next );
    86         if( p == this->tail ) {
    87                 this->tail = &n->next;
    88         }
    89         else {
    90                 n->next = *p;
    91         }
    92         *p = n;
    93 
    94         verify( validate( this ) );
    95 }
    96 
    97 void insert( alarm_list_t * this, alarm_node_t * n ) {
    98         alarm_node_t ** it = &this->head;
    99         while( (*it) && (n->alarm > (*it)->alarm) ) {
    100                 it = &(*it)->next;
    101         }
    102 
    103         insert_at( this, n, it );
    104 
    105         verify( validate( this ) );
     87        verify( validate( *this ) );
    10688}
    10789
    10890alarm_node_t * pop( alarm_list_t * this ) {
    109         alarm_node_t * head = this->head;
     91        verify( validate( *this ) );
     92        alarm_node_t * head = & (*this)`first;
    11093        if( head ) {
    111                 this->head = head->next;
    112                 if( !head->next ) {
    113                         this->tail = &this->head;
    114                 }
    115                 head->next = 0p;
     94                remove(*head);
    11695        }
    117         verify( validate( this ) );
     96        verify( validate( *this ) );
    11897        return head;
    11998}
    12099
    121 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
    122         verify( it );
    123         verify( (*it) == n );
    124 
    125         (*it) = n->next;
    126         if( !n-> next ) {
    127                 this->tail = it;
    128         }
    129         n->next = 0p;
    130 
    131         verify( validate( this ) );
    132 }
    133 
    134 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
    135         alarm_node_t ** it = &this->head;
    136         while( (*it) && (*it) != n ) {
    137                 it = &(*it)->next;
    138         }
    139 
    140         verify( validate( this ) );
    141 
    142         if( *it ) { remove_at( this, n, it ); }
    143 
    144         verify( validate( this ) );
    145 }
    146 
    147100void register_self( alarm_node_t * this ) {
    148         alarm_list_t * alarms = &event_kernel->alarms;
     101        alarm_list_t & alarms = event_kernel->alarms;
    149102
    150103        disable_interrupts();
     
    152105        {
    153106                verify( validate( alarms ) );
    154                 bool first = !alarms->head;
     107                bool first = ! & alarms`first;
    155108
    156                 insert( alarms, this );
     109                insert( &alarms, this );
    157110                if( first ) {
    158                         __kernel_set_timer( alarms->head->alarm - __kernel_get_time() );
     111                        __kernel_set_timer( alarms`first.alarm - __kernel_get_time() );
    159112                }
    160113        }
     
    168121        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
    169122        {
    170                 verify( validate( &event_kernel->alarms ) );
    171                 remove( &event_kernel->alarms, this );
     123                verify( validate( event_kernel->alarms ) );
     124                remove( *this );
    172125        }
    173126        unlock( event_kernel->lock );
     
    176129}
    177130
     131//=============================================================================================
     132// Utilities
     133//=============================================================================================
     134
     135void sleep( Duration duration ) {
     136        alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s };
     137
     138        register_self( &node );
     139        park();
     140
     141        /* paranoid */ verify( !node.set );
     142        /* paranoid */ verify( & node`next == 0p );
     143        /* paranoid */ verify( & node`prev == 0p );
     144}
     145
    178146// Local Variables: //
    179147// mode: c //
  • libcfa/src/concurrency/alarm.hfa

    rbdfc032 reef8dfb  
    2323#include "time.hfa"
    2424
    25 struct thread_desc;
     25#include "containers/list.hfa"
     26
     27struct $thread;
    2628struct processor;
    2729
     
    3739//=============================================================================================
    3840
     41enum alarm_type{ Kernel = 0, User = 1, Callback = 2 };
     42
     43struct alarm_node_t;
     44
     45typedef void (*Alarm_Callback)(alarm_node_t & );
     46
    3947struct alarm_node_t {
    4048        Time alarm;                             // time when alarm goes off
    4149        Duration period;                        // if > 0 => period of alarm
    42         alarm_node_t * next;            // intrusive link list field
     50
     51        DLISTED_MGD_IMPL_IN(alarm_node_t)
    4352
    4453        union {
    45                 thread_desc * thrd;     // thrd who created event
    46                 processor * proc;               // proc who created event
     54                $thread * thrd;                                 // thrd who created event
     55                processor * proc;                               // proc who created event
     56                Alarm_Callback callback;                // callback to handle event
    4757        };
    4858
    4959        bool set                :1;             // whether or not the alarm has be registered
    50         bool kernel_alarm       :1;             // true if this is not a user defined alarm
     60        enum alarm_type type;           // true if this is not a user defined alarm
    5161};
     62DLISTED_MGD_IMPL_OUT(alarm_node_t)
    5263
    53 typedef alarm_node_t ** __alarm_it_t;
    54 
    55 void ?{}( alarm_node_t & this, thread_desc * thrd, Time alarm, Duration period );
     64void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
    5665void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
     66void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period );
    5767void ^?{}( alarm_node_t & this );
    5868
    59 struct alarm_list_t {
    60         alarm_node_t * head;
    61         __alarm_it_t tail;
    62 };
    63 
    64 static inline void ?{}( alarm_list_t & this ) with( this ) {
    65         head = 0;
    66         tail = &head;
    67 }
     69typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
    6870
    6971void insert( alarm_list_t * this, alarm_node_t * n );
  • libcfa/src/concurrency/coroutine.cfa

    rbdfc032 reef8dfb  
    1010// Created On       : Mon Nov 28 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Dec  5 14:37:29 2019
    13 // Update Count     : 15
     12// Last Modified On : Tue Dec 15 12:06:04 2020
     13// Update Count     : 23
    1414//
    1515
     
    1818#include "coroutine.hfa"
    1919
    20 extern "C" {
    2120#include <stddef.h>
    2221#include <malloc.h>
     
    2423#include <string.h>
    2524#include <unistd.h>
    26 // use this define to make unwind.h play nice, definetely a hack
    27 #define HIDE_EXPORTS
     25#include <sys/mman.h>                                                                   // mprotect
    2826#include <unwind.h>
    29 #undef HIDE_EXPORTS
    30 #include <sys/mman.h>
    31 }
    3227
    3328#include "kernel_private.hfa"
     29#include "exception.hfa"
     30#include "math.hfa"
     31
     32#define CFA_COROUTINE_USE_MMAP 0
    3433
    3534#define __CFA_INVOKE_PRIVATE__
     
    3736
    3837extern "C" {
    39         void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
     38        void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct $coroutine *) __attribute__ ((__noreturn__));
    4039        static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) __attribute__ ((__noreturn__));
    4140        static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) {
     
    4746
    4847//-----------------------------------------------------------------------------
     48FORALL_DATA_INSTANCE(CoroutineCancelled, (dtype coroutine_t), (coroutine_t))
     49
     50forall(dtype T)
     51void mark_exception(CoroutineCancelled(T) *) {}
     52
     53forall(dtype T)
     54void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) {
     55        dst->virtual_table = src->virtual_table;
     56        dst->the_coroutine = src->the_coroutine;
     57        dst->the_exception = src->the_exception;
     58}
     59
     60forall(dtype T)
     61const char * msg(CoroutineCancelled(T) *) {
     62        return "CoroutineCancelled(...)";
     63}
     64
     65// This code should not be inlined. It is the error path on resume.
     66forall(dtype T | is_coroutine(T))
     67void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ) {
     68        verify( desc->cancellation );
     69        desc->state = Cancelled;
     70        exception_t * except = __cfaehm_cancellation_exception( desc->cancellation );
     71
     72        // TODO: Remove explitate vtable set once trac#186 is fixed.
     73        CoroutineCancelled(T) except;
     74        except.virtual_table = &get_exception_vtable(&except);
     75        except.the_coroutine = &cor;
     76        except.the_exception = except;
     77        throwResume except;
     78
     79        except->virtual_table->free( except );
     80        free( desc->cancellation );
     81        desc->cancellation = 0p;
     82}
     83
     84//-----------------------------------------------------------------------------
    4985// Global state variables
    5086
    5187// minimum feasible stack size in bytes
    52 #define MinStackSize 1000
     88static const size_t MinStackSize = 1000;
    5389extern size_t __page_size;                              // architecture pagesize HACK, should go in proper runtime singleton
     90extern int __map_prot;
    5491
    5592void __stack_prepare( __stack_info_t * this, size_t create_size );
     93void __stack_clean  ( __stack_info_t * this );
    5694
    5795//-----------------------------------------------------------------------------
     
    74112        bool userStack = ((intptr_t)this.storage & 0x1) != 0;
    75113        if ( ! userStack && this.storage ) {
    76                 __attribute__((may_alias)) intptr_t * istorage = (intptr_t *)&this.storage;
    77                 *istorage &= (intptr_t)-1;
    78 
    79                 void * storage = this.storage->limit;
    80                 __cfaabi_dbg_debug_do(
    81                         storage = (char*)(storage) - __page_size;
    82                         if ( mprotect( storage, __page_size, PROT_READ | PROT_WRITE ) == -1 ) {
    83                                 abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
    84                         }
    85                 );
    86                 __cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
    87                 free( storage );
    88         }
    89 }
    90 
    91 void ?{}( coroutine_desc & this, const char * name, void * storage, size_t storageSize ) with( this ) {
     114                __stack_clean( &this );
     115        }
     116}
     117
     118void ?{}( $coroutine & this, const char name[], void * storage, size_t storageSize ) with( this ) {
    92119        (this.context){0p, 0p};
    93120        (this.stack){storage, storageSize};
     
    99126}
    100127
    101 void ^?{}(coroutine_desc& this) {
     128void ^?{}($coroutine& this) {
    102129        if(this.state != Halted && this.state != Start && this.state != Primed) {
    103                 coroutine_desc * src = TL_GET( this_thread )->curr_cor;
    104                 coroutine_desc * dst = &this;
     130                $coroutine * src = active_coroutine();
     131                $coroutine * dst = &this;
    105132
    106133                struct _Unwind_Exception storage;
     
    115142                }
    116143
    117                 CoroutineCtxSwitch( src, dst );
     144                $ctx_switch( src, dst );
    118145        }
    119146}
     
    123150forall(dtype T | is_coroutine(T))
    124151void prime(T& cor) {
    125         coroutine_desc* this = get_coroutine(cor);
     152        $coroutine* this = get_coroutine(cor);
    126153        assert(this->state == Start);
    127154
     
    134161        assert(__page_size != 0l);
    135162        size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
     163        size = ceiling(size, __page_size);
    136164
    137165        // If we are running debug, we also need to allocate a guardpage to catch stack overflows.
    138166        void * storage;
    139         __cfaabi_dbg_debug_do(
    140                 storage = memalign( __page_size, size + __page_size );
    141         );
    142         __cfaabi_dbg_no_debug_do(
    143                 storage = (void*)malloc(size);
    144         );
    145 
     167        #if CFA_COROUTINE_USE_MMAP
     168                storage = mmap(0p, size + __page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
     169                if(storage == ((void*)-1)) {
     170                        abort( "coroutine stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
     171                }
     172                if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
     173                        abort( "coroutine stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
     174                } // if
     175                storage = (void *)(((intptr_t)storage) + __page_size);
     176        #else
     177                __cfaabi_dbg_debug_do(
     178                        storage = memalign( __page_size, size + __page_size );
     179                );
     180                __cfaabi_dbg_no_debug_do(
     181                        storage = (void*)malloc(size);
     182                );
     183
     184                __cfaabi_dbg_debug_do(
     185                        if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
     186                                abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
     187                        }
     188                        storage = (void *)(((intptr_t)storage) + __page_size);
     189                );
     190        #endif
    146191        __cfaabi_dbg_print_safe("Kernel : Created stack %p of size %zu\n", storage, size);
    147         __cfaabi_dbg_debug_do(
    148                 if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
    149                         abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
    150                 }
    151                 storage = (void *)(((intptr_t)storage) + __page_size);
    152         );
    153192
    154193        verify( ((intptr_t)storage & (libAlign() - 1)) == 0ul );
    155194        return [storage, size];
     195}
     196
     197void __stack_clean  ( __stack_info_t * this ) {
     198        size_t size = ((intptr_t)this->storage->base) - ((intptr_t)this->storage->limit) + sizeof(__stack_t);
     199        void * storage = this->storage->limit;
     200
     201        #if CFA_COROUTINE_USE_MMAP
     202                storage = (void *)(((intptr_t)storage) - __page_size);
     203                if(munmap(storage, size + __page_size) == -1) {
     204                        abort( "coroutine stack destruction : internal error, munmap failure, error(%d) %s.", errno, strerror( errno ) );
     205                }
     206        #else
     207                __cfaabi_dbg_debug_do(
     208                        storage = (char*)(storage) - __page_size;
     209                        if ( mprotect( storage, __page_size, __map_prot ) == -1 ) {
     210                                abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
     211                        }
     212                );
     213
     214                free( storage );
     215        #endif
     216        __cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
    156217}
    157218
     
    175236                size = libFloor(create_size - stack_data_size - diff, libAlign());
    176237        } // if
    177         assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
    178 
    179         this->storage = (__stack_t *)((intptr_t)storage + size);
     238        assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %zd bytes for a stack.", size, MinStackSize );
     239
     240        this->storage = (__stack_t *)((intptr_t)storage + size - sizeof(__stack_t));
    180241        this->storage->limit = storage;
    181         this->storage->base  = (void*)((intptr_t)storage + size);
     242        this->storage->base  = (void*)((intptr_t)storage + size - sizeof(__stack_t));
     243        this->storage->exception_context.top_resume = 0p;
     244        this->storage->exception_context.current_exception = 0p;
    182245        __attribute__((may_alias)) intptr_t * istorage = (intptr_t*)&this->storage;
    183246        *istorage |= userStack ? 0x1 : 0x0;
     
    187250// is not inline (We can't inline Cforall in C)
    188251extern "C" {
    189         void __leave_coroutine( struct coroutine_desc * src ) {
    190                 coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
     252        void __cfactx_cor_leave( struct $coroutine * src ) {
     253                $coroutine * starter = src->cancellation != 0 ? src->last : src->starter;
    191254
    192255                src->state = Halted;
     
    201264                        src->name, src, starter->name, starter );
    202265
    203                 CoroutineCtxSwitch( src, starter );
    204         }
    205 
    206         struct coroutine_desc * __finish_coroutine(void) {
    207                 struct coroutine_desc * cor = kernelTLS.this_thread->curr_cor;
     266                $ctx_switch( src, starter );
     267        }
     268
     269        struct $coroutine * __cfactx_cor_finish(void) {
     270                struct $coroutine * cor = active_coroutine();
    208271
    209272                if(cor->state == Primed) {
    210                         suspend();
     273                        __cfactx_suspend();
    211274                }
    212275
  • libcfa/src/concurrency/coroutine.hfa

    rbdfc032 reef8dfb  
    1010// Created On       : Mon Nov 28 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Dec  3 22:47:58 2019
    13 // Update Count     : 10
     12// Last Modified On : Tue Feb  4 12:29:26 2020
     13// Update Count     : 11
    1414//
    1515
     
    1818#include <assert.h>
    1919#include "invoke.h"
     20#include "../exception.hfa"
     21
     22//-----------------------------------------------------------------------------
     23// Exception thrown from resume when a coroutine stack is cancelled.
     24FORALL_DATA_EXCEPTION(CoroutineCancelled, (dtype coroutine_t), (coroutine_t)) (
     25        coroutine_t * the_coroutine;
     26        exception_t * the_exception;
     27);
     28
     29forall(dtype T)
     30void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src);
     31
     32forall(dtype T)
     33const char * msg(CoroutineCancelled(T) *);
    2034
    2135//-----------------------------------------------------------------------------
     
    2337// Anything that implements this trait can be resumed.
    2438// Anything that is resumed is a coroutine.
    25 trait is_coroutine(dtype T) {
    26       void main(T & this);
    27       coroutine_desc * get_coroutine(T & this);
     39trait is_coroutine(dtype T | IS_RESUMPTION_EXCEPTION(CoroutineCancelled, (T))) {
     40        void main(T & this);
     41        $coroutine * get_coroutine(T & this);
    2842};
    2943
    30 #define DECL_COROUTINE(X) static inline coroutine_desc* get_coroutine(X& this) { return &this.__cor; } void main(X& this)
     44#define DECL_COROUTINE(X) static inline $coroutine* get_coroutine(X& this) { return &this.__cor; } void main(X& this)
    3145
    3246//-----------------------------------------------------------------------------
     
    3549// void ^?{}( coStack_t & this );
    3650
    37 void ?{}( coroutine_desc & this, const char * name, void * storage, size_t storageSize );
    38 void ^?{}( coroutine_desc & this );
     51void  ?{}( $coroutine & this, const char name[], void * storage, size_t storageSize );
     52void ^?{}( $coroutine & this );
    3953
    40 static inline void ?{}( coroutine_desc & this)                                       { this{ "Anonymous Coroutine", 0p, 0 }; }
    41 static inline void ?{}( coroutine_desc & this, size_t stackSize)                     { this{ "Anonymous Coroutine", 0p, stackSize }; }
    42 static inline void ?{}( coroutine_desc & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
    43 static inline void ?{}( coroutine_desc & this, const char * name)                    { this{ name, 0p, 0 }; }
    44 static inline void ?{}( coroutine_desc & this, const char * name, size_t stackSize ) { this{ name, 0p, stackSize }; }
     54static inline void ?{}( $coroutine & this)                                       { this{ "Anonymous Coroutine", 0p, 0 }; }
     55static inline void ?{}( $coroutine & this, size_t stackSize)                     { this{ "Anonymous Coroutine", 0p, stackSize }; }
     56static inline void ?{}( $coroutine & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
     57static inline void ?{}( $coroutine & this, const char name[])                    { this{ name, 0p, 0 }; }
     58static inline void ?{}( $coroutine & this, const char name[], size_t stackSize ) { this{ name, 0p, stackSize }; }
    4559
    4660//-----------------------------------------------------------------------------
    4761// Public coroutine API
    48 static inline void suspend(void);
    49 
    50 forall(dtype T | is_coroutine(T))
    51 static inline T & resume(T & cor);
    52 
    5362forall(dtype T | is_coroutine(T))
    5463void prime(T & cor);
    5564
    56 static inline struct coroutine_desc * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
     65static inline struct $coroutine * active_coroutine() { return active_thread()->curr_cor; }
    5766
    5867//-----------------------------------------------------------------------------
     
    6170// Start coroutine routines
    6271extern "C" {
    63         void CtxInvokeCoroutine(void (*main)(void *), void * this);
     72        void __cfactx_invoke_coroutine(void (*main)(void *), void * this);
    6473
    6574        forall(dtype T)
    66         void CtxStart(void (*main)(T &), struct coroutine_desc * cor, T & this, void (*invoke)(void (*main)(void *), void *));
     75        void __cfactx_start(void (*main)(T &), struct $coroutine * cor, T & this, void (*invoke)(void (*main)(void *), void *));
    6776
    68         extern void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
     77        extern void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine *) __attribute__ ((__noreturn__));
    6978
    70         extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
     79        extern void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("__cfactx_switch");
    7180}
    7281
    7382// Private wrappers for context switch and stack creation
    7483// Wrapper for co
    75 static inline void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
     84static inline void $ctx_switch( $coroutine * src, $coroutine * dst ) __attribute__((nonnull (1, 2))) {
    7685        // set state of current coroutine to inactive
    77         src->state = src->state == Halted ? Halted : Inactive;
     86        src->state = src->state == Halted ? Halted : Blocked;
    7887
    7988        // set new coroutine that task is executing
    80         TL_GET( this_thread )->curr_cor = dst;
     89        active_thread()->curr_cor = dst;
    8190
    8291        // context switch to specified coroutine
    8392        verify( dst->context.SP );
    84         CtxSwitch( &src->context, &dst->context );
    85         // when CtxSwitch returns we are back in the src coroutine
     93        __cfactx_switch( &src->context, &dst->context );
     94        // when __cfactx_switch returns we are back in the src coroutine
    8695
    8796        // set state of new coroutine to active
     
    8998
    9099        if( unlikely(src->cancellation != 0p) ) {
    91                 _CtxCoroutine_Unwind(src->cancellation, src);
     100                __cfactx_coroutine_unwind(src->cancellation, src);
    92101        }
    93102}
    94103
    95 extern void __stack_prepare   ( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
     104extern void __stack_prepare( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
     105extern void __stack_clean  ( __stack_info_t * this );
     106
    96107
    97108// Suspend implementation inlined for performance
    98 static inline void suspend(void) {
    99         // optimization : read TLS once and reuse it
    100         // Safety note: this is preemption safe since if
    101         // preemption occurs after this line, the pointer
    102         // will also migrate which means this value will
    103         // stay in syn with the TLS
    104         coroutine_desc * src = TL_GET( this_thread )->curr_cor;
     109extern "C" {
     110        static inline void __cfactx_suspend(void) {
     111                // optimization : read TLS once and reuse it
     112                // Safety note: this is preemption safe since if
     113                // preemption occurs after this line, the pointer
     114                // will also migrate which means this value will
     115                // stay in syn with the TLS
     116                $coroutine * src = active_coroutine();
    105117
    106         assertf( src->last != 0,
    107                 "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
    108                 "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
    109                 src->name, src );
    110         assertf( src->last->state != Halted,
    111                 "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
    112                 "Possible cause is terminated coroutine's main routine has already returned.",
    113                 src->name, src, src->last->name, src->last );
     118                assertf( src->last != 0,
     119                        "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
     120                        "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
     121                        src->name, src );
     122                assertf( src->last->state != Halted,
     123                        "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
     124                        "Possible cause is terminated coroutine's main routine has already returned.",
     125                        src->name, src, src->last->name, src->last );
    114126
    115         CoroutineCtxSwitch( src, src->last );
     127                $ctx_switch( src, src->last );
     128        }
    116129}
     130
     131forall(dtype T | is_coroutine(T))
     132void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc );
    117133
    118134// Resume implementation inlined for performance
     
    124140        // will also migrate which means this value will
    125141        // stay in syn with the TLS
    126         coroutine_desc * src = TL_GET( this_thread )->curr_cor;
    127         coroutine_desc * dst = get_coroutine(cor);
     142        $coroutine * src = active_coroutine();
     143        $coroutine * dst = get_coroutine(cor);
    128144
    129145        if( unlikely(dst->context.SP == 0p) ) {
    130                 TL_GET( this_thread )->curr_cor = dst;
    131146                __stack_prepare(&dst->stack, 65000);
    132                 CtxStart(main, dst, cor, CtxInvokeCoroutine);
    133                 TL_GET( this_thread )->curr_cor = src;
     147                __cfactx_start(main, dst, cor, __cfactx_invoke_coroutine);
    134148        }
    135149
     
    147161
    148162        // always done for performance testing
    149         CoroutineCtxSwitch( src, dst );
     163        $ctx_switch( src, dst );
     164        if ( unlikely(dst->cancellation) ) {
     165                __cfaehm_cancelled_coroutine( cor, dst );
     166        }
    150167
    151168        return cor;
    152169}
    153170
    154 static inline void resume(coroutine_desc * dst) {
     171static inline void resume( $coroutine * dst ) __attribute__((nonnull (1))) {
    155172        // optimization : read TLS once and reuse it
    156173        // Safety note: this is preemption safe since if
     
    158175        // will also migrate which means this value will
    159176        // stay in syn with the TLS
    160         coroutine_desc * src = TL_GET( this_thread )->curr_cor;
     177        $coroutine * src = active_coroutine();
    161178
    162179        // not resuming self ?
     
    172189
    173190        // always done for performance testing
    174         CoroutineCtxSwitch( src, dst );
     191        $ctx_switch( src, dst );
    175192}
    176193
  • libcfa/src/concurrency/invoke.c

    rbdfc032 reef8dfb  
    1010// Created On       : Tue Jan 17 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Feb  9 16:37:42 2018
    13 // Update Count     : 5
     12// Last Modified On : Sat Oct 24 14:35:28 2020
     13// Update Count     : 32
    1414//
    1515
     
    2929// Called from the kernel when starting a coroutine or task so must switch back to user mode.
    3030
    31 extern void __leave_coroutine ( struct coroutine_desc * );
    32 extern struct coroutine_desc * __finish_coroutine(void);
    33 extern void __leave_thread_monitor();
     31extern struct $coroutine * __cfactx_cor_finish(void);
     32extern void __cfactx_cor_leave ( struct $coroutine * );
     33extern void __cfactx_thrd_leave();
     34
    3435extern void disable_interrupts() OPTIONAL_THREAD;
    3536extern void enable_interrupts( __cfaabi_dbg_ctx_param );
    3637
    37 void CtxInvokeCoroutine(
     38void __cfactx_invoke_coroutine(
    3839        void (*main)(void *),
    3940        void *this
    4041) {
    4142        // Finish setting up the coroutine by setting its state
    42         struct coroutine_desc * cor = __finish_coroutine();
     43        struct $coroutine * cor = __cfactx_cor_finish();
    4344
    4445        // Call the main of the coroutine
     
    4647
    4748        //Final suspend, should never return
    48         __leave_coroutine( cor );
     49        __cfactx_cor_leave( cor );
    4950        __cabi_abort( "Resumed dead coroutine" );
    5051}
    5152
    52 static _Unwind_Reason_Code _CtxCoroutine_UnwindStop(
     53static _Unwind_Reason_Code __cfactx_coroutine_unwindstop(
    5354        __attribute((__unused__)) int version,
    5455        _Unwind_Action actions,
     
    6162                // We finished unwinding the coroutine,
    6263                // leave it
    63                 __leave_coroutine( param );
     64                __cfactx_cor_leave( param );
    6465                __cabi_abort( "Resumed dead coroutine" );
    6566        }
     
    6970}
    7071
    71 void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc * cor) __attribute__ ((__noreturn__));
    72 void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc * cor) {
    73         _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, _CtxCoroutine_UnwindStop, cor );
     72void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine * cor) __attribute__ ((__noreturn__));
     73void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine * cor) {
     74        _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, __cfactx_coroutine_unwindstop, cor );
    7475        printf("UNWIND ERROR %d after force unwind\n", ret);
    7576        abort();
    7677}
    7778
    78 void CtxInvokeThread(
     79void __cfactx_invoke_thread(
    7980        void (*main)(void *),
    8081        void *this
     
    9394        // The order of these 4 operations is very important
    9495        //Final suspend, should never return
    95         __leave_thread_monitor();
     96        __cfactx_thrd_leave();
    9697        __cabi_abort( "Resumed dead thread" );
    9798}
    9899
    99 void CtxStart(
     100void __cfactx_start(
    100101        void (*main)(void *),
    101         struct coroutine_desc * cor,
     102        struct $coroutine * cor,
    102103        void *this,
    103104        void (*invoke)(void *)
     
    108109
    109110        struct FakeStack {
    110             void *fixedRegisters[3];              // fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
    111             void *rturn;                          // where to go on return from uSwitch
    112             void *dummyReturn;                    // fake return compiler would have pushed on call to uInvoke
    113             void *argument[3];                    // for 16-byte ABI, 16-byte alignment starts here
    114             void *padding;                        // padding to force 16-byte alignment, as "base" is 16-byte aligned
     111            void *fixedRegisters[3];                                            // fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
     112            void *rturn;                                                                        // where to go on return from uSwitch
     113            void *dummyReturn;                                                          // fake return compiler would have pushed on call to uInvoke
     114            void *argument[3];                                                          // for 16-byte ABI, 16-byte alignment starts here
     115            void *padding;                                                                      // padding to force 16-byte alignment, as "base" is 16-byte aligned
    115116        };
    116117
     
    121122
    122123        fs->dummyReturn = NULL;
    123         fs->argument[0] = main;     // argument to invoke
    124         fs->argument[1] = this;     // argument to invoke
     124        fs->argument[0] = main;                                                         // argument to invoke
     125        fs->argument[1] = this;                                                         // argument to invoke
    125126        fs->rturn = invoke;
    126127
     
    128129
    129130        struct FakeStack {
    130                 void *fixedRegisters[5];            // fixed registers rbx, r12, r13, r14, r15
    131                 void *rturn;                        // where to go on return from uSwitch
    132                 void *dummyReturn;                  // NULL return address to provide proper alignment
     131                void *fixedRegisters[5];                                                // fixed registers rbx, r12, r13, r14, r15
     132                void *rturn;                                                                    // where to go on return from uSwitch
     133                void *dummyReturn;                                                              // NULL return address to provide proper alignment
    133134        };
    134135
    135136        cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
    136         cor->context.FP = NULL;         // terminate stack with NULL fp
     137        cor->context.FP = NULL;                                                         // terminate stack with NULL fp
    137138
    138139        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
    139140
    140141        fs->dummyReturn = NULL;
    141         fs->rturn = CtxInvokeStub;
    142         fs->fixedRegisters[0] = main;
    143         fs->fixedRegisters[1] = this;
     142        fs->rturn = __cfactx_invoke_stub;
     143        fs->fixedRegisters[0] = main;                                           // argument to invoke
     144        fs->fixedRegisters[1] = this;                                           // argument to invoke
    144145        fs->fixedRegisters[2] = invoke;
    145146
    146 #elif defined( __ARM_ARCH )
    147 #error ARM needs to be upgrade to use to parameters like X86/X64 (A.K.A. : I broke this and do not know how to fix it)
     147#elif defined( __ARM_ARCH_32 )
     148#error ARM needs to be upgrade to use two parameters like X86/X64 (A.K.A. : I broke this and do not know how to fix it)
     149        // More details about the error:
     150        // To avoid the thunk problem, I changed the invoke routine to pass the main explicitly
     151        // instead of relying on an assertion. This effectively hoists any required thunk one level
     152        // which was enough to get to global scope in most cases.
     153        // This means that __cfactx_invoke_... now takes two parameters and the FakeStack needs
     154        // to be adjusted as a consequence of that.
     155        // I don't know how to do that for ARM, hence the #error
     156
    148157        struct FakeStack {
    149                 float fpRegs[16];                       // floating point registers
    150                 void *intRegs[9];                       // integer/pointer registers
    151                 void *arg[2];                           // placeholder for this pointer
     158                float fpRegs[16];                                                               // floating point registers
     159                void * intRegs[9];                                                              // integer/pointer registers
     160                void * arg[2];                                                                  // placeholder for this pointer
    152161        };
    153162
     
    157166        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
    158167
    159         fs->intRegs[8] = CtxInvokeStub;
     168        fs->intRegs[8] = __cfactx_invoke_stub;
    160169        fs->arg[0] = this;
    161170        fs->arg[1] = invoke;
    162171
     172#elif defined( __ARM_ARCH )
     173        struct FakeStack {
     174                void * intRegs[12];                                                             // x19-x30 integer registers
     175                double fpRegs[8];                                                               // v8-v15 floating point
     176        };
     177
     178        cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
     179        cor->context.FP = NULL;
     180
     181        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
     182
     183        fs->intRegs[0] = main;                                                          // argument to invoke x19 => x0
     184        fs->intRegs[1] = this;                                                          // argument to invoke x20 => x1
     185        fs->intRegs[2] = invoke;
     186        fs->intRegs[11] = __cfactx_invoke_stub;                         // link register x30 => ret moves to pc
    163187#else
    164188        #error uknown hardware architecture
  • libcfa/src/concurrency/invoke.h

    rbdfc032 reef8dfb  
    1717#include "bits/defs.hfa"
    1818#include "bits/locks.hfa"
     19#include "kernel/fwd.hfa"
    1920
    2021#ifdef __cforall
     
    2627#define _INVOKE_H_
    2728
    28 #ifdef __ARM_ARCH
    29         // function prototypes are only really used by these macros on ARM
    30         void disable_global_interrupts();
    31         void enable_global_interrupts();
    32 
    33         #define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
    34                 disable_global_interrupts(); \
    35                 target = kernelTLS.member; \
    36                 enable_global_interrupts(); \
    37                 target; } )
    38         #define TL_SET( member, value ) disable_global_interrupts(); \
    39                 kernelTLS.member = value; \
    40                 enable_global_interrupts();
    41 #else
    42         #define TL_GET( member ) kernelTLS.member
    43         #define TL_SET( member, value ) kernelTLS.member = value;
    44 #endif
    45 
    46         #ifdef __cforall
    47         extern "Cforall" {
    48                 extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
    49                         struct thread_desc    * volatile this_thread;
    50                         struct processor      * volatile this_processor;
    51 
    52                         struct {
    53                                 volatile unsigned short disable_count;
    54                                 volatile bool enabled;
    55                                 volatile bool in_progress;
    56                         } preemption_state;
    57 
    58                         uint32_t rand_seed;
    59                 } kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
    60         }
    61         #endif
     29        struct __cfaehm_try_resume_node;
     30        struct __cfaehm_base_exception_t;
     31        struct exception_context_t {
     32                struct __cfaehm_try_resume_node * top_resume;
     33                struct __cfaehm_base_exception_t * current_exception;
     34        };
    6235
    6336        struct __stack_context_t {
     
    8558                // base of stack
    8659                void * base;
     60
     61                // Information for exception handling.
     62                struct exception_context_t exception_context;
    8763        };
    8864
     
    9268        };
    9369
    94         enum coroutine_state { Halted, Start, Inactive, Active, Primed };
    95 
    96         struct coroutine_desc {
    97                 // context that is switch during a CtxSwitch
     70        enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active, Cancelled, Halting };
     71
     72        struct $coroutine {
     73                // context that is switch during a __cfactx_switch
    9874                struct __stack_context_t context;
    9975
     
    10581
    10682                // current execution status for coroutine
    107                 enum coroutine_state state;
     83                enum __Coroutine_State state;
    10884
    10985                // first coroutine to resume this one
    110                 struct coroutine_desc * starter;
     86                struct $coroutine * starter;
    11187
    11288                // last coroutine to resume this one
    113                 struct coroutine_desc * last;
     89                struct $coroutine * last;
    11490
    11591                // If non-null stack must be unwound with this exception
     
    11793
    11894        };
     95        // Wrapper for gdb
     96        struct cfathread_coroutine_t { struct $coroutine debug; };
     97
     98        static inline struct __stack_t * __get_stack( struct $coroutine * cor ) {
     99                return (struct __stack_t*)(((uintptr_t)cor->stack.storage) & ((uintptr_t)-2));
     100        }
    119101
    120102        // struct which calls the monitor is accepting
     
    127109        };
    128110
    129         struct monitor_desc {
     111        struct $monitor {
    130112                // spinlock to protect internal data
    131113                struct __spinlock_t lock;
    132114
    133115                // current owner of the monitor
    134                 struct thread_desc * owner;
     116                struct $thread * owner;
    135117
    136118                // queue of threads that are blocked waiting for the monitor
    137                 __queue_t(struct thread_desc) entry_queue;
     119                __queue_t(struct $thread) entry_queue;
    138120
    139121                // stack of conditions to run next once we exit the monitor
     
    149131                struct __condition_node_t * dtor_node;
    150132        };
     133        // Wrapper for gdb
     134        struct cfathread_monitor_t { struct $monitor debug; };
    151135
    152136        struct __monitor_group_t {
    153137                // currently held monitors
    154                 __cfa_anonymous_object( __small_array_t(monitor_desc*) );
     138                __cfa_anonymous_object( __small_array_t($monitor*) );
    155139
    156140                // last function that acquired monitors
     
    158142        };
    159143
    160         struct thread_desc {
     144        // Link lists fields
     145        // instrusive link field for threads
     146        struct __thread_desc_link {
     147                struct $thread * next;
     148                struct $thread * prev;
     149                volatile unsigned long long ts;
     150                int preferred;
     151        };
     152
     153        struct $thread {
    161154                // Core threading fields
    162                 // context that is switch during a CtxSwitch
     155                // context that is switch during a __cfactx_switch
    163156                struct __stack_context_t context;
    164157
    165158                // current execution status for coroutine
    166                 enum coroutine_state state;
     159                // Possible values are:
     160                //    - TICKET_BLOCKED (-1) thread is blocked
     161                //    - TICKET_RUNNING ( 0) thread is running
     162                //    - TICKET_UNBLOCK ( 1) thread should ignore next block
     163                volatile int ticket;
     164                enum __Coroutine_State state:8;
     165                enum __Preemption_Reason preempted:8;
    167166
    168167                //SKULLDUGGERY errno is not save in the thread data structure because returnToKernel appears to be the only function to require saving and restoring it
    169 
    170                 // coroutine body used to store context
    171                 struct coroutine_desc  self_cor;
    172 
    173                 // current active context
    174                 struct coroutine_desc * curr_cor;
    175 
    176                 // monitor body used for mutual exclusion
    177                 struct monitor_desc    self_mon;
    178 
    179                 // pointer to monitor with sufficient lifetime for current monitors
    180                 struct monitor_desc *  self_mon_p;
    181168
    182169                // pointer to the cluster on which the thread is running
    183170                struct cluster * curr_cluster;
    184171
     172                // Link lists fields
     173                // instrusive link field for threads
     174                struct __thread_desc_link link;
     175
     176                // coroutine body used to store context
     177                struct $coroutine  self_cor;
     178
     179                // current active context
     180                struct $coroutine * curr_cor;
     181
     182                // monitor body used for mutual exclusion
     183                struct $monitor    self_mon;
     184
     185                // pointer to monitor with sufficient lifetime for current monitors
     186                struct $monitor *  self_mon_p;
     187
    185188                // monitors currently held by this thread
    186189                struct __monitor_group_t monitors;
    187190
    188                 // Link lists fields
    189                 // instrusive link field for threads
    190                 struct thread_desc * next;
    191 
     191                // used to put threads on user data structures
    192192                struct {
    193                         struct thread_desc * next;
    194                         struct thread_desc * prev;
     193                        struct $thread * next;
     194                        struct $thread * back;
     195                } seqable;
     196
     197                struct {
     198                        struct $thread * next;
     199                        struct $thread * prev;
    195200                } node;
    196         };
     201
     202                #if defined( __CFA_WITH_VERIFY__ )
     203                        void * canary;
     204                #endif
     205        };
     206        // Wrapper for gdb
     207        struct cfathread_thread_t { struct $thread debug; };
     208
     209        #ifdef __CFA_DEBUG__
     210                void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]);
     211        #else
     212                #define __cfaabi_dbg_record_thrd(x, y, z)
     213        #endif
    197214
    198215        #ifdef __cforall
    199216        extern "Cforall" {
    200                 static inline thread_desc *& get_next( thread_desc & this ) {
    201                         return this.next;
    202                 }
    203 
    204                 static inline [thread_desc *&, thread_desc *& ] __get( thread_desc & this ) {
     217
     218                static inline $thread *& get_next( $thread & this ) __attribute__((const)) {
     219                        return this.link.next;
     220                }
     221
     222                static inline [$thread *&, $thread *& ] __get( $thread & this ) __attribute__((const)) {
    205223                        return this.node.[next, prev];
     224                }
     225
     226                static inline $thread *& Back( $thread * this ) __attribute__((const)) {
     227                        return this->seqable.back;
     228                }
     229
     230                static inline $thread *& Next( $thread * this ) __attribute__((const)) {
     231                        return this->seqable.next;
     232                }
     233
     234                static inline bool listed( $thread * this ) {
     235                        return this->seqable.next != 0p;
    206236                }
    207237
     
    212242                }
    213243
    214                 static inline void ?{}(__monitor_group_t & this, struct monitor_desc ** data, __lock_size_t size, fptr_t func) {
     244                static inline void ?{}(__monitor_group_t & this, struct $monitor ** data, __lock_size_t size, fptr_t func) {
    215245                        (this.data){data};
    216246                        (this.size){size};
     
    218248                }
    219249
    220                 static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) {
     250                static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) __attribute__((const)) {
    221251                        if( (lhs.data != 0) != (rhs.data != 0) ) return false;
    222252                        if( lhs.size != rhs.size ) return false;
     
    252282
    253283        // assembler routines that performs the context switch
    254         extern void CtxInvokeStub( void );
    255         extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
     284        extern void __cfactx_invoke_stub( void );
     285        extern void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("__cfactx_switch");
    256286        // void CtxStore ( void * this ) asm ("CtxStore");
    257287        // void CtxRet   ( void * dst  ) asm ("CtxRet");
  • libcfa/src/concurrency/kernel.cfa

    rbdfc032 reef8dfb  
    1010// Created On       : Tue Jan 17 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jan 30 22:55:50 2020
    13 // Update Count     : 56
     12// Last Modified On : Mon Aug 31 07:08:20 2020
     13// Update Count     : 71
    1414//
    1515
    1616#define __cforall_thread__
     17// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    1718
    1819//C Includes
    19 #include <stddef.h>
    2020#include <errno.h>
    21 #include <string.h>
    22 extern "C" {
    2321#include <stdio.h>
    24 #include <fenv.h>
    25 #include <sys/resource.h>
    2622#include <signal.h>
    2723#include <unistd.h>
    28 #include <limits.h>                                                                             // PTHREAD_STACK_MIN
    29 #include <sys/mman.h>                                                                   // mprotect
    30 }
    3124
    3225//CFA Includes
    33 #include "time.hfa"
    3426#include "kernel_private.hfa"
    3527#include "preemption.hfa"
    36 #include "startup.hfa"
    3728
    3829//Private includes
     
    4031#include "invoke.h"
    4132
     33
    4234//-----------------------------------------------------------------------------
    4335// Some assembly required
    4436#if defined( __i386 )
    45         #define CtxGet( ctx )        \
    46                 __asm__ volatile (     \
    47                         "movl %%esp,%0\n"\
    48                         "movl %%ebp,%1\n"\
    49                         : "=rm" (ctx.SP),\
    50                                 "=rm" (ctx.FP) \
    51                 )
    52 
    5337        // mxcr : SSE Status and Control bits (control bits are preserved across function calls)
    5438        // fcw  : X87 FPU control word (preserved across function calls)
     
    7256
    7357#elif defined( __x86_64 )
    74         #define CtxGet( ctx )        \
    75                 __asm__ volatile (     \
    76                         "movq %%rsp,%0\n"\
    77                         "movq %%rbp,%1\n"\
    78                         : "=rm" (ctx.SP),\
    79                                 "=rm" (ctx.FP) \
    80                 )
    81 
    8258        #define __x87_store         \
    8359                uint32_t __mxcr;      \
     
    9874                )
    9975
    100 
    101 #elif defined( __ARM_ARCH )
    102 #define CtxGet( ctx ) __asm__ ( \
    103                 "mov %0,%%sp\n"   \
    104                 "mov %1,%%r11\n"   \
    105         : "=rm" (ctx.SP), "=rm" (ctx.FP) )
     76#elif defined( __arm__ )
     77        #define __x87_store
     78        #define __x87_load
     79
     80#elif defined( __aarch64__ )
     81        #define __x87_store              \
     82                uint32_t __fpcntl[2];    \
     83                __asm__ volatile (    \
     84                        "mrs x9, FPCR\n" \
     85                        "mrs x10, FPSR\n"  \
     86                        "stp x9, x10, %0\n"  \
     87                        : "=m" (__fpcntl) : : "x9", "x10" \
     88                )
     89
     90        #define __x87_load         \
     91                __asm__ volatile (    \
     92                        "ldp x9, x10, %0\n"  \
     93                        "msr FPSR, x10\n"  \
     94                        "msr FPCR, x9\n" \
     95                : "=m" (__fpcntl) : : "x9", "x10" \
     96                )
     97
    10698#else
    107         #error unknown hardware architecture
     99        #error unsupported hardware architecture
    108100#endif
    109101
     102extern $thread * mainThread;
     103extern processor * mainProcessor;
     104
    110105//-----------------------------------------------------------------------------
    111 //Start and stop routine for the kernel, declared first to make sure they run first
    112 static void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
    113 static void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
    114 
    115 //-----------------------------------------------------------------------------
    116 // Kernel storage
    117 KERNEL_STORAGE(cluster,         mainCluster);
    118 KERNEL_STORAGE(processor,       mainProcessor);
    119 KERNEL_STORAGE(thread_desc,     mainThread);
    120 KERNEL_STORAGE(__stack_t,       mainThreadCtx);
    121 
    122 cluster     * mainCluster;
    123 processor   * mainProcessor;
    124 thread_desc * mainThread;
    125 
    126 extern "C" {
    127         struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
    128 }
    129 
    130 size_t __page_size = 0;
    131 
    132 //-----------------------------------------------------------------------------
    133 // Global state
    134 thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) = {
    135         NULL,                                                                                           // cannot use 0p
    136         NULL,
    137         { 1, false, false },
    138         6u //this should be seeded better but due to a bug calling rdtsc doesn't work
    139 };
    140 
    141 //-----------------------------------------------------------------------------
    142 // Struct to steal stack
    143 struct current_stack_info_t {
    144         __stack_t * storage;                                                            // pointer to stack object
    145         void * base;                                                                            // base of stack
    146         void * limit;                                                                           // stack grows towards stack limit
    147         void * context;                                                                         // address of cfa_context_t
    148 };
    149 
    150 void ?{}( current_stack_info_t & this ) {
    151         __stack_context_t ctx;
    152         CtxGet( ctx );
    153         this.base = ctx.FP;
    154 
    155         rlimit r;
    156         getrlimit( RLIMIT_STACK, &r);
    157         size_t size = r.rlim_cur;
    158 
    159         this.limit = (void *)(((intptr_t)this.base) - size);
    160         this.context = &storage_mainThreadCtx;
    161 }
    162 
    163 //-----------------------------------------------------------------------------
    164 // Main thread construction
    165 
    166 void ?{}( coroutine_desc & this, current_stack_info_t * info) with( this ) {
    167         stack.storage = info->storage;
    168         with(*stack.storage) {
    169                 limit     = info->limit;
    170                 base      = info->base;
    171         }
    172         __attribute__((may_alias)) intptr_t * istorage = (intptr_t*) &stack.storage;
    173         *istorage |= 0x1;
    174         name = "Main Thread";
    175         state = Start;
    176         starter = 0p;
    177         last = 0p;
    178         cancellation = 0p;
    179 }
    180 
    181 void ?{}( thread_desc & this, current_stack_info_t * info) with( this ) {
    182         state = Start;
    183         self_cor{ info };
    184         curr_cor = &self_cor;
    185         curr_cluster = mainCluster;
    186         self_mon.owner = &this;
    187         self_mon.recursion = 1;
    188         self_mon_p = &self_mon;
    189         next = 0p;
    190 
    191         node.next = 0p;
    192         node.prev = 0p;
    193         doregister(curr_cluster, this);
    194 
    195         monitors{ &self_mon_p, 1, (fptr_t)0 };
    196 }
    197 
    198 //-----------------------------------------------------------------------------
    199 // Processor coroutine
    200 void ?{}(processorCtx_t & this) {
    201 
    202 }
    203 
    204 // Construct the processor context of non-main processors
    205 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info) {
    206         (this.__cor){ info };
    207         this.proc = proc;
    208 }
    209 
    210 static void start(processor * this);
    211 void ?{}(processor & this, const char * name, cluster & cltr) with( this ) {
    212         this.name = name;
    213         this.cltr = &cltr;
    214         terminated{ 0 };
    215         do_terminate = false;
    216         preemption_alarm = 0p;
    217         pending_preemption = false;
    218         runner.proc = &this;
    219 
    220         idleLock{};
    221 
    222         start( &this );
    223 }
    224 
    225 void ^?{}(processor & this) with( this ){
    226         if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
    227                 __cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);
    228 
    229                 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
    230                 wake( &this );
    231 
    232                 P( terminated );
    233                 verify( kernelTLS.this_processor != &this);
    234         }
    235 
    236         pthread_join( kernel_thread, 0p );
    237         free( this.stack );
    238 }
    239 
    240 void ?{}(cluster & this, const char * name, Duration preemption_rate) with( this ) {
    241         this.name = name;
    242         this.preemption_rate = preemption_rate;
    243         ready_queue{};
    244         ready_queue_lock{};
    245 
    246         procs{ __get };
    247         idles{ __get };
    248         threads{ __get };
    249 
    250         doregister(this);
    251 }
    252 
    253 void ^?{}(cluster & this) {
    254         unregister(this);
    255 }
     106// Kernel Scheduling logic
     107static $thread * __next_thread(cluster * this);
     108static $thread * __next_thread_slow(cluster * this);
     109static void __run_thread(processor * this, $thread * dst);
     110static void __wake_one(cluster * cltr);
     111
     112static void push  (__cluster_idles & idles, processor & proc);
     113static void remove(__cluster_idles & idles, processor & proc);
     114static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
     115
    256116
    257117//=============================================================================================
    258118// Kernel Scheduling logic
    259119//=============================================================================================
    260 static void runThread(processor * this, thread_desc * dst);
    261 static void finishRunning(processor * this);
    262 static void halt(processor * this);
    263 
    264120//Main of the processor contexts
    265121void main(processorCtx_t & runner) {
    266122        // Because of a bug, we couldn't initialized the seed on construction
    267123        // Do it here
    268         kernelTLS.rand_seed ^= rdtscl();
     124        __cfaabi_tls.rand_seed ^= rdtscl();
     125        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
     126        __tls_rand_advance_bck();
    269127
    270128        processor * this = runner.proc;
    271129        verify(this);
    272130
    273         __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
    274 
    275         doregister(this->cltr, this);
     131        __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
     132        #if !defined(__CFA_NO_STATISTICS__)
     133                if( this->print_halts ) {
     134                        __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this);
     135                }
     136        #endif
    276137
    277138        {
     
    279140                preemption_scope scope = { this };
    280141
    281                 __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
    282 
    283                 thread_desc * readyThread = 0p;
    284                 for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
    285                         readyThread = nextThread( this->cltr );
    286 
    287                         if(readyThread) {
    288                                 verify( ! kernelTLS.preemption_state.enabled );
    289 
    290                                 runThread(this, readyThread);
    291 
    292                                 verify( ! kernelTLS.preemption_state.enabled );
    293 
    294                                 //Some actions need to be taken from the kernel
    295                                 finishRunning(this);
    296 
    297                                 spin_count = 0;
    298                         } else {
    299                                 // spin(this, &spin_count);
    300                                 halt(this);
     142                __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
     143
     144                $thread * readyThread = 0p;
     145                MAIN_LOOP:
     146                for() {
     147                        // Try to get the next thread
     148                        readyThread = __next_thread( this->cltr );
     149
     150                        if( !readyThread ) {
     151                                readyThread = __next_thread_slow( this->cltr );
    301152                        }
    302                 }
    303 
    304                 __cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
    305         }
    306 
    307         unregister(this->cltr, this);
     153
     154                        HALT:
     155                        if( !readyThread ) {
     156                                // Don't block if we are done
     157                                if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     158
     159                                #if !defined(__CFA_NO_STATISTICS__)
     160                                        __tls_stats()->ready.sleep.halts++;
     161                                #endif
     162
     163                                // Push self to idle stack
     164                                push(this->cltr->idles, * this);
     165
     166                                // Confirm the ready-queue is empty
     167                                readyThread = __next_thread_slow( this->cltr );
     168                                if( readyThread ) {
     169                                        // A thread was found, cancel the halt
     170                                        remove(this->cltr->idles, * this);
     171
     172                                        #if !defined(__CFA_NO_STATISTICS__)
     173                                                __tls_stats()->ready.sleep.cancels++;
     174                                        #endif
     175
     176                                        // continue the mai loop
     177                                        break HALT;
     178                                }
     179
     180                                #if !defined(__CFA_NO_STATISTICS__)
     181                                        if(this->print_halts) {
     182                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
     183                                        }
     184                                #endif
     185
     186                                wait( this->idle );
     187
     188                                #if !defined(__CFA_NO_STATISTICS__)
     189                                        if(this->print_halts) {
     190                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
     191                                        }
     192                                #endif
     193
     194                                // We were woken up, remove self from idle
     195                                remove(this->cltr->idles, * this);
     196
     197                                // DON'T just proceed, start looking again
     198                                continue MAIN_LOOP;
     199                        }
     200
     201                        /* paranoid */ verify( readyThread );
     202
     203                        // We found a thread run it
     204                        __run_thread(this, readyThread);
     205
     206                        // Are we done?
     207                        if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     208                }
     209
     210                __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
     211        }
    308212
    309213        V( this->terminated );
    310214
    311         __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
     215        if(this == mainProcessor) {
     216                // HACK : the coroutine context switch expects this_thread to be set
     217                // and it make sense for it to be set in all other cases except here
     218                // fake it
     219                __cfaabi_tls.this_thread = mainThread;
     220        }
     221
     222        __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
    312223}
    313224
     
    318229// runThread runs a thread by context switching
    319230// from the processor coroutine to the target thread
    320 static void runThread(processor * this, thread_desc * thrd_dst) {
    321         coroutine_desc * proc_cor = get_coroutine(this->runner);
    322 
    323         // Reset the terminating actions here
    324         this->finish.action_code = No_Action;
    325 
    326         // Update global state
    327         kernelTLS.this_thread = thrd_dst;
    328 
    329         // set state of processor coroutine to inactive and the thread to active
    330         proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
    331         thrd_dst->state = Active;
    332 
    333         // set context switch to the thread that the processor is executing
    334         verify( thrd_dst->context.SP );
    335         CtxSwitch( &proc_cor->context, &thrd_dst->context );
    336         // when CtxSwitch returns we are back in the processor coroutine
    337 
    338         // set state of processor coroutine to active and the thread to inactive
    339         thrd_dst->state = thrd_dst->state == Halted ? Halted : Inactive;
     231static void __run_thread(processor * this, $thread * thrd_dst) {
     232        /* paranoid */ verify( ! __preemption_enabled() );
     233        /* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
     234        /* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
     235        __builtin_prefetch( thrd_dst->context.SP );
     236
     237        $coroutine * proc_cor = get_coroutine(this->runner);
     238
     239        // set state of processor coroutine to inactive
     240        verify(proc_cor->state == Active);
     241        proc_cor->state = Blocked;
     242
     243        // Actually run the thread
     244        RUNNING:  while(true) {
     245                thrd_dst->preempted = __NO_PREEMPTION;
     246                thrd_dst->state = Active;
     247
     248                // Update global state
     249                kernelTLS().this_thread = thrd_dst;
     250
     251                /* paranoid */ verify( ! __preemption_enabled() );
     252                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
     253                /* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
     254                /* paranoid */ verify( thrd_dst->context.SP );
     255                /* paranoid */ verify( thrd_dst->state != Halted );
     256                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
     257                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
     258                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
     259
     260
     261
     262                // set context switch to the thread that the processor is executing
     263                __cfactx_switch( &proc_cor->context, &thrd_dst->context );
     264                // when __cfactx_switch returns we are back in the processor coroutine
     265
     266                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
     267                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
     268                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
     269                /* paranoid */ verify( thrd_dst->context.SP );
     270                /* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
     271                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
     272                /* paranoid */ verify( ! __preemption_enabled() );
     273
     274                // Reset global state
     275                kernelTLS().this_thread = 0p;
     276
     277                // We just finished running a thread, there are a few things that could have happened.
     278                // 1 - Regular case : the thread has blocked and now one has scheduled it yet.
     279                // 2 - Racy case    : the thread has blocked but someone has already tried to schedule it.
     280                // 4 - Preempted
     281                // In case 1, we may have won a race so we can't write to the state again.
     282                // In case 2, we lost the race so we now own the thread.
     283
     284                if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
     285                        // The thread was preempted, reschedule it and reset the flag
     286                        __schedule_thread( thrd_dst );
     287                        break RUNNING;
     288                }
     289
     290                if(unlikely(thrd_dst->state == Halting)) {
     291                        // The thread has halted, it should never be scheduled/run again
     292                        // finish the thread
     293                        __thread_finish( thrd_dst );
     294                        break RUNNING;
     295                }
     296
     297                /* paranoid */ verify( thrd_dst->state == Active );
     298                thrd_dst->state = Blocked;
     299
     300                // set state of processor coroutine to active and the thread to inactive
     301                int old_ticket = __atomic_fetch_sub(&thrd_dst->ticket, 1, __ATOMIC_SEQ_CST);
     302                switch(old_ticket) {
     303                        case TICKET_RUNNING:
     304                                // This is case 1, the regular case, nothing more is needed
     305                                break RUNNING;
     306                        case TICKET_UNBLOCK:
     307                                // This is case 2, the racy case, someone tried to run this thread before it finished blocking
     308                                // In this case, just run it again.
     309                                continue RUNNING;
     310                        default:
     311                                // This makes no sense, something is wrong abort
     312                                abort();
     313                }
     314        }
     315
     316        // Just before returning to the processor, set the processor coroutine to active
    340317        proc_cor->state = Active;
     318
     319        /* paranoid */ verify( ! __preemption_enabled() );
    341320}
    342321
    343322// KERNEL_ONLY
    344 static void returnToKernel() {
    345         coroutine_desc * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
    346         thread_desc * thrd_src = kernelTLS.this_thread;
    347 
    348         // set state of current coroutine to inactive
    349         thrd_src->state = thrd_src->state == Halted ? Halted : Inactive;
    350         proc_cor->state = Active;
    351         int local_errno = *__volatile_errno();
    352         #if defined( __i386 ) || defined( __x86_64 )
    353                 __x87_store;
     323void returnToKernel() {
     324        /* paranoid */ verify( ! __preemption_enabled() );
     325        $coroutine * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
     326        $thread * thrd_src = kernelTLS().this_thread;
     327
     328        #if !defined(__CFA_NO_STATISTICS__)
     329                struct processor * last_proc = kernelTLS().this_processor;
    354330        #endif
    355331
    356         // set new coroutine that the processor is executing
    357         // and context switch to it
    358         verify( proc_cor->context.SP );
    359         CtxSwitch( &thrd_src->context, &proc_cor->context );
    360 
    361         // set state of new coroutine to active
    362         proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
    363         thrd_src->state = Active;
    364 
    365         #if defined( __i386 ) || defined( __x86_64 )
    366                 __x87_load;
     332        // Run the thread on this processor
     333        {
     334                int local_errno = *__volatile_errno();
     335                #if defined( __i386 ) || defined( __x86_64 )
     336                        __x87_store;
     337                #endif
     338                /* paranoid */ verify( proc_cor->context.SP );
     339                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_src->canary );
     340                __cfactx_switch( &thrd_src->context, &proc_cor->context );
     341                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_src->canary );
     342                #if defined( __i386 ) || defined( __x86_64 )
     343                        __x87_load;
     344                #endif
     345                *__volatile_errno() = local_errno;
     346        }
     347
     348        #if !defined(__CFA_NO_STATISTICS__)
     349                if(last_proc != kernelTLS().this_processor) {
     350                        __tls_stats()->ready.threads.migration++;
     351                }
    367352        #endif
    368         *__volatile_errno() = local_errno;
    369 }
    370 
    371 // KERNEL_ONLY
    372 // Once a thread has finished running, some of
    373 // its final actions must be executed from the kernel
    374 static void finishRunning(processor * this) with( this->finish ) {
    375         verify( ! kernelTLS.preemption_state.enabled );
    376         choose( action_code ) {
    377         case No_Action:
    378                 break;
    379         case Release:
    380                 unlock( *lock );
    381         case Schedule:
    382                 ScheduleThread( thrd );
    383         case Release_Schedule:
    384                 unlock( *lock );
    385                 ScheduleThread( thrd );
    386         case Release_Multi:
    387                 for(int i = 0; i < lock_count; i++) {
    388                         unlock( *locks[i] );
    389                 }
    390         case Release_Multi_Schedule:
    391                 for(int i = 0; i < lock_count; i++) {
    392                         unlock( *locks[i] );
    393                 }
    394                 for(int i = 0; i < thrd_count; i++) {
    395                         ScheduleThread( thrds[i] );
    396                 }
    397         case Callback:
    398                 callback();
    399         default:
    400                 abort("KERNEL ERROR: Unexpected action to run after thread");
    401         }
    402 }
    403 
    404 // KERNEL_ONLY
    405 // Context invoker for processors
    406 // This is the entry point for processors (kernel threads)
    407 // It effectively constructs a coroutine by stealing the pthread stack
    408 static void * CtxInvokeProcessor(void * arg) {
    409         processor * proc = (processor *) arg;
    410         kernelTLS.this_processor = proc;
    411         kernelTLS.this_thread    = 0p;
    412         kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
    413         // SKULLDUGGERY: We want to create a context for the processor coroutine
    414         // which is needed for the 2-step context switch. However, there is no reason
    415         // to waste the perfectly valid stack create by pthread.
    416         current_stack_info_t info;
    417         __stack_t ctx;
    418         info.storage = &ctx;
    419         (proc->runner){ proc, &info };
    420 
    421         __cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
    422 
    423         //Set global state
    424         kernelTLS.this_thread = 0p;
    425 
    426         //We now have a proper context from which to schedule threads
    427         __cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
    428 
    429         // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
    430         // resume it to start it like it normally would, it will just context switch
    431         // back to here. Instead directly call the main since we already are on the
    432         // appropriate stack.
    433         get_coroutine(proc->runner)->state = Active;
    434         main( proc->runner );
    435         get_coroutine(proc->runner)->state = Halted;
    436 
    437         // Main routine of the core returned, the core is now fully terminated
    438         __cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
    439 
    440         return 0p;
    441 }
    442 
    443 static void Abort( int ret, const char * func ) {
    444         if ( ret ) {                                                                            // pthread routines return errno values
    445                 abort( "%s : internal error, error(%d) %s.", func, ret, strerror( ret ) );
    446         } // if
    447 } // Abort
    448 
    449 void * create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
    450         pthread_attr_t attr;
    451 
    452         Abort( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
    453 
    454         size_t stacksize;
    455         // default stack size, normally defined by shell limit
    456         Abort( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
    457         assert( stacksize >= PTHREAD_STACK_MIN );
    458 
    459         void * stack;
    460         __cfaabi_dbg_debug_do(
    461                 stack = memalign( __page_size, stacksize + __page_size );
    462                 // pthread has no mechanism to create the guard page in user supplied stack.
    463                 if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
    464                         abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
    465                 } // if
    466         );
    467         __cfaabi_dbg_no_debug_do(
    468                 stack = malloc( stacksize );
    469         );
    470 
    471         Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
    472 
    473         Abort( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
    474         return stack;
    475 }
    476 
    477 static void start(processor * this) {
    478         __cfaabi_dbg_print_safe("Kernel : Starting core %p\n", this);
    479 
    480         this->stack = create_pthread( &this->kernel_thread, CtxInvokeProcessor, (void *)this );
    481 
    482         __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
    483 }
    484 
    485 // KERNEL_ONLY
    486 void kernel_first_resume( processor * this ) {
    487         thread_desc * src = mainThread;
    488         coroutine_desc * dst = get_coroutine(this->runner);
    489 
    490         verify( ! kernelTLS.preemption_state.enabled );
    491 
    492         kernelTLS.this_thread->curr_cor = dst;
    493         __stack_prepare( &dst->stack, 65000 );
    494         CtxStart(main, dst, this->runner, CtxInvokeCoroutine);
    495 
    496         verify( ! kernelTLS.preemption_state.enabled );
    497 
    498         dst->last = &src->self_cor;
    499         dst->starter = dst->starter ? dst->starter : &src->self_cor;
    500 
    501         // set state of current coroutine to inactive
    502         src->state = src->state == Halted ? Halted : Inactive;
    503 
    504         // context switch to specified coroutine
    505         verify( dst->context.SP );
    506         CtxSwitch( &src->context, &dst->context );
    507         // when CtxSwitch returns we are back in the src coroutine
    508 
    509         mainThread->curr_cor = &mainThread->self_cor;
    510 
    511         // set state of new coroutine to active
    512         src->state = Active;
    513 
    514         verify( ! kernelTLS.preemption_state.enabled );
    515 }
    516 
    517 // KERNEL_ONLY
    518 void kernel_last_resume( processor * this ) {
    519         coroutine_desc * src = &mainThread->self_cor;
    520         coroutine_desc * dst = get_coroutine(this->runner);
    521 
    522         verify( ! kernelTLS.preemption_state.enabled );
    523         verify( dst->starter == src );
    524         verify( dst->context.SP );
    525 
    526         // context switch to the processor
    527         CtxSwitch( &src->context, &dst->context );
     353
     354        /* paranoid */ verify( ! __preemption_enabled() );
     355        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
     356        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
    528357}
    529358
    530359//-----------------------------------------------------------------------------
    531360// Scheduler routines
    532 
    533361// KERNEL ONLY
    534 void ScheduleThread( thread_desc * thrd ) {
    535         verify( thrd );
    536         verify( thrd->state != Halted );
    537 
    538         verify( ! kernelTLS.preemption_state.enabled );
    539 
    540         verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
    541 
    542         with( *thrd->curr_cluster ) {
    543                 lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
    544                 bool was_empty = !(ready_queue != 0);
    545                 append( ready_queue, thrd );
    546                 unlock( ready_queue_lock );
    547 
    548                 if(was_empty) {
    549                         lock      (proc_list_lock __cfaabi_dbg_ctx2);
    550                         if(idles) {
    551                                 wake_fast(idles.head);
     362void __schedule_thread( $thread * thrd ) {
     363        /* paranoid */ verify( ! __preemption_enabled() );
     364        /* paranoid */ verify( kernelTLS().this_proc_id );
     365        /* paranoid */ verify( thrd );
     366        /* paranoid */ verify( thrd->state != Halted );
     367        /* paranoid */ verify( thrd->curr_cluster );
     368        /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
     369        /* paranoid */  if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
     370                                        "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
     371        /* paranoid */  if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active,
     372                                        "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
     373        /* paranoid */ #endif
     374        /* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
     375        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     376
     377
     378        if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
     379
     380        ready_schedule_lock();
     381                // Dereference the thread now because once we push it, there is not guaranteed it's still valid.
     382                struct cluster * cl = thrd->curr_cluster;
     383
     384                // push the thread to the cluster ready-queue
     385                push( cl, thrd );
     386
     387                // variable thrd is no longer safe to use
     388
     389                // wake the cluster using the save variable.
     390                __wake_one( cl );
     391        ready_schedule_unlock();
     392
     393        /* paranoid */ verify( ! __preemption_enabled() );
     394}
     395
     396// KERNEL ONLY
     397static inline $thread * __next_thread(cluster * this) with( *this ) {
     398        /* paranoid */ verify( ! __preemption_enabled() );
     399        /* paranoid */ verify( kernelTLS().this_proc_id );
     400
     401        ready_schedule_lock();
     402                $thread * thrd = pop( this );
     403        ready_schedule_unlock();
     404
     405        /* paranoid */ verify( kernelTLS().this_proc_id );
     406        /* paranoid */ verify( ! __preemption_enabled() );
     407        return thrd;
     408}
     409
     410// KERNEL ONLY
     411static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
     412        /* paranoid */ verify( ! __preemption_enabled() );
     413        /* paranoid */ verify( kernelTLS().this_proc_id );
     414
     415        ready_schedule_lock();
     416                $thread * thrd = pop_slow( this );
     417        ready_schedule_unlock();
     418
     419        /* paranoid */ verify( kernelTLS().this_proc_id );
     420        /* paranoid */ verify( ! __preemption_enabled() );
     421        return thrd;
     422}
     423
     424void unpark( $thread * thrd ) {
     425        if( !thrd ) return;
     426
     427        int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
     428        switch(old_ticket) {
     429                case TICKET_RUNNING:
     430                        // Wake won the race, the thread will reschedule/rerun itself
     431                        break;
     432                case TICKET_BLOCKED:
     433                        /* paranoid */ verify( ! thrd->preempted != __NO_PREEMPTION );
     434                        /* paranoid */ verify( thrd->state == Blocked );
     435
     436                        {
     437                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
     438                                bool full = publicTLS_get(this_proc_id)->full_proc;
     439                                if(full) disable_interrupts();
     440
     441                                /* paranoid */ verify( ! __preemption_enabled() );
     442
     443                                // Wake lost the race,
     444                                __schedule_thread( thrd );
     445
     446                                /* paranoid */ verify( ! __preemption_enabled() );
     447
     448                                if(full) enable_interrupts( __cfaabi_dbg_ctx );
     449                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
    552450                        }
    553                         unlock    (proc_list_lock);
    554                 }
    555                 else if( struct processor * idle = idles.head ) {
    556                         wake_fast(idle);
    557                 }
    558 
    559         }
    560 
    561         verify( ! kernelTLS.preemption_state.enabled );
     451
     452                        break;
     453                default:
     454                        // This makes no sense, something is wrong abort
     455                        abort("Thread %p (%s) has mismatch park/unpark\n", thrd, thrd->self_cor.name);
     456        }
     457}
     458
     459void park( void ) {
     460        /* paranoid */ verify( __preemption_enabled() );
     461        disable_interrupts();
     462        /* paranoid */ verify( ! __preemption_enabled() );
     463        /* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION );
     464
     465        returnToKernel();
     466
     467        /* paranoid */ verify( ! __preemption_enabled() );
     468        enable_interrupts( __cfaabi_dbg_ctx );
     469        /* paranoid */ verify( __preemption_enabled() );
     470
     471}
     472
     473extern "C" {
     474        // Leave the thread monitor
     475        // last routine called by a thread.
     476        // Should never return
     477        void __cfactx_thrd_leave() {
     478                $thread * thrd = active_thread();
     479                $monitor * this = &thrd->self_mon;
     480
     481                // Lock the monitor now
     482                lock( this->lock __cfaabi_dbg_ctx2 );
     483
     484                disable_interrupts();
     485
     486                /* paranoid */ verify( ! __preemption_enabled() );
     487                /* paranoid */ verify( thrd->state == Active );
     488                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     489                /* paranoid */ verify( kernelTLS().this_thread == thrd );
     490                /* paranoid */ verify( thrd->context.SP );
     491                /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) > ((uintptr_t)__get_stack(thrd->curr_cor)->limit), "ERROR : $thread %p has been corrupted.\n StackPointer too large.\n", thrd );
     492                /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) < ((uintptr_t)__get_stack(thrd->curr_cor)->base ), "ERROR : $thread %p has been corrupted.\n StackPointer too small.\n", thrd );
     493
     494                thrd->state = Halting;
     495                if( TICKET_RUNNING != thrd->ticket ) { abort( "Thread terminated with pending unpark" ); }
     496                if( thrd != this->owner ) { abort( "Thread internal monitor has incorrect owner" ); }
     497                if( this->recursion != 1) { abort( "Thread internal monitor has unbalanced recursion" ); }
     498
     499                // Leave the thread
     500                returnToKernel();
     501
     502                // Control flow should never reach here!
     503                abort();
     504        }
    562505}
    563506
    564507// KERNEL ONLY
    565 thread_desc * nextThread(cluster * this) with( *this ) {
    566         verify( ! kernelTLS.preemption_state.enabled );
    567         lock( ready_queue_lock __cfaabi_dbg_ctx2 );
    568         thread_desc * head = pop_head( ready_queue );
    569         unlock( ready_queue_lock );
    570         verify( ! kernelTLS.preemption_state.enabled );
    571         return head;
    572 }
    573 
    574 void BlockInternal() {
     508bool force_yield( __Preemption_Reason reason ) {
     509        /* paranoid */ verify( __preemption_enabled() );
    575510        disable_interrupts();
    576         verify( ! kernelTLS.preemption_state.enabled );
    577         returnToKernel();
    578         verify( ! kernelTLS.preemption_state.enabled );
     511        /* paranoid */ verify( ! __preemption_enabled() );
     512
     513        $thread * thrd = kernelTLS().this_thread;
     514        /* paranoid */ verify(thrd->state == Active);
     515
     516        // SKULLDUGGERY: It is possible that we are preempting this thread just before
     517        // it was going to park itself. If that is the case and it is already using the
     518        // intrusive fields then we can't use them to preempt the thread
     519        // If that is the case, abandon the preemption.
     520        bool preempted = false;
     521        if(thrd->link.next == 0p) {
     522                preempted = true;
     523                thrd->preempted = reason;
     524                returnToKernel();
     525        }
     526
     527        /* paranoid */ verify( ! __preemption_enabled() );
     528        enable_interrupts_noPoll();
     529        /* paranoid */ verify( __preemption_enabled() );
     530
     531        return preempted;
     532}
     533
     534//=============================================================================================
     535// Kernel Idle Sleep
     536//=============================================================================================
     537// Wake a thread from the front if there are any
     538static void __wake_one(cluster * this) {
     539        /* paranoid */ verify( ! __preemption_enabled() );
     540        /* paranoid */ verify( ready_schedule_islocked() );
     541
     542        // Check if there is a sleeping processor
     543        processor * p;
     544        unsigned idle;
     545        unsigned total;
     546        [idle, total, p] = query(this->idles);
     547
     548        // If no one is sleeping, we are done
     549        if( idle == 0 ) return;
     550
     551        // We found a processor, wake it up
     552        post( p->idle );
     553
     554        #if !defined(__CFA_NO_STATISTICS__)
     555                __tls_stats()->ready.sleep.wakes++;
     556        #endif
     557
     558        /* paranoid */ verify( ready_schedule_islocked() );
     559        /* paranoid */ verify( ! __preemption_enabled() );
     560
     561        return;
     562}
     563
     564// Unconditionnaly wake a thread
     565void __wake_proc(processor * this) {
     566        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
     567
     568        disable_interrupts();
     569                /* paranoid */ verify( ! __preemption_enabled() );
     570                post( this->idle );
    579571        enable_interrupts( __cfaabi_dbg_ctx );
    580572}
    581573
    582 void BlockInternal( __spinlock_t * lock ) {
    583         disable_interrupts();
    584         with( *kernelTLS.this_processor ) {
    585                 finish.action_code = Release;
    586                 finish.lock        = lock;
    587         }
    588 
    589         verify( ! kernelTLS.preemption_state.enabled );
    590         returnToKernel();
    591         verify( ! kernelTLS.preemption_state.enabled );
    592 
    593         enable_interrupts( __cfaabi_dbg_ctx );
    594 }
    595 
    596 void BlockInternal( thread_desc * thrd ) {
    597         disable_interrupts();
    598         with( * kernelTLS.this_processor ) {
    599                 finish.action_code = Schedule;
    600                 finish.thrd        = thrd;
    601         }
    602 
    603         verify( ! kernelTLS.preemption_state.enabled );
    604         returnToKernel();
    605         verify( ! kernelTLS.preemption_state.enabled );
    606 
    607         enable_interrupts( __cfaabi_dbg_ctx );
    608 }
    609 
    610 void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
    611         assert(thrd);
    612         disable_interrupts();
    613         with( * kernelTLS.this_processor ) {
    614                 finish.action_code = Release_Schedule;
    615                 finish.lock        = lock;
    616                 finish.thrd        = thrd;
    617         }
    618 
    619         verify( ! kernelTLS.preemption_state.enabled );
    620         returnToKernel();
    621         verify( ! kernelTLS.preemption_state.enabled );
    622 
    623         enable_interrupts( __cfaabi_dbg_ctx );
    624 }
    625 
    626 void BlockInternal(__spinlock_t * locks [], unsigned short count) {
    627         disable_interrupts();
    628         with( * kernelTLS.this_processor ) {
    629                 finish.action_code = Release_Multi;
    630                 finish.locks       = locks;
    631                 finish.lock_count  = count;
    632         }
    633 
    634         verify( ! kernelTLS.preemption_state.enabled );
    635         returnToKernel();
    636         verify( ! kernelTLS.preemption_state.enabled );
    637 
    638         enable_interrupts( __cfaabi_dbg_ctx );
    639 }
    640 
    641 void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
    642         disable_interrupts();
    643         with( *kernelTLS.this_processor ) {
    644                 finish.action_code = Release_Multi_Schedule;
    645                 finish.locks       = locks;
    646                 finish.lock_count  = lock_count;
    647                 finish.thrds       = thrds;
    648                 finish.thrd_count  = thrd_count;
    649         }
    650 
    651         verify( ! kernelTLS.preemption_state.enabled );
    652         returnToKernel();
    653         verify( ! kernelTLS.preemption_state.enabled );
    654 
    655         enable_interrupts( __cfaabi_dbg_ctx );
    656 }
    657 
    658 void BlockInternal(__finish_callback_fptr_t callback) {
    659         disable_interrupts();
    660         with( *kernelTLS.this_processor ) {
    661                 finish.action_code = Callback;
    662                 finish.callback    = callback;
    663         }
    664 
    665         verify( ! kernelTLS.preemption_state.enabled );
    666         returnToKernel();
    667         verify( ! kernelTLS.preemption_state.enabled );
    668 
    669         enable_interrupts( __cfaabi_dbg_ctx );
    670 }
    671 
    672 // KERNEL ONLY
    673 void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
    674         verify( ! kernelTLS.preemption_state.enabled );
    675         with( * kernelTLS.this_processor ) {
    676                 finish.action_code = thrd ? Release_Schedule : Release;
    677                 finish.lock        = lock;
    678                 finish.thrd        = thrd;
    679         }
    680 
    681         returnToKernel();
    682 }
    683 
    684 //=============================================================================================
    685 // Kernel Setup logic
    686 //=============================================================================================
    687 //-----------------------------------------------------------------------------
    688 // Kernel boot procedures
    689 static void kernel_startup(void) {
    690         verify( ! kernelTLS.preemption_state.enabled );
    691         __cfaabi_dbg_print_safe("Kernel : Starting\n");
    692 
    693         __page_size = sysconf( _SC_PAGESIZE );
    694 
    695         __cfa_dbg_global_clusters.list{ __get };
    696         __cfa_dbg_global_clusters.lock{};
    697 
    698         // Initialize the main cluster
    699         mainCluster = (cluster *)&storage_mainCluster;
    700         (*mainCluster){"Main Cluster"};
    701 
    702         __cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
    703 
    704         // Start by initializing the main thread
    705         // SKULLDUGGERY: the mainThread steals the process main thread
    706         // which will then be scheduled by the mainProcessor normally
    707         mainThread = (thread_desc *)&storage_mainThread;
    708         current_stack_info_t info;
    709         info.storage = (__stack_t*)&storage_mainThreadCtx;
    710         (*mainThread){ &info };
    711 
    712         __cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
    713 
    714 
    715 
    716         // Construct the processor context of the main processor
    717         void ?{}(processorCtx_t & this, processor * proc) {
    718                 (this.__cor){ "Processor" };
    719                 this.__cor.starter = 0p;
    720                 this.proc = proc;
    721         }
    722 
    723         void ?{}(processor & this) with( this ) {
    724                 name = "Main Processor";
    725                 cltr = mainCluster;
    726                 terminated{ 0 };
    727                 do_terminate = false;
    728                 preemption_alarm = 0p;
    729                 pending_preemption = false;
    730                 kernel_thread = pthread_self();
    731 
    732                 runner{ &this };
    733                 __cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
    734         }
    735 
    736         // Initialize the main processor and the main processor ctx
    737         // (the coroutine that contains the processing control flow)
    738         mainProcessor = (processor *)&storage_mainProcessor;
    739         (*mainProcessor){};
    740 
    741         //initialize the global state variables
    742         kernelTLS.this_processor = mainProcessor;
    743         kernelTLS.this_thread    = mainThread;
    744 
    745         // Enable preemption
    746         kernel_start_preemption();
    747 
    748         // Add the main thread to the ready queue
    749         // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
    750         ScheduleThread(mainThread);
    751 
    752         // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
    753         // context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
    754         // mainThread is on the ready queue when this call is made.
    755         kernel_first_resume( kernelTLS.this_processor );
    756 
    757 
    758 
    759         // THE SYSTEM IS NOW COMPLETELY RUNNING
    760         __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
    761 
    762         verify( ! kernelTLS.preemption_state.enabled );
    763         enable_interrupts( __cfaabi_dbg_ctx );
    764         verify( TL_GET( preemption_state.enabled ) );
    765 }
    766 
    767 static void kernel_shutdown(void) {
    768         __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
    769 
    770         verify( TL_GET( preemption_state.enabled ) );
    771         disable_interrupts();
    772         verify( ! kernelTLS.preemption_state.enabled );
    773 
    774         // SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
    775         // When its coroutine terminates, it return control to the mainThread
    776         // which is currently here
    777         __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
    778         kernel_last_resume( kernelTLS.this_processor );
    779         mainThread->self_cor.state = Halted;
    780 
    781         // THE SYSTEM IS NOW COMPLETELY STOPPED
    782 
    783         // Disable preemption
    784         kernel_stop_preemption();
    785 
    786         // Destroy the main processor and its context in reverse order of construction
    787         // These were manually constructed so we need manually destroy them
    788         ^(mainProcessor->runner){};
    789         ^(mainProcessor){};
    790 
    791         // Final step, destroy the main thread since it is no longer needed
    792         // Since we provided a stack to this taxk it will not destroy anything
    793         ^(mainThread){};
    794 
    795         ^(__cfa_dbg_global_clusters.list){};
    796         ^(__cfa_dbg_global_clusters.lock){};
    797 
    798         __cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
    799 }
    800 
    801 //=============================================================================================
    802 // Kernel Quiescing
    803 //=============================================================================================
    804 static void halt(processor * this) with( *this ) {
    805         // verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
    806 
    807         with( *cltr ) {
    808                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    809                 remove    (procs, *this);
    810                 push_front(idles, *this);
    811                 unlock    (proc_list_lock);
    812         }
    813 
    814         __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
    815 
    816         wait( idleLock );
    817 
    818         __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
    819 
    820         with( *cltr ) {
    821                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    822                 remove    (idles, *this);
    823                 push_front(procs, *this);
    824                 unlock    (proc_list_lock);
     574static void push  (__cluster_idles & this, processor & proc) {
     575        /* paranoid */ verify( ! __preemption_enabled() );
     576        lock( this );
     577                this.idle++;
     578                /* paranoid */ verify( this.idle <= this.total );
     579
     580                insert_first(this.list, proc);
     581        unlock( this );
     582        /* paranoid */ verify( ! __preemption_enabled() );
     583}
     584
     585static void remove(__cluster_idles & this, processor & proc) {
     586        /* paranoid */ verify( ! __preemption_enabled() );
     587        lock( this );
     588                this.idle--;
     589                /* paranoid */ verify( this.idle >= 0 );
     590
     591                remove(proc);
     592        unlock( this );
     593        /* paranoid */ verify( ! __preemption_enabled() );
     594}
     595
     596static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
     597        for() {
     598                uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
     599                if( 1 == (l % 2) ) { Pause(); continue; }
     600                unsigned idle    = this.idle;
     601                unsigned total   = this.total;
     602                processor * proc = &this.list`first;
     603                // Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
     604                asm volatile("": : :"memory");
     605                if(l != __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST)) { Pause(); continue; }
     606                return [idle, total, proc];
    825607        }
    826608}
     
    836618        // the globalAbort flag is true.
    837619        lock( kernel_abort_lock __cfaabi_dbg_ctx2 );
     620
     621        // disable interrupts, it no longer makes sense to try to interrupt this processor
     622        disable_interrupts();
    838623
    839624        // first task to abort ?
     
    853638        }
    854639
    855         return kernelTLS.this_thread;
     640        return __cfaabi_tls.this_thread;
    856641}
    857642
    858643void kernel_abort_msg( void * kernel_data, char * abort_text, int abort_text_size ) {
    859         thread_desc * thrd = kernel_data;
     644        $thread * thrd = ( $thread * ) kernel_data;
    860645
    861646        if(thrd) {
     
    878663
    879664int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
    880         return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
     665        return get_coroutine(kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2;
    881666}
    882667
     
    905690void ^?{}(semaphore & this) {}
    906691
    907 void P(semaphore & this) with( this ){
     692bool P(semaphore & this) with( this ){
    908693        lock( lock __cfaabi_dbg_ctx2 );
    909694        count -= 1;
    910695        if ( count < 0 ) {
    911696                // queue current task
    912                 append( waiting, kernelTLS.this_thread );
     697                append( waiting, active_thread() );
    913698
    914699                // atomically release spin lock and block
    915                 BlockInternal( &lock );
     700                unlock( lock );
     701                park();
     702                return true;
    916703        }
    917704        else {
    918705            unlock( lock );
    919         }
    920 }
    921 
    922 void V(semaphore & this) with( this ) {
    923         thread_desc * thrd = 0p;
     706            return false;
     707        }
     708}
     709
     710bool V(semaphore & this) with( this ) {
     711        $thread * thrd = 0p;
    924712        lock( lock __cfaabi_dbg_ctx2 );
    925713        count += 1;
     
    932720
    933721        // make new owner
    934         WakeThread( thrd );
    935 }
    936 
    937 //-----------------------------------------------------------------------------
    938 // Global Queues
    939 void doregister( cluster     & cltr ) {
    940         lock      ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    941         push_front( __cfa_dbg_global_clusters.list, cltr );
    942         unlock    ( __cfa_dbg_global_clusters.lock );
    943 }
    944 
    945 void unregister( cluster     & cltr ) {
    946         lock  ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    947         remove( __cfa_dbg_global_clusters.list, cltr );
    948         unlock( __cfa_dbg_global_clusters.lock );
    949 }
    950 
    951 void doregister( cluster * cltr, thread_desc & thrd ) {
    952         lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    953         cltr->nthreads += 1;
    954         push_front(cltr->threads, thrd);
    955         unlock    (cltr->thread_list_lock);
    956 }
    957 
    958 void unregister( cluster * cltr, thread_desc & thrd ) {
    959         lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    960         remove(cltr->threads, thrd );
    961         cltr->nthreads -= 1;
    962         unlock(cltr->thread_list_lock);
    963 }
    964 
    965 void doregister( cluster * cltr, processor * proc ) {
    966         lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    967         cltr->nprocessors += 1;
    968         push_front(cltr->procs, *proc);
    969         unlock    (cltr->proc_list_lock);
    970 }
    971 
    972 void unregister( cluster * cltr, processor * proc ) {
    973         lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    974         remove(cltr->procs, *proc );
    975         cltr->nprocessors -= 1;
    976         unlock(cltr->proc_list_lock);
     722        unpark( thrd );
     723
     724        return thrd != 0p;
     725}
     726
     727bool V(semaphore & this, unsigned diff) with( this ) {
     728        $thread * thrd = 0p;
     729        lock( lock __cfaabi_dbg_ctx2 );
     730        int release = max(-count, (int)diff);
     731        count += diff;
     732        for(release) {
     733                unpark( pop_head( waiting ) );
     734        }
     735
     736        unlock( lock );
     737
     738        return thrd != 0p;
    977739}
    978740
     
    981743__cfaabi_dbg_debug_do(
    982744        extern "C" {
    983                 void __cfaabi_dbg_record(__spinlock_t & this, const char * prev_name) {
     745                void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
    984746                        this.prev_name = prev_name;
    985                         this.prev_thrd = kernelTLS.this_thread;
     747                        this.prev_thrd = kernelTLS().this_thread;
    986748                }
    987749        }
     
    990752//-----------------------------------------------------------------------------
    991753// Debug
    992 bool threading_enabled(void) {
     754bool threading_enabled(void) __attribute__((const)) {
    993755        return true;
    994756}
     757
     758//-----------------------------------------------------------------------------
     759// Statistics
     760#if !defined(__CFA_NO_STATISTICS__)
     761        void print_halts( processor & this ) {
     762                this.print_halts = true;
     763        }
     764
     765        void print_stats_now( cluster & this, int flags ) {
     766                __print_stats( this.stats, this.print_stats, "Cluster", this.name, (void*)&this );
     767        }
     768
     769        extern int __print_alarm_stats;
     770        void print_alarm_stats() {
     771                __print_alarm_stats = -1;
     772        }
     773#endif
    995774// Local Variables: //
    996775// mode: c //
  • libcfa/src/concurrency/kernel.hfa

    rbdfc032 reef8dfb  
    1010// Created On       : Tue Jan 17 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Dec  4 07:54:51 2019
    13 // Update Count     : 18
     12// Last Modified On : Tue Feb  4 12:29:26 2020
     13// Update Count     : 22
    1414//
    1515
    1616#pragma once
    17 
    18 #include <stdbool.h>
    1917
    2018#include "invoke.h"
     
    2220#include "coroutine.hfa"
    2321
     22#include "containers/list.hfa"
     23
    2424extern "C" {
    25 #include <pthread.h>
    26 #include <semaphore.h>
     25        #include <bits/pthreadtypes.h>
     26        #include <linux/types.h>
    2727}
    2828
     
    3232        __spinlock_t lock;
    3333        int count;
    34         __queue_t(thread_desc) waiting;
     34        __queue_t($thread) waiting;
    3535};
    3636
    3737void  ?{}(semaphore & this, int count = 1);
    3838void ^?{}(semaphore & this);
    39 void   P (semaphore & this);
    40 void   V (semaphore & this);
     39bool   P (semaphore & this);
     40bool   V (semaphore & this);
     41bool   V (semaphore & this, unsigned count);
    4142
    4243
     
    4546extern struct cluster * mainCluster;
    4647
    47 enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule, Callback };
    48 
    49 typedef void (*__finish_callback_fptr_t)(void);
    50 
    51 //TODO use union, many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
    52 struct FinishAction {
    53         FinishOpCode action_code;
    54         /*
    55         // Union of possible actions
    56         union {
    57                 // Option 1 : locks and threads
    58                 struct {
    59                         // 1 thread or N thread
    60                         union {
    61                                 thread_desc * thrd;
    62                                 struct {
    63                                         thread_desc ** thrds;
    64                                         unsigned short thrd_count;
    65                                 };
    66                         };
    67                         // 1 lock or N lock
    68                         union {
    69                                 __spinlock_t * lock;
    70                                 struct {
    71                                         __spinlock_t ** locks;
    72                                         unsigned short lock_count;
    73                                 };
    74                         };
    75                 };
    76                 // Option 2 : action pointer
    77                 __finish_callback_fptr_t callback;
    78         };
    79         /*/
    80         thread_desc * thrd;
    81         thread_desc ** thrds;
    82         unsigned short thrd_count;
    83         __spinlock_t * lock;
    84         __spinlock_t ** locks;
    85         unsigned short lock_count;
    86         __finish_callback_fptr_t callback;
    87         //*/
    88 };
    89 static inline void ?{}(FinishAction & this) {
    90         this.action_code = No_Action;
    91         this.thrd = 0p;
    92         this.lock = 0p;
    93 }
    94 static inline void ^?{}(FinishAction &) {}
    95 
    96 // Processor
     48// Processor id, required for scheduling threads
     49struct __processor_id_t {
     50        unsigned id:24;
     51        bool full_proc:1;
     52
     53        #if !defined(__CFA_NO_STATISTICS__)
     54                struct __stats_t * stats;
     55        #endif
     56};
     57
    9758coroutine processorCtx_t {
    9859        struct processor * proc;
     
    10061
    10162// Wrapper around kernel threads
    102 struct processor {
     63struct __attribute__((aligned(128))) processor {
    10364        // Main state
     65        inline __processor_id_t;
     66
     67        // Cluster from which to get threads
     68        struct cluster * cltr;
     69
     70        // Set to true to notify the processor should terminate
     71        volatile bool do_terminate;
     72
    10473        // Coroutine ctx who does keeps the state of the processor
    10574        struct processorCtx_t runner;
    10675
    107         // Cluster from which to get threads
    108         struct cluster * cltr;
    109 
    11076        // Name of the processor
    11177        const char * name;
     
    11379        // Handle to pthreads
    11480        pthread_t kernel_thread;
    115 
    116         // RunThread data
    117         // Action to do after a thread is ran
    118         struct FinishAction finish;
    11981
    12082        // Preemption data
     
    12587        bool pending_preemption;
    12688
    127         // Idle lock
    128         __bin_sem_t idleLock;
    129 
    130         // Termination
    131         // Set to true to notify the processor should terminate
    132         volatile bool do_terminate;
    133 
    134         // Termination synchronisation
     89        // Idle lock (kernel semaphore)
     90        __bin_sem_t idle;
     91
     92        // Termination synchronisation (user semaphore)
    13593        semaphore terminated;
    13694
     
    13997
    14098        // Link lists fields
    141         struct __dbg_node_proc {
    142                 struct processor * next;
    143                 struct processor * prev;
    144         } node;
     99        DLISTED_MGD_IMPL_IN(processor)
     100
     101        #if !defined(__CFA_NO_STATISTICS__)
     102                int print_stats;
     103                bool print_halts;
     104        #endif
    145105
    146106#ifdef __CFA_DEBUG__
     
    150110};
    151111
    152 void  ?{}(processor & this, const char * name, struct cluster & cltr);
     112void  ?{}(processor & this, const char name[], struct cluster & cltr);
    153113void ^?{}(processor & this);
    154114
    155115static inline void  ?{}(processor & this)                    { this{ "Anonymous Processor", *mainCluster}; }
    156116static inline void  ?{}(processor & this, struct cluster & cltr)    { this{ "Anonymous Processor", cltr}; }
    157 static inline void  ?{}(processor & this, const char * name) { this{name, *mainCluster }; }
    158 
    159 static inline [processor *&, processor *& ] __get( processor & this ) {
    160         return this.node.[next, prev];
    161 }
     117static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }
     118
     119DLISTED_MGD_IMPL_OUT(processor)
     120
     121//-----------------------------------------------------------------------------
     122// I/O
     123struct __io_data;
     124
     125// IO poller user-thread
     126// Not using the "thread" keyword because we want to control
     127// more carefully when to start/stop it
     128struct $io_ctx_thread {
     129        struct __io_data * ring;
     130        single_sem sem;
     131        volatile bool done;
     132        $thread self;
     133};
     134
     135
     136struct io_context {
     137        $io_ctx_thread thrd;
     138};
     139
     140struct io_context_params {
     141        int num_entries;
     142        int num_ready;
     143        int submit_aff;
     144        bool eager_submits:1;
     145        bool poller_submits:1;
     146        bool poll_submit:1;
     147        bool poll_complete:1;
     148};
     149
     150void  ?{}(io_context_params & this);
     151
     152void  ?{}(io_context & this, struct cluster & cl);
     153void  ?{}(io_context & this, struct cluster & cl, const io_context_params & params);
     154void ^?{}(io_context & this);
     155
     156struct io_cancellation {
     157        __u64 target;
     158};
     159
     160static inline void  ?{}(io_cancellation & this) { this.target = -1u; }
     161static inline void ^?{}(io_cancellation &) {}
     162bool cancel(io_cancellation & this);
     163
     164//-----------------------------------------------------------------------------
     165// Cluster Tools
     166
     167// Intrusives lanes which are used by the relaxed ready queue
     168struct __attribute__((aligned(128))) __intrusive_lane_t;
     169void  ?{}(__intrusive_lane_t & this);
     170void ^?{}(__intrusive_lane_t & this);
     171
     172// Counter used for wether or not the lanes are all empty
     173struct __attribute__((aligned(128))) __snzi_node_t;
     174struct __snzi_t {
     175        unsigned mask;
     176        int root;
     177        __snzi_node_t * nodes;
     178};
     179
     180void  ?{}( __snzi_t & this, unsigned depth );
     181void ^?{}( __snzi_t & this );
     182
     183//TODO adjust cache size to ARCHITECTURE
     184// Structure holding the relaxed ready queue
     185struct __ready_queue_t {
     186        // Data tracking how many/which lanes are used
     187        // Aligned to 128 for cache locality
     188        __snzi_t snzi;
     189
     190        // Data tracking the actual lanes
     191        // On a seperate cacheline from the used struct since
     192        // used can change on each push/pop but this data
     193        // only changes on shrink/grow
     194        struct {
     195                // Arary of lanes
     196                __intrusive_lane_t * volatile data;
     197
     198                // Number of lanes (empty or not)
     199                volatile size_t count;
     200        } lanes;
     201};
     202
     203void  ?{}(__ready_queue_t & this);
     204void ^?{}(__ready_queue_t & this);
     205
     206// Idle Sleep
     207struct __cluster_idles {
     208        // Spin lock protecting the queue
     209        volatile uint64_t lock;
     210
     211        // Total number of processors
     212        unsigned total;
     213
     214        // Total number of idle processors
     215        unsigned idle;
     216
     217        // List of idle processors
     218        dlist(processor, processor) list;
     219};
    162220
    163221//-----------------------------------------------------------------------------
    164222// Cluster
    165 struct cluster {
    166         // Ready queue locks
    167         __spinlock_t ready_queue_lock;
    168 
     223struct __attribute__((aligned(128))) cluster {
    169224        // Ready queue for threads
    170         __queue_t(thread_desc) ready_queue;
     225        __ready_queue_t ready_queue;
    171226
    172227        // Name of the cluster
     
    176231        Duration preemption_rate;
    177232
    178         // List of processors
    179         __spinlock_t proc_list_lock;
    180         __dllist_t(struct processor) procs;
    181         __dllist_t(struct processor) idles;
    182         unsigned int nprocessors;
     233        // List of idle processors
     234        __cluster_idles idles;
    183235
    184236        // List of threads
    185237        __spinlock_t thread_list_lock;
    186         __dllist_t(struct thread_desc) threads;
     238        __dllist_t(struct $thread) threads;
    187239        unsigned int nthreads;
    188240
     
    192244                cluster * prev;
    193245        } node;
     246
     247        struct {
     248                io_context * ctxs;
     249                unsigned cnt;
     250        } io;
     251
     252        #if !defined(__CFA_NO_STATISTICS__)
     253                struct __stats_t * stats;
     254                int print_stats;
     255        #endif
    194256};
    195257extern Duration default_preemption();
    196258
    197 void ?{} (cluster & this, const char * name, Duration preemption_rate);
     259void ?{} (cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params);
    198260void ^?{}(cluster & this);
    199261
    200 static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
    201 static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
    202 static inline void ?{} (cluster & this, const char * name)        { this{name, default_preemption()}; }
    203 
    204 static inline [cluster *&, cluster *& ] __get( cluster & this ) {
    205         return this.node.[next, prev];
    206 }
    207 
    208 static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
    209 static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
     262static inline void ?{} (cluster & this)                                            { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), 1, default_params}; }
     263static inline void ?{} (cluster & this, Duration preemption_rate)                  { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, 1, default_params}; }
     264static inline void ?{} (cluster & this, const char name[])                         { io_context_params default_params;    this{name, default_preemption(), 1, default_params}; }
     265static inline void ?{} (cluster & this, unsigned num_io)                           { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), num_io, default_params}; }
     266static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io) { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, num_io, default_params}; }
     267static inline void ?{} (cluster & this, const char name[], unsigned num_io)        { io_context_params default_params;    this{name, default_preemption(), num_io, default_params}; }
     268static inline void ?{} (cluster & this, const io_context_params & io_params)                                            { this{"Anonymous Cluster", default_preemption(), 1, io_params}; }
     269static inline void ?{} (cluster & this, Duration preemption_rate, const io_context_params & io_params)                  { this{"Anonymous Cluster", preemption_rate, 1, io_params}; }
     270static inline void ?{} (cluster & this, const char name[], const io_context_params & io_params)                         { this{name, default_preemption(), 1, io_params}; }
     271static inline void ?{} (cluster & this, unsigned num_io, const io_context_params & io_params)                           { this{"Anonymous Cluster", default_preemption(), num_io, io_params}; }
     272static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io, const io_context_params & io_params) { this{"Anonymous Cluster", preemption_rate, num_io, io_params}; }
     273static inline void ?{} (cluster & this, const char name[], unsigned num_io, const io_context_params & io_params)        { this{name, default_preemption(), num_io, io_params}; }
     274
     275static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
     276
     277static inline struct processor * active_processor() { return publicTLS_get( this_processor ); } // UNSAFE
     278static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
     279
     280#if !defined(__CFA_NO_STATISTICS__)
     281        void print_stats_now( cluster & this, int flags );
     282
     283        static inline void print_stats_at_exit( cluster & this, int flags ) {
     284                this.print_stats |= flags;
     285        }
     286
     287        static inline void print_stats_at_exit( processor & this, int flags ) {
     288                this.print_stats |= flags;
     289        }
     290
     291        void print_halts( processor & this );
     292#endif
    210293
    211294// Local Variables: //
  • libcfa/src/concurrency/kernel_private.hfa

    rbdfc032 reef8dfb  
    1010// Created On       : Mon Feb 13 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Nov 30 19:25:02 2019
    13 // Update Count     : 8
     12// Last Modified On : Wed Aug 12 08:21:33 2020
     13// Update Count     : 9
    1414//
    1515
     
    2020
    2121#include "alarm.hfa"
    22 
     22#include "stats.hfa"
    2323
    2424//-----------------------------------------------------------------------------
    2525// Scheduler
     26
     27struct __attribute__((aligned(128))) __scheduler_lock_id_t;
    2628
    2729extern "C" {
     
    3133}
    3234
    33 void ScheduleThread( thread_desc * );
    34 static inline void WakeThread( thread_desc * thrd ) {
    35         if( !thrd ) return;
    36 
    37         verify(thrd->state == Inactive);
    38 
    39         disable_interrupts();
    40         ScheduleThread( thrd );
    41         enable_interrupts( __cfaabi_dbg_ctx );
    42 }
    43 thread_desc * nextThread(cluster * this);
    44 
    45 //Block current thread and release/wake-up the following resources
    46 void BlockInternal(void);
    47 void BlockInternal(__spinlock_t * lock);
    48 void BlockInternal(thread_desc * thrd);
    49 void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
    50 void BlockInternal(__spinlock_t * locks [], unsigned short count);
    51 void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
    52 void BlockInternal(__finish_callback_fptr_t callback);
    53 void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
     35void __schedule_thread( $thread * )
     36#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
     37        __attribute__((nonnull (1)))
     38#endif
     39;
     40
     41extern bool __preemption_enabled();
     42
     43//release/wake-up the following resources
     44void __thread_finish( $thread * thrd );
    5445
    5546//-----------------------------------------------------------------------------
     
    5748void main(processorCtx_t *);
    5849
    59 void * create_pthread( pthread_t *, void * (*)(void *), void * );
    60 
    61 static inline void wake_fast(processor * this) {
    62         __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
    63         post( this->idleLock );
    64 }
    65 
    66 static inline void wake(processor * this) {
    67         disable_interrupts();
    68         wake_fast(this);
    69         enable_interrupts( __cfaabi_dbg_ctx );
    70 }
    71 
    72 struct event_kernel_t {
    73         alarm_list_t alarms;
    74         __spinlock_t lock;
    75 };
    76 
    77 extern event_kernel_t * event_kernel;
    78 
    79 struct __cfa_kernel_preemption_state_t {
    80         bool enabled;
    81         bool in_progress;
    82         unsigned short disable_count;
    83 };
    84 
    85 extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
     50void * __create_pthread( pthread_t *, void * (*)(void *), void * );
     51void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
     52
     53
     54
     55extern cluster * mainCluster;
    8656
    8757//-----------------------------------------------------------------------------
    8858// Threads
    8959extern "C" {
    90       void CtxInvokeThread(void (*main)(void *), void * this);
    91 }
    92 
    93 extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
     60      void __cfactx_invoke_thread(void (*main)(void *), void * this);
     61}
    9462
    9563__cfaabi_dbg_debug_do(
    96         extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
    97         extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
     64        extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
     65        extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
    9866)
    9967
     68#define TICKET_BLOCKED (-1) // thread is blocked
     69#define TICKET_RUNNING ( 0) // thread is running
     70#define TICKET_UNBLOCK ( 1) // thread should ignore next block
     71
    10072//-----------------------------------------------------------------------------
    10173// Utils
    102 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
    103 
    104 static inline uint32_t tls_rand() {
    105         kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
    106         kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
    107         kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
    108         return kernelTLS.rand_seed;
    109 }
    110 
    111 
    112 void doregister( struct cluster & cltr );
    113 void unregister( struct cluster & cltr );
    114 
    115 void doregister( struct cluster * cltr, struct thread_desc & thrd );
    116 void unregister( struct cluster * cltr, struct thread_desc & thrd );
    117 
    118 void doregister( struct cluster * cltr, struct processor * proc );
    119 void unregister( struct cluster * cltr, struct processor * proc );
     74void doregister( struct cluster * cltr, struct $thread & thrd );
     75void unregister( struct cluster * cltr, struct $thread & thrd );
     76
     77//-----------------------------------------------------------------------------
     78// I/O
     79void ^?{}(io_context & this, bool );
     80
     81//=======================================================================
     82// Cluster lock API
     83//=======================================================================
     84// Cells use by the reader writer lock
     85// while not generic it only relies on a opaque pointer
     86struct __attribute__((aligned(128))) __scheduler_lock_id_t {
     87        // Spin lock used as the underlying lock
     88        volatile bool lock;
     89
     90        // Handle pointing to the proc owning this cell
     91        // Used for allocating cells and debugging
     92        __processor_id_t * volatile handle;
     93
     94        #ifdef __CFA_WITH_VERIFY__
     95                // Debug, check if this is owned for reading
     96                bool owned;
     97        #endif
     98};
     99
     100static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
     101
     102// Lock-Free registering/unregistering of threads
     103// Register a processor to a given cluster and get its unique id in return
     104unsigned doregister( struct __processor_id_t * proc );
     105
     106// Unregister a processor from a given cluster using its id, getting back the original pointer
     107void     unregister( struct __processor_id_t * proc );
     108
     109//-----------------------------------------------------------------------
     110// Cluster idle lock/unlock
     111static inline void lock(__cluster_idles & this) {
     112        for() {
     113                uint64_t l = this.lock;
     114                if(
     115                        (0 == (l % 2))
     116                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
     117                ) return;
     118                Pause();
     119        }
     120}
     121
     122static inline void unlock(__cluster_idles & this) {
     123        /* paranoid */ verify( 1 == (this.lock % 2) );
     124        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
     125}
     126
     127//=======================================================================
     128// Reader-writer lock implementation
     129// Concurrent with doregister/unregister,
     130//    i.e., threads can be added at any point during or between the entry/exit
     131
     132//-----------------------------------------------------------------------
     133// simple spinlock underlying the RWLock
     134// Blocking acquire
     135static inline void __atomic_acquire(volatile bool * ll) {
     136        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
     137                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
     138                        Pause();
     139        }
     140        /* paranoid */ verify(*ll);
     141}
     142
     143// Non-Blocking acquire
     144static inline bool __atomic_try_acquire(volatile bool * ll) {
     145        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
     146}
     147
     148// Release
     149static inline void __atomic_unlock(volatile bool * ll) {
     150        /* paranoid */ verify(*ll);
     151        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
     152}
     153
     154//-----------------------------------------------------------------------
     155// Reader-Writer lock protecting the ready-queues
     156// while this lock is mostly generic some aspects
     157// have been hard-coded to for the ready-queue for
     158// simplicity and performance
     159struct __scheduler_RWLock_t {
     160        // total cachelines allocated
     161        unsigned int max;
     162
     163        // cachelines currently in use
     164        volatile unsigned int alloc;
     165
     166        // cachelines ready to itereate over
     167        // (!= to alloc when thread is in second half of doregister)
     168        volatile unsigned int ready;
     169
     170        // writer lock
     171        volatile bool lock;
     172
     173        // data pointer
     174        __scheduler_lock_id_t * data;
     175};
     176
     177void  ?{}(__scheduler_RWLock_t & this);
     178void ^?{}(__scheduler_RWLock_t & this);
     179
     180extern __scheduler_RWLock_t * __scheduler_lock;
     181
     182//-----------------------------------------------------------------------
     183// Reader side : acquire when using the ready queue to schedule but not
     184//  creating/destroying queues
     185static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
     186        /* paranoid */ verify( ! __preemption_enabled() );
     187        /* paranoid */ verify( kernelTLS().this_proc_id );
     188
     189        unsigned iproc = kernelTLS().this_proc_id->id;
     190        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     191        /*paranoid*/ verify(iproc < ready);
     192
     193        // Step 1 : make sure no writer are in the middle of the critical section
     194        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
     195                Pause();
     196
     197        // Fence needed because we don't want to start trying to acquire the lock
     198        // before we read a false.
     199        // Not needed on x86
     200        // std::atomic_thread_fence(std::memory_order_seq_cst);
     201
     202        // Step 2 : acquire our local lock
     203        __atomic_acquire( &data[iproc].lock );
     204        /*paranoid*/ verify(data[iproc].lock);
     205
     206        #ifdef __CFA_WITH_VERIFY__
     207                // Debug, check if this is owned for reading
     208                data[iproc].owned = true;
     209        #endif
     210}
     211
     212static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
     213        /* paranoid */ verify( ! __preemption_enabled() );
     214        /* paranoid */ verify( kernelTLS().this_proc_id );
     215
     216        unsigned iproc = kernelTLS().this_proc_id->id;
     217        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     218        /*paranoid*/ verify(iproc < ready);
     219        /*paranoid*/ verify(data[iproc].lock);
     220        /*paranoid*/ verify(data[iproc].owned);
     221        #ifdef __CFA_WITH_VERIFY__
     222                // Debug, check if this is owned for reading
     223                data[iproc].owned = false;
     224        #endif
     225        __atomic_unlock(&data[iproc].lock);
     226}
     227
     228#ifdef __CFA_WITH_VERIFY__
     229        static inline bool ready_schedule_islocked(void) {
     230                /* paranoid */ verify( ! __preemption_enabled() );
     231                /*paranoid*/ verify( kernelTLS().this_proc_id );
     232                __processor_id_t * proc = kernelTLS().this_proc_id;
     233                return __scheduler_lock->data[proc->id].owned;
     234        }
     235
     236        static inline bool ready_mutate_islocked() {
     237                return __scheduler_lock->lock;
     238        }
     239#endif
     240
     241//-----------------------------------------------------------------------
     242// Writer side : acquire when changing the ready queue, e.g. adding more
     243//  queues or removing them.
     244uint_fast32_t ready_mutate_lock( void );
     245
     246void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
     247
     248//=======================================================================
     249// Ready-Queue API
     250//-----------------------------------------------------------------------
     251// pop thread from the ready queue of a cluster
     252// returns 0p if empty
     253__attribute__((hot)) bool query(struct cluster * cltr);
     254
     255//-----------------------------------------------------------------------
     256// push thread onto a ready queue for a cluster
     257// returns true if the list was previously empty, false otherwise
     258__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
     259
     260//-----------------------------------------------------------------------
     261// pop thread from the ready queue of a cluster
     262// returns 0p if empty
     263// May return 0p spuriously
     264__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
     265
     266//-----------------------------------------------------------------------
     267// pop thread from the ready queue of a cluster
     268// returns 0p if empty
     269// guaranteed to find any threads added before this call
     270__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
     271
     272//-----------------------------------------------------------------------
     273// remove thread from the ready queue of a cluster
     274// returns bool if it wasn't found
     275bool remove_head(struct cluster * cltr, struct $thread * thrd);
     276
     277//-----------------------------------------------------------------------
     278// Increase the width of the ready queue (number of lanes) by 4
     279void ready_queue_grow  (struct cluster * cltr, int target);
     280
     281//-----------------------------------------------------------------------
     282// Decrease the width of the ready queue (number of lanes) by 4
     283void ready_queue_shrink(struct cluster * cltr, int target);
     284
    120285
    121286// Local Variables: //
  • libcfa/src/concurrency/monitor.cfa

    rbdfc032 reef8dfb  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // monitor_desc.c --
     7// $monitor.c --
    88//
    99// Author           : Thierry Delisle
     
    2727//-----------------------------------------------------------------------------
    2828// Forward declarations
    29 static inline void set_owner ( monitor_desc * this, thread_desc * owner );
    30 static inline void set_owner ( monitor_desc * storage [], __lock_size_t count, thread_desc * owner );
    31 static inline void set_mask  ( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
    32 static inline void reset_mask( monitor_desc * this );
    33 
    34 static inline thread_desc * next_thread( monitor_desc * this );
    35 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
     29static inline void __set_owner ( $monitor * this, $thread * owner );
     30static inline void __set_owner ( $monitor * storage [], __lock_size_t count, $thread * owner );
     31static inline void set_mask  ( $monitor * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
     32static inline void reset_mask( $monitor * this );
     33
     34static inline $thread * next_thread( $monitor * this );
     35static inline bool is_accepted( $monitor * this, const __monitor_group_t & monitors );
    3636
    3737static inline void lock_all  ( __spinlock_t * locks [], __lock_size_t count );
    38 static inline void lock_all  ( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
     38static inline void lock_all  ( $monitor * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
    3939static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count );
    40 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count );
    41 
    42 static inline void save   ( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
    43 static inline void restore( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
    44 
    45 static inline void init     ( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
    46 static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
    47 
    48 static inline thread_desc *        check_condition   ( __condition_criterion_t * );
     40static inline void unlock_all( $monitor * locks [], __lock_size_t count );
     41
     42static inline void save   ( $monitor * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
     43static inline void restore( $monitor * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
     44
     45static inline void init     ( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
     46static inline void init_push( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
     47
     48static inline $thread *        check_condition   ( __condition_criterion_t * );
    4949static inline void                 brand_condition   ( condition & );
    50 static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t &, monitor_desc * monitors [], __lock_size_t count );
     50static inline [$thread *, int] search_entry_queue( const __waitfor_mask_t &, $monitor * monitors [], __lock_size_t count );
    5151
    5252forall(dtype T | sized( T ))
    5353static inline __lock_size_t insert_unique( T * array [], __lock_size_t & size, T * val );
    5454static inline __lock_size_t count_max    ( const __waitfor_mask_t & mask );
    55 static inline __lock_size_t aggregate    ( monitor_desc * storage [], const __waitfor_mask_t & mask );
     55static inline __lock_size_t aggregate    ( $monitor * storage [], const __waitfor_mask_t & mask );
    5656
    5757//-----------------------------------------------------------------------------
     
    6868
    6969#define monitor_ctx( mons, cnt )                                /* Define that create the necessary struct for internal/external scheduling operations */ \
    70         monitor_desc ** monitors = mons;                          /* Save the targeted monitors                                                          */ \
     70        $monitor ** monitors = mons;                          /* Save the targeted monitors                                                          */ \
    7171        __lock_size_t count = cnt;                                /* Save the count to a local variable                                                  */ \
    7272        unsigned int recursions[ count ];                         /* Save the current recursion levels to restore them later                             */ \
     
    8080//-----------------------------------------------------------------------------
    8181// Enter/Leave routines
    82 
    83 
    84 extern "C" {
    85         // Enter single monitor
    86         static void __enter_monitor_desc( monitor_desc * this, const __monitor_group_t & group ) {
    87                 // Lock the monitor spinlock
    88                 lock( this->lock __cfaabi_dbg_ctx2 );
    89                 // Interrupts disable inside critical section
    90                 thread_desc * thrd = kernelTLS.this_thread;
    91 
    92                 __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
    93 
    94                 if( !this->owner ) {
    95                         // No one has the monitor, just take it
    96                         set_owner( this, thrd );
    97 
    98                         __cfaabi_dbg_print_safe( "Kernel :  mon is free \n" );
    99                 }
    100                 else if( this->owner == thrd) {
    101                         // We already have the monitor, just note how many times we took it
    102                         this->recursion += 1;
    103 
    104                         __cfaabi_dbg_print_safe( "Kernel :  mon already owned \n" );
    105                 }
    106                 else if( is_accepted( this, group) ) {
    107                         // Some one was waiting for us, enter
    108                         set_owner( this, thrd );
    109 
    110                         // Reset mask
    111                         reset_mask( this );
    112 
    113                         __cfaabi_dbg_print_safe( "Kernel :  mon accepts \n" );
    114                 }
    115                 else {
    116                         __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
    117 
    118                         // Some one else has the monitor, wait in line for it
    119                         append( this->entry_queue, thrd );
    120 
    121                         BlockInternal( &this->lock );
    122 
    123                         __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    124 
    125                         // BlockInternal will unlock spinlock, no need to unlock ourselves
    126                         return;
    127                 }
     82// Enter single monitor
     83static void __enter( $monitor * this, const __monitor_group_t & group ) {
     84        $thread * thrd = active_thread();
     85
     86        // Lock the monitor spinlock
     87        lock( this->lock __cfaabi_dbg_ctx2 );
     88
     89        __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
     90
     91        if( unlikely(0 != (0x1 & (uintptr_t)this->owner)) ) {
     92                abort( "Attempt by thread \"%.256s\" (%p) to access joined monitor %p.", thrd->self_cor.name, thrd, this );
     93        }
     94        else if( !this->owner ) {
     95                // No one has the monitor, just take it
     96                __set_owner( this, thrd );
     97
     98                __cfaabi_dbg_print_safe( "Kernel :  mon is free \n" );
     99        }
     100        else if( this->owner == thrd) {
     101                // We already have the monitor, just note how many times we took it
     102                this->recursion += 1;
     103
     104                __cfaabi_dbg_print_safe( "Kernel :  mon already owned \n" );
     105        }
     106        else if( is_accepted( this, group) ) {
     107                // Some one was waiting for us, enter
     108                __set_owner( this, thrd );
     109
     110                // Reset mask
     111                reset_mask( this );
     112
     113                __cfaabi_dbg_print_safe( "Kernel :  mon accepts \n" );
     114        }
     115        else {
     116                __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
     117
     118                // Some one else has the monitor, wait in line for it
     119                /* paranoid */ verify( thrd->link.next == 0p );
     120                append( this->entry_queue, thrd );
     121                /* paranoid */ verify( thrd->link.next == 1p );
     122
     123                unlock( this->lock );
     124                park();
    128125
    129126                __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    130127
    131                 // Release the lock and leave
     128                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     129                return;
     130        }
     131
     132        __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
     133
     134        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     135        /* paranoid */ verify( this->lock.lock );
     136
     137        // Release the lock and leave
     138        unlock( this->lock );
     139        return;
     140}
     141
     142static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
     143        $thread * thrd = active_thread();
     144        #if defined( __CFA_WITH_VERIFY__ )
     145                bool is_thrd = this == &thrd->self_mon;
     146        #endif
     147
     148        // Lock the monitor spinlock
     149        lock( this->lock __cfaabi_dbg_ctx2 );
     150
     151        __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
     152
     153
     154        if( !this->owner ) {
     155                __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
     156
     157                // No one has the monitor, just take it
     158                __set_owner( this, thrd );
     159
     160                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     161                /* paranoid */ verify( !is_thrd || thrd->state == Halted || thrd->state == Cancelled );
     162
    132163                unlock( this->lock );
    133164                return;
    134165        }
    135 
    136         static void __enter_monitor_dtor( monitor_desc * this, fptr_t func ) {
    137                 // Lock the monitor spinlock
    138                 lock( this->lock __cfaabi_dbg_ctx2 );
    139                 // Interrupts disable inside critical section
    140                 thread_desc * thrd = kernelTLS.this_thread;
    141 
    142                 __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
    143 
    144 
    145                 if( !this->owner ) {
    146                         __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
    147 
    148                         // No one has the monitor, just take it
    149                         set_owner( this, thrd );
    150 
    151                         unlock( this->lock );
    152                         return;
     166        else if( this->owner == thrd && !join) {
     167                // We already have the monitor... but where about to destroy it so the nesting will fail
     168                // Abort!
     169                abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
     170        }
     171        // SKULLDUGGERY: join will act as a dtor so it would normally trigger to above check
     172        // because join will not release the monitor after it executed.
     173        // to avoid that it sets the owner to the special value thrd | 1p before exiting
     174        else if( this->owner == ($thread*)(1 | (uintptr_t)thrd) ) {
     175                // restore the owner and just return
     176                __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
     177
     178                // No one has the monitor, just take it
     179                __set_owner( this, thrd );
     180
     181                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     182                /* paranoid */ verify( !is_thrd || thrd->state == Halted || thrd->state == Cancelled );
     183
     184                unlock( this->lock );
     185                return;
     186        }
     187
     188        // The monitor is busy, if this is a thread and the thread owns itself, it better be active
     189        /* paranoid */ verify( !is_thrd || this->owner != thrd || (thrd->state != Halted && thrd->state != Cancelled) );
     190
     191        __lock_size_t count = 1;
     192        $monitor ** monitors = &this;
     193        __monitor_group_t group = { &this, 1, func };
     194        if( is_accepted( this, group) ) {
     195                __cfaabi_dbg_print_safe( "Kernel :  mon accepts dtor, block and signal it \n" );
     196
     197                // Wake the thread that is waiting for this
     198                __condition_criterion_t * urgent = pop( this->signal_stack );
     199                /* paranoid */ verify( urgent );
     200
     201                // Reset mask
     202                reset_mask( this );
     203
     204                // Create the node specific to this wait operation
     205                wait_ctx_primed( thrd, 0 )
     206
     207                // Some one else has the monitor, wait for him to finish and then run
     208                unlock( this->lock );
     209
     210                // Release the next thread
     211                /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     212                unpark( urgent->owner->waiting_thread );
     213
     214                // Park current thread waiting
     215                park();
     216
     217                // Some one was waiting for us, enter
     218                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     219
     220                __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
     221                return;
     222        }
     223        else {
     224                __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
     225
     226                wait_ctx( thrd, 0 )
     227                this->dtor_node = &waiter;
     228
     229                // Some one else has the monitor, wait in line for it
     230                /* paranoid */ verify( thrd->link.next == 0p );
     231                append( this->entry_queue, thrd );
     232                /* paranoid */ verify( thrd->link.next == 1p );
     233                unlock( this->lock );
     234
     235                // Park current thread waiting
     236                park();
     237
     238                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     239                return;
     240        }
     241}
     242
     243// Leave single monitor
     244void __leave( $monitor * this ) {
     245        // Lock the monitor spinlock
     246        lock( this->lock __cfaabi_dbg_ctx2 );
     247
     248        __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", active_thread(), this, this->owner);
     249
     250        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     251
     252        // Leaving a recursion level, decrement the counter
     253        this->recursion -= 1;
     254
     255        // If we haven't left the last level of recursion
     256        // it means we don't need to do anything
     257        if( this->recursion != 0) {
     258                __cfaabi_dbg_print_safe( "Kernel :  recursion still %d\n", this->recursion);
     259                unlock( this->lock );
     260                return;
     261        }
     262
     263        // Get the next thread, will be null on low contention monitor
     264        $thread * new_owner = next_thread( this );
     265
     266        // Check the new owner is consistent with who we wake-up
     267        // new_owner might be null even if someone owns the monitor when the owner is still waiting for another monitor
     268        /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     269
     270        // We can now let other threads in safely
     271        unlock( this->lock );
     272
     273        //We need to wake-up the thread
     274        /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     275        unpark( new_owner );
     276}
     277
     278// Leave single monitor for the last time
     279void __dtor_leave( $monitor * this, bool join ) {
     280        __cfaabi_dbg_debug_do(
     281                if( active_thread() != this->owner ) {
     282                        abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, active_thread(), this->owner);
    153283                }
    154                 else if( this->owner == thrd) {
    155                         // We already have the monitor... but where about to destroy it so the nesting will fail
    156                         // Abort!
    157                         abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
     284                if( this->recursion != 1  && !join ) {
     285                        abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    158286                }
    159 
    160                 __lock_size_t count = 1;
    161                 monitor_desc ** monitors = &this;
    162                 __monitor_group_t group = { &this, 1, func };
    163                 if( is_accepted( this, group) ) {
    164                         __cfaabi_dbg_print_safe( "Kernel :  mon accepts dtor, block and signal it \n" );
    165 
    166                         // Wake the thread that is waiting for this
    167                         __condition_criterion_t * urgent = pop( this->signal_stack );
    168                         verify( urgent );
    169 
    170                         // Reset mask
    171                         reset_mask( this );
    172 
    173                         // Create the node specific to this wait operation
    174                         wait_ctx_primed( thrd, 0 )
    175 
    176                         // Some one else has the monitor, wait for him to finish and then run
    177                         BlockInternal( &this->lock, urgent->owner->waiting_thread );
    178 
    179                         // Some one was waiting for us, enter
    180                         set_owner( this, thrd );
    181                 }
    182                 else {
    183                         __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
    184 
    185                         wait_ctx( thrd, 0 )
    186                         this->dtor_node = &waiter;
    187 
    188                         // Some one else has the monitor, wait in line for it
    189                         append( this->entry_queue, thrd );
    190                         BlockInternal( &this->lock );
    191 
    192                         // BlockInternal will unlock spinlock, no need to unlock ourselves
    193                         return;
    194                 }
    195 
    196                 __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
    197 
    198         }
    199 
    200         // Leave single monitor
    201         void __leave_monitor_desc( monitor_desc * this ) {
    202                 // Lock the monitor spinlock
    203                 lock( this->lock __cfaabi_dbg_ctx2 );
    204 
    205                 __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
    206 
    207                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    208 
    209                 // Leaving a recursion level, decrement the counter
    210                 this->recursion -= 1;
    211 
    212                 // If we haven't left the last level of recursion
    213                 // it means we don't need to do anything
    214                 if( this->recursion != 0) {
    215                         __cfaabi_dbg_print_safe( "Kernel :  recursion still %d\n", this->recursion);
    216                         unlock( this->lock );
    217                         return;
    218                 }
    219 
    220                 // Get the next thread, will be null on low contention monitor
    221                 thread_desc * new_owner = next_thread( this );
    222 
    223                 // We can now let other threads in safely
    224                 unlock( this->lock );
    225 
    226                 //We need to wake-up the thread
    227                 WakeThread( new_owner );
    228         }
    229 
    230         // Leave single monitor for the last time
    231         void __leave_dtor_monitor_desc( monitor_desc * this ) {
    232                 __cfaabi_dbg_debug_do(
    233                         if( TL_GET( this_thread ) != this->owner ) {
    234                                 abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
    235                         }
    236                         if( this->recursion != 1 ) {
    237                                 abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    238                         }
    239                 )
    240         }
    241 
    242         // Leave the thread monitor
    243         // last routine called by a thread.
    244         // Should never return
    245         void __leave_thread_monitor() {
    246                 thread_desc * thrd = TL_GET( this_thread );
    247                 monitor_desc * this = &thrd->self_mon;
    248 
    249                 // Lock the monitor now
    250                 lock( this->lock __cfaabi_dbg_ctx2 );
    251 
    252                 disable_interrupts();
    253 
    254                 thrd->self_cor.state = Halted;
    255 
    256                 verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
    257 
    258                 // Leaving a recursion level, decrement the counter
    259                 this->recursion -= 1;
    260 
    261                 // If we haven't left the last level of recursion
    262                 // it must mean there is an error
    263                 if( this->recursion != 0) { abort( "Thread internal monitor has unbalanced recursion" ); }
    264 
    265                 // Fetch the next thread, can be null
    266                 thread_desc * new_owner = next_thread( this );
    267 
    268                 // Leave the thread, this will unlock the spinlock
    269                 // Use leave thread instead of BlockInternal which is
    270                 // specialized for this case and supports null new_owner
    271                 LeaveThread( &this->lock, new_owner );
    272 
    273                 // Control flow should never reach here!
    274         }
     287        )
     288
     289        this->owner = ($thread*)(1 | (uintptr_t)this->owner);
     290}
     291
     292void __thread_finish( $thread * thrd ) {
     293        $monitor * this = &thrd->self_mon;
     294
     295        // Lock the monitor now
     296        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     297        /* paranoid */ verify( this->lock.lock );
     298        /* paranoid */ verify( thrd->context.SP );
     299        /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) > ((uintptr_t)__get_stack(thrd->curr_cor)->limit), "ERROR : $thread %p has been corrupted.\n StackPointer too large.\n", thrd );
     300        /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) < ((uintptr_t)__get_stack(thrd->curr_cor)->base ), "ERROR : $thread %p has been corrupted.\n StackPointer too small.\n", thrd );
     301        /* paranoid */ verify( ! __preemption_enabled() );
     302
     303        /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
     304        /* paranoid */ verify( thrd->state == Halting );
     305        /* paranoid */ verify( this->recursion == 1 );
     306
     307        // Leaving a recursion level, decrement the counter
     308        this->recursion -= 1;
     309        this->owner = 0p;
     310
     311        // Fetch the next thread, can be null
     312        $thread * new_owner = next_thread( this );
     313
     314        // Mark the state as fully halted
     315        thrd->state = Halted;
     316
     317        // Release the monitor lock
     318        unlock( this->lock );
     319
     320        // Unpark the next owner if needed
     321        /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     322        /* paranoid */ verify( ! __preemption_enabled() );
     323        /* paranoid */ verify( thrd->state == Halted );
     324        unpark( new_owner );
    275325}
    276326
     
    279329static inline void enter( __monitor_group_t monitors ) {
    280330        for( __lock_size_t i = 0; i < monitors.size; i++) {
    281                 __enter_monitor_desc( monitors[i], monitors );
     331                __enter( monitors[i], monitors );
    282332        }
    283333}
     
    285335// Leave multiple monitor
    286336// relies on the monitor array being sorted
    287 static inline void leave(monitor_desc * monitors [], __lock_size_t count) {
     337static inline void leave($monitor * monitors [], __lock_size_t count) {
    288338        for( __lock_size_t i = count - 1; i >= 0; i--) {
    289                 __leave_monitor_desc( monitors[i] );
     339                __leave( monitors[i] );
    290340        }
    291341}
     
    293343// Ctor for monitor guard
    294344// Sorts monitors before entering
    295 void ?{}( monitor_guard_t & this, monitor_desc * m [], __lock_size_t count, fptr_t func ) {
    296         thread_desc * thrd = TL_GET( this_thread );
     345void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
     346        $thread * thrd = active_thread();
    297347
    298348        // Store current array
     
    329379
    330380        // Restore thread context
    331         TL_GET( this_thread )->monitors = this.prev;
     381        active_thread()->monitors = this.prev;
    332382}
    333383
    334384// Ctor for monitor guard
    335385// Sorts monitors before entering
    336 void ?{}( monitor_dtor_guard_t & this, monitor_desc * m [], fptr_t func ) {
     386void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
    337387        // optimization
    338         thread_desc * thrd = TL_GET( this_thread );
     388        $thread * thrd = active_thread();
    339389
    340390        // Store current array
     
    344394        this.prev = thrd->monitors;
    345395
     396        // Save whether we are in a join or not
     397        this.join = join;
     398
    346399        // Update thread context (needed for conditions)
    347400        (thrd->monitors){m, 1, func};
    348401
    349         __enter_monitor_dtor( this.m, func );
     402        __dtor_enter( this.m, func, join );
    350403}
    351404
     
    353406void ^?{}( monitor_dtor_guard_t & this ) {
    354407        // Leave the monitors in order
    355         __leave_dtor_monitor_desc( this.m );
     408        __dtor_leave( this.m, this.join );
    356409
    357410        // Restore thread context
    358         TL_GET( this_thread )->monitors = this.prev;
     411        active_thread()->monitors = this.prev;
    359412}
    360413
    361414//-----------------------------------------------------------------------------
    362415// Internal scheduling types
    363 void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
     416void ?{}(__condition_node_t & this, $thread * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
    364417        this.waiting_thread = waiting_thread;
    365418        this.count = count;
     
    375428}
    376429
    377 void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t & owner ) {
     430void ?{}(__condition_criterion_t & this, $monitor * target, __condition_node_t & owner ) {
    378431        this.ready  = false;
    379432        this.target = target;
     
    396449
    397450        // Create the node specific to this wait operation
    398         wait_ctx( TL_GET( this_thread ), user_info );
     451        wait_ctx( active_thread(), user_info );
    399452
    400453        // Append the current wait operation to the ones already queued on the condition
    401454        // We don't need locks for that since conditions must always be waited on inside monitor mutual exclusion
     455        /* paranoid */ verify( waiter.next == 0p );
    402456        append( this.blocked, &waiter );
     457        /* paranoid */ verify( waiter.next == 1p );
    403458
    404459        // Lock all monitors (aggregates the locks as well)
     
    407462        // Find the next thread(s) to run
    408463        __lock_size_t thread_count = 0;
    409         thread_desc * threads[ count ];
     464        $thread * threads[ count ];
    410465        __builtin_memset( threads, 0, sizeof( threads ) );
    411466
     
    415470        // Remove any duplicate threads
    416471        for( __lock_size_t i = 0; i < count; i++) {
    417                 thread_desc * new_owner = next_thread( monitors[i] );
     472                $thread * new_owner = next_thread( monitors[i] );
    418473                insert_unique( threads, thread_count, new_owner );
    419474        }
    420475
     476        // Unlock the locks, we don't need them anymore
     477        for(int i = 0; i < count; i++) {
     478                unlock( *locks[i] );
     479        }
     480
     481        // Wake the threads
     482        for(int i = 0; i < thread_count; i++) {
     483                unpark( threads[i] );
     484        }
     485
    421486        // Everything is ready to go to sleep
    422         BlockInternal( locks, count, threads, thread_count );
     487        park();
    423488
    424489        // We are back, restore the owners and recursions
     
    435500        //Some more checking in debug
    436501        __cfaabi_dbg_debug_do(
    437                 thread_desc * this_thrd = TL_GET( this_thread );
     502                $thread * this_thrd = active_thread();
    438503                if ( this.monitor_count != this_thrd->monitors.size ) {
    439504                        abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
     
    483548
    484549        // Create the node specific to this wait operation
    485         wait_ctx_primed( kernelTLS.this_thread, 0 )
     550        wait_ctx_primed( active_thread(), 0 )
    486551
    487552        //save contexts
     
    489554
    490555        //Find the thread to run
    491         thread_desc * signallee = pop_head( this.blocked )->waiting_thread;
    492         set_owner( monitors, count, signallee );
     556        $thread * signallee = pop_head( this.blocked )->waiting_thread;
     557        __set_owner( monitors, count, signallee );
    493558
    494559        __cfaabi_dbg_print_buffer_decl( "Kernel : signal_block condition %p (s: %p)\n", &this, signallee );
    495560
     561        // unlock all the monitors
     562        unlock_all( locks, count );
     563
     564        // unpark the thread we signalled
     565        unpark( signallee );
     566
    496567        //Everything is ready to go to sleep
    497         BlockInternal( locks, count, &signallee, 1 );
     568        park();
    498569
    499570
     
    536607        // Create one!
    537608        __lock_size_t max = count_max( mask );
    538         monitor_desc * mon_storage[max];
     609        $monitor * mon_storage[max];
    539610        __builtin_memset( mon_storage, 0, sizeof( mon_storage ) );
    540611        __lock_size_t actual_count = aggregate( mon_storage, mask );
     
    554625        {
    555626                // Check if the entry queue
    556                 thread_desc * next; int index;
     627                $thread * next; int index;
    557628                [next, index] = search_entry_queue( mask, monitors, count );
    558629
     
    564635                                verifyf( accepted.size == 1,  "ERROR: Accepted dtor has more than 1 mutex parameter." );
    565636
    566                                 monitor_desc * mon2dtor = accepted[0];
     637                                $monitor * mon2dtor = accepted[0];
    567638                                verifyf( mon2dtor->dtor_node, "ERROR: Accepted monitor has no dtor_node." );
    568639
     
    576647
    577648                                // Create the node specific to this wait operation
    578                                 wait_ctx_primed( kernelTLS.this_thread, 0 );
     649                                wait_ctx_primed( active_thread(), 0 );
    579650
    580651                                // Save monitor states
     
    590661
    591662                                // Set the owners to be the next thread
    592                                 set_owner( monitors, count, next );
    593 
    594                                 // Everything is ready to go to sleep
    595                                 BlockInternal( locks, count, &next, 1 );
     663                                __set_owner( monitors, count, next );
     664
     665                                // unlock all the monitors
     666                                unlock_all( locks, count );
     667
     668                                // unpark the thread we signalled
     669                                unpark( next );
     670
     671                                //Everything is ready to go to sleep
     672                                park();
    596673
    597674                                // We are back, restore the owners and recursions
     
    622699
    623700        // Create the node specific to this wait operation
    624         wait_ctx_primed( kernelTLS.this_thread, 0 );
     701        wait_ctx_primed( active_thread(), 0 );
    625702
    626703        monitor_save;
     
    628705
    629706        for( __lock_size_t i = 0; i < count; i++) {
    630                 verify( monitors[i]->owner == kernelTLS.this_thread );
    631         }
     707                verify( monitors[i]->owner == active_thread() );
     708        }
     709
     710        // unlock all the monitors
     711        unlock_all( locks, count );
    632712
    633713        //Everything is ready to go to sleep
    634         BlockInternal( locks, count );
     714        park();
    635715
    636716
     
    649729// Utilities
    650730
    651 static inline void set_owner( monitor_desc * this, thread_desc * owner ) {
    652         // __cfaabi_dbg_print_safe( "Kernal :   Setting owner of %p to %p ( was %p)\n", this, owner, this->owner );
     731static inline void __set_owner( $monitor * this, $thread * owner ) {
     732        /* paranoid */ verify( this->lock.lock );
    653733
    654734        //Pass the monitor appropriately
     
    659739}
    660740
    661 static inline void set_owner( monitor_desc * monitors [], __lock_size_t count, thread_desc * owner ) {
    662         monitors[0]->owner     = owner;
    663         monitors[0]->recursion = 1;
     741static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
     742        /* paranoid */ verify ( monitors[0]->lock.lock );
     743        /* paranoid */ verifyf( monitors[0]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[0]->owner, monitors[0]->recursion, monitors[0] );
     744        monitors[0]->owner        = owner;
     745        monitors[0]->recursion    = 1;
    664746        for( __lock_size_t i = 1; i < count; i++ ) {
    665                 monitors[i]->owner     = owner;
    666                 monitors[i]->recursion = 0;
    667         }
    668 }
    669 
    670 static inline void set_mask( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask ) {
     747                /* paranoid */ verify ( monitors[i]->lock.lock );
     748                /* paranoid */ verifyf( monitors[i]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[i]->owner, monitors[i]->recursion, monitors[i] );
     749                monitors[i]->owner        = owner;
     750                monitors[i]->recursion    = 0;
     751        }
     752}
     753
     754static inline void set_mask( $monitor * storage [], __lock_size_t count, const __waitfor_mask_t & mask ) {
    671755        for( __lock_size_t i = 0; i < count; i++) {
    672756                storage[i]->mask = mask;
     
    674758}
    675759
    676 static inline void reset_mask( monitor_desc * this ) {
     760static inline void reset_mask( $monitor * this ) {
    677761        this->mask.accepted = 0p;
    678762        this->mask.data = 0p;
     
    680764}
    681765
    682 static inline thread_desc * next_thread( monitor_desc * this ) {
     766static inline $thread * next_thread( $monitor * this ) {
    683767        //Check the signaller stack
    684768        __cfaabi_dbg_print_safe( "Kernel :  mon %p AS-stack top %p\n", this, this->signal_stack.top);
     
    688772                //regardless of if we are ready to baton pass,
    689773                //we need to set the monitor as in use
    690                 set_owner( this,  urgent->owner->waiting_thread );
     774                /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     775                __set_owner( this,  urgent->owner->waiting_thread );
    691776
    692777                return check_condition( urgent );
     
    695780        // No signaller thread
    696781        // Get the next thread in the entry_queue
    697         thread_desc * new_owner = pop_head( this->entry_queue );
    698         set_owner( this, new_owner );
     782        $thread * new_owner = pop_head( this->entry_queue );
     783        /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     784        /* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
     785        __set_owner( this, new_owner );
    699786
    700787        return new_owner;
    701788}
    702789
    703 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & group ) {
     790static inline bool is_accepted( $monitor * this, const __monitor_group_t & group ) {
    704791        __acceptable_t * it = this->mask.data; // Optim
    705792        __lock_size_t count = this->mask.size;
     
    723810}
    724811
    725 static inline void init( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
     812static inline void init( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    726813        for( __lock_size_t i = 0; i < count; i++) {
    727814                (criteria[i]){ monitors[i], waiter };
     
    731818}
    732819
    733 static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
     820static inline void init_push( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    734821        for( __lock_size_t i = 0; i < count; i++) {
    735822                (criteria[i]){ monitors[i], waiter };
     
    747834}
    748835
    749 static inline void lock_all( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
     836static inline void lock_all( $monitor * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
    750837        for( __lock_size_t i = 0; i < count; i++ ) {
    751838                __spinlock_t * l = &source[i]->lock;
     
    761848}
    762849
    763 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count ) {
     850static inline void unlock_all( $monitor * locks [], __lock_size_t count ) {
    764851        for( __lock_size_t i = 0; i < count; i++ ) {
    765852                unlock( locks[i]->lock );
     
    768855
    769856static inline void save(
    770         monitor_desc * ctx [],
     857        $monitor * ctx [],
    771858        __lock_size_t count,
    772859        __attribute((unused)) __spinlock_t * locks [],
     
    781868
    782869static inline void restore(
    783         monitor_desc * ctx [],
     870        $monitor * ctx [],
    784871        __lock_size_t count,
    785872        __spinlock_t * locks [],
     
    799886// 2 - Checks if all the monitors are ready to run
    800887//     if so return the thread to run
    801 static inline thread_desc * check_condition( __condition_criterion_t * target ) {
     888static inline $thread * check_condition( __condition_criterion_t * target ) {
    802889        __condition_node_t * node = target->owner;
    803890        unsigned short count = node->count;
     
    817904        }
    818905
    819         __cfaabi_dbg_print_safe( "Kernel :  Runing %i (%p)\n", ready2run, ready2run ? node->waiting_thread : 0p );
     906        __cfaabi_dbg_print_safe( "Kernel :  Runing %i (%p)\n", ready2run, ready2run ? (thread*)node->waiting_thread : (thread*)0p );
    820907        return ready2run ? node->waiting_thread : 0p;
    821908}
    822909
    823910static inline void brand_condition( condition & this ) {
    824         thread_desc * thrd = TL_GET( this_thread );
     911        $thread * thrd = active_thread();
    825912        if( !this.monitors ) {
    826913                // __cfaabi_dbg_print_safe( "Branding\n" );
     
    828915                this.monitor_count = thrd->monitors.size;
    829916
    830                 this.monitors = (monitor_desc **)malloc( this.monitor_count * sizeof( *this.monitors ) );
     917                this.monitors = ($monitor **)malloc( this.monitor_count * sizeof( *this.monitors ) );
    831918                for( int i = 0; i < this.monitor_count; i++ ) {
    832919                        this.monitors[i] = thrd->monitors[i];
     
    835922}
    836923
    837 static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t & mask, monitor_desc * monitors [], __lock_size_t count ) {
    838 
    839         __queue_t(thread_desc) & entry_queue = monitors[0]->entry_queue;
     924static inline [$thread *, int] search_entry_queue( const __waitfor_mask_t & mask, $monitor * monitors [], __lock_size_t count ) {
     925
     926        __queue_t($thread) & entry_queue = monitors[0]->entry_queue;
    840927
    841928        // For each thread in the entry-queue
    842         for(    thread_desc ** thrd_it = &entry_queue.head;
    843                 *thrd_it;
    844                 thrd_it = &(*thrd_it)->next
     929        for(    $thread ** thrd_it = &entry_queue.head;
     930                (*thrd_it) != 1p;
     931                thrd_it = &(*thrd_it)->link.next
    845932        ) {
    846933                // For each acceptable check if it matches
     
    884971}
    885972
    886 static inline __lock_size_t aggregate( monitor_desc * storage [], const __waitfor_mask_t & mask ) {
     973static inline __lock_size_t aggregate( $monitor * storage [], const __waitfor_mask_t & mask ) {
    887974        __lock_size_t size = 0;
    888975        for( __lock_size_t i = 0; i < mask.size; i++ ) {
  • libcfa/src/concurrency/monitor.hfa

    rbdfc032 reef8dfb  
    2323
    2424trait is_monitor(dtype T) {
    25         monitor_desc * get_monitor( T & );
     25        $monitor * get_monitor( T & );
    2626        void ^?{}( T & mutex );
    2727};
    2828
    29 static inline void ?{}(monitor_desc & this) with( this ) {
     29static inline void ?{}($monitor & this) with( this ) {
    3030        lock{};
    3131        entry_queue{};
     
    3939}
    4040
    41 static inline void ^?{}(monitor_desc & ) {}
     41static inline void ^?{}($monitor & ) {}
    4242
    4343struct monitor_guard_t {
    44         monitor_desc **         m;
     44        $monitor **     m;
    4545        __lock_size_t           count;
    4646        __monitor_group_t prev;
    4747};
    4848
    49 void ?{}( monitor_guard_t & this, monitor_desc ** m, __lock_size_t count, void (*func)() );
     49void ?{}( monitor_guard_t & this, $monitor ** m, __lock_size_t count, void (*func)() );
    5050void ^?{}( monitor_guard_t & this );
    5151
    5252struct monitor_dtor_guard_t {
    53         monitor_desc *    m;
     53        $monitor *    m;
    5454        __monitor_group_t prev;
     55        bool join;
    5556};
    5657
    57 void ?{}( monitor_dtor_guard_t & this, monitor_desc ** m, void (*func)() );
     58void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)(), bool join );
    5859void ^?{}( monitor_dtor_guard_t & this );
    5960
     
    7273
    7374        // The monitor this criterion concerns
    74         monitor_desc * target;
     75        $monitor * target;
    7576
    7677        // The parent node to which this criterion belongs
     
    8788struct __condition_node_t {
    8889        // Thread that needs to be woken when all criteria are met
    89         thread_desc * waiting_thread;
     90        $thread * waiting_thread;
    9091
    9192        // Array of criteria (Criterions are contiguous in memory)
     
    106107}
    107108
    108 void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info );
     109void ?{}(__condition_node_t & this, $thread * waiting_thread, __lock_size_t count, uintptr_t user_info );
    109110void ?{}(__condition_criterion_t & this );
    110 void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t * owner );
     111void ?{}(__condition_criterion_t & this, $monitor * target, __condition_node_t * owner );
    111112
    112113struct condition {
     
    115116
    116117        // Array of monitor pointers (Monitors are NOT contiguous in memory)
    117         monitor_desc ** monitors;
     118        $monitor ** monitors;
    118119
    119120        // Number of monitors in the array
     
    131132
    132133              void wait        ( condition & this, uintptr_t user_info = 0 );
     134static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
    133135              bool signal      ( condition & this );
    134136              bool signal_block( condition & this );
    135 static inline bool is_empty    ( condition & this ) { return !this.blocked.head; }
     137static inline bool signal_all  ( condition & this ) { bool ret = false; while(!is_empty(this)) { ret = signal(this) || ret; } return ret; }
    136138         uintptr_t front       ( condition & this );
    137139
  • libcfa/src/concurrency/mutex.cfa

    rbdfc032 reef8dfb  
    3030        this.lock{};
    3131        this.blocked_threads{};
     32        this.is_locked = false;
    3233}
    3334
     
    3940        lock( lock __cfaabi_dbg_ctx2 );
    4041        if( is_locked ) {
    41                 append( blocked_threads, kernelTLS.this_thread );
    42                 BlockInternal( &lock );
     42                append( blocked_threads, active_thread() );
     43                unlock( lock );
     44                park();
    4345        }
    4446        else {
     
    6264        lock( this.lock __cfaabi_dbg_ctx2 );
    6365        this.is_locked = (this.blocked_threads != 0);
    64         WakeThread(
     66        unpark(
    6567                pop_head( this.blocked_threads )
    6668        );
     
    8486        lock( lock __cfaabi_dbg_ctx2 );
    8587        if( owner == 0p ) {
    86                 owner = kernelTLS.this_thread;
     88                owner = active_thread();
    8789                recursion_count = 1;
    8890                unlock( lock );
    8991        }
    90         else if( owner == kernelTLS.this_thread ) {
     92        else if( owner == active_thread() ) {
    9193                recursion_count++;
    9294                unlock( lock );
    9395        }
    9496        else {
    95                 append( blocked_threads, kernelTLS.this_thread );
    96                 BlockInternal( &lock );
     97                append( blocked_threads, active_thread() );
     98                unlock( lock );
     99                park();
    97100        }
    98101}
     
    102105        lock( lock __cfaabi_dbg_ctx2 );
    103106        if( owner == 0p ) {
    104                 owner = kernelTLS.this_thread;
     107                owner = active_thread();
    105108                recursion_count = 1;
    106109                ret = true;
    107110        }
    108         else if( owner == kernelTLS.this_thread ) {
     111        else if( owner == active_thread() ) {
    109112                recursion_count++;
    110113                ret = true;
     
    118121        recursion_count--;
    119122        if( recursion_count == 0 ) {
    120                 thread_desc * thrd = pop_head( blocked_threads );
     123                $thread * thrd = pop_head( blocked_threads );
    121124                owner = thrd;
    122125                recursion_count = (thrd ? 1 : 0);
    123                 WakeThread( thrd );
     126                unpark( thrd );
    124127        }
    125128        unlock( lock );
     
    138141void notify_one(condition_variable & this) with(this) {
    139142        lock( lock __cfaabi_dbg_ctx2 );
    140         WakeThread(
     143        unpark(
    141144                pop_head( this.blocked_threads )
    142145        );
     
    147150        lock( lock __cfaabi_dbg_ctx2 );
    148151        while(this.blocked_threads) {
    149                 WakeThread(
     152                unpark(
    150153                        pop_head( this.blocked_threads )
    151154                );
     
    156159void wait(condition_variable & this) {
    157160        lock( this.lock __cfaabi_dbg_ctx2 );
    158         append( this.blocked_threads, kernelTLS.this_thread );
    159         BlockInternal( &this.lock );
     161        append( this.blocked_threads, active_thread() );
     162        unlock( this.lock );
     163        park();
    160164}
    161165
     
    163167void wait(condition_variable & this, L & l) {
    164168        lock( this.lock __cfaabi_dbg_ctx2 );
    165         append( this.blocked_threads, kernelTLS.this_thread );
    166         void __unlock(void) {
    167                 unlock(l);
    168                 unlock(this.lock);
    169         }
    170         BlockInternal( __unlock );
     169        append( this.blocked_threads, active_thread() );
     170        unlock(l);
     171        unlock(this.lock);
     172        park();
    171173        lock(l);
    172174}
  • libcfa/src/concurrency/mutex.hfa

    rbdfc032 reef8dfb  
    3636
    3737        // List of blocked threads
    38         __queue_t(struct thread_desc) blocked_threads;
     38        __queue_t(struct $thread) blocked_threads;
    3939
    4040        // Locked flag
     
    5555
    5656        // List of blocked threads
    57         __queue_t(struct thread_desc) blocked_threads;
     57        __queue_t(struct $thread) blocked_threads;
    5858
    5959        // Current thread owning the lock
    60         struct thread_desc * owner;
     60        struct $thread * owner;
    6161
    6262        // Number of recursion level
     
    8383
    8484        // List of blocked threads
    85         __queue_t(struct thread_desc) blocked_threads;
     85        __queue_t(struct $thread) blocked_threads;
    8686};
    8787
  • libcfa/src/concurrency/preemption.cfa

    rbdfc032 reef8dfb  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Dec  5 16:34:05 2019
    13 // Update Count     : 43
     12// Last Modified On : Fri Nov  6 07:42:13 2020
     13// Update Count     : 54
    1414//
    1515
     
    1919#include <assert.h>
    2020
    21 extern "C" {
    2221#include <errno.h>
    2322#include <stdio.h>
     
    2524#include <unistd.h>
    2625#include <limits.h>                                                                             // PTHREAD_STACK_MIN
    27 }
    2826
    2927#include "bits/signal.hfa"
     28#include "kernel_private.hfa"
    3029
    3130#if !defined(__CFA_DEFAULT_PREEMPTION__)
     
    3938// FwdDeclarations : timeout handlers
    4039static void preempt( processor   * this );
    41 static void timeout( thread_desc * this );
     40static void timeout( $thread * this );
    4241
    4342// FwdDeclarations : Signal handlers
    4443static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
     44static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
    4545static void sigHandler_segv     ( __CFA_SIGPARMS__ );
    4646static void sigHandler_ill      ( __CFA_SIGPARMS__ );
     
    5656#elif defined( __x86_64 )
    5757#define CFA_REG_IP gregs[REG_RIP]
    58 #elif defined( __ARM_ARCH )
     58#elif defined( __arm__ )
    5959#define CFA_REG_IP arm_pc
     60#elif defined( __aarch64__ )
     61#define CFA_REG_IP pc
    6062#else
    61 #error unknown hardware architecture
     63#error unsupported hardware architecture
    6264#endif
    6365
     
    8385// Get next expired node
    8486static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    85         if( !alarms->head ) return 0p;                                          // If no alarms return null
    86         if( alarms->head->alarm >= currtime ) return 0p;        // If alarms head not expired return null
     87        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
     88        if( (*alarms)`first.alarm >= currtime ) return 0p;      // If alarms head not expired return null
    8789        return pop(alarms);                                                                     // Otherwise just pop head
    8890}
    8991
    9092// Tick one frame of the Discrete Event Simulation for alarms
    91 static void tick_preemption() {
     93static void tick_preemption(void) {
    9294        alarm_node_t * node = 0p;                                                       // Used in the while loop but cannot be declared in the while condition
    9395        alarm_list_t * alarms = &event_kernel->alarms;          // Local copy for ease of reading
     
    9799        while( node = get_expired( alarms, currtime ) ) {
    98100                // __cfaabi_dbg_print_buffer_decl( " KERNEL: preemption tick.\n" );
     101                Duration period = node->period;
     102                if( period == 0) {
     103                        node->set = false;                  // Node is one-shot, just mark it as not pending
     104                }
    99105
    100106                // Check if this is a kernel
    101                 if( node->kernel_alarm ) {
     107                if( node->type == Kernel ) {
    102108                        preempt( node->proc );
    103109                }
     110                else if( node->type == User ) {
     111                        timeout( node->thrd );
     112                }
    104113                else {
    105                         timeout( node->thrd );
     114                        node->callback(*node);
    106115                }
    107116
    108117                // Check if this is a periodic alarm
    109                 Duration period = node->period;
    110118                if( period > 0 ) {
    111119                        // __cfaabi_dbg_print_buffer_local( " KERNEL: alarm period is %lu.\n", period.tv );
     
    113121                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    114122                }
    115                 else {
    116                         node->set = false;                  // Node is one-shot, just mark it as not pending
    117                 }
    118123        }
    119124
    120125        // If there are still alarms pending, reset the timer
    121         if( alarms->head ) {
    122                 __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
    123                 Duration delta = alarms->head->alarm - currtime;
    124                 Duration caped = max(delta, 50`us);
     126        if( & (*alarms)`first ) {
     127                __cfadbg_print_buffer_decl(preemption, " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
     128                Duration delta = (*alarms)`first.alarm - currtime;
     129                Duration capped = max(delta, 50`us);
    125130                // itimerval tim  = { caped };
    126131                // __cfaabi_dbg_print_buffer_local( "    Values are %lu, %lu, %lu %lu.\n", delta.tv, caped.tv, tim.it_value.tv_sec, tim.it_value.tv_usec);
    127132
    128                 __kernel_set_timer( caped );
     133                __kernel_set_timer( capped );
    129134        }
    130135}
     
    158163// Kernel Signal Tools
    159164//=============================================================================================
    160 
    161 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
     165// In a user-level threading system, there are handful of thread-local variables where this problem occurs on the ARM.
     166//
     167// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
     168// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
     169//
     170// For example, this code fragment sets the state of the "interrupt" variable in thread-local memory.
     171//
     172// _Thread_local volatile int interrupts;
     173// int main() {
     174//     interrupts = 0; // disable interrupts }
     175//
     176// which generates the following code on the ARM
     177//
     178// (gdb) disassemble main
     179// Dump of assembler code for function main:
     180//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
     181//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
     182//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
     183//    0x000000000000061c <+12>: add     x1, x1, #0x10
     184//    0x0000000000000620 <+16>: str     wzr, [x1]
     185//    0x0000000000000624 <+20>: ret
     186//
     187// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
     188// increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TSL of variable
     189// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
     190// TSL. Now once x1 has the pointer to the location of the TSL for kernel thread N, it can be be preempted at a
     191// user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
     192// the user-level ready-queue it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
     193// turning off interrupt on the wrong kernel thread.
     194//
     195// On the x86, the following code is generated for the same code fragment.
     196//
     197// (gdb) disassemble main
     198// Dump of assembler code for function main:
     199//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
     200//    0x000000000040042c <+12>: xor    %eax,%eax
     201//    0x000000000040042e <+14>: retq
     202//
     203// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TSL pointer in
     204// register "fs".
     205//
     206// Hence, the ARM has base-displacement address for the general purpose registers, BUT not to the coprocessor
     207// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
     208//
     209// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
     210// thread on the same kernel thread.
     211//
     212// The obvious question is why does ARM use a coprocessor register to store the TSL pointer given that coprocessor
     213// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
     214// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
     215// available.
     216
     217//-----------------------------------------------------------------------------
     218// Some assembly required
     219#define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)
     220
     221//----------
     222// special case for preemption since used often
     223bool __preemption_enabled() {
     224        // create a assembler label before
     225        // marked as clobber all to avoid movement
     226        __cfaasm_label(check, before);
     227
     228        // access tls as normal
     229        bool enabled = __cfaabi_tls.preemption_state.enabled;
     230
     231        // create a assembler label after
     232        // marked as clobber all to avoid movement
     233        __cfaasm_label(check, after);
     234        return enabled;
     235}
     236
     237struct asm_region {
     238        void * before;
     239        void * after;
     240};
     241
     242static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
     243        return ip >= region.before && ip <= region.after;
     244}
     245
     246
     247//----------
     248// Get data from the TLS block
     249// struct asm_region __cfaasm_get;
     250uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
     251uintptr_t __cfatls_get( unsigned long int offset ) {
     252        // create a assembler label before
     253        // marked as clobber all to avoid movement
     254        __cfaasm_label(get, before);
     255
     256        // access tls as normal (except for pointer arithmetic)
     257        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
     258
     259        // create a assembler label after
     260        // marked as clobber all to avoid movement
     261        __cfaasm_label(get, after);
     262        return val;
     263}
    162264
    163265extern "C" {
    164266        // Disable interrupts by incrementing the counter
    165267        void disable_interrupts() {
    166                 with( kernelTLS.preemption_state ) {
     268                // create a assembler label before
     269                // marked as clobber all to avoid movement
     270                __cfaasm_label(dsable, before);
     271
     272                with( __cfaabi_tls.preemption_state ) {
    167273                        #if GCC_VERSION > 50000
    168274                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     
    181287                        verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
    182288                }
     289
     290                // create a assembler label after
     291                // marked as clobber all to avoid movement
     292                __cfaasm_label(dsable, after);
     293
    183294        }
    184295
    185296        // Enable interrupts by decrementing the counter
    186         // If counter reaches 0, execute any pending CtxSwitch
     297        // If counter reaches 0, execute any pending __cfactx_switch
    187298        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
    188                 processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
    189                 thread_desc * thrd = kernelTLS.this_thread;       // Cache the thread now since interrupts can start happening after the atomic store
    190 
    191                 with( kernelTLS.preemption_state ){
     299                // Cache the processor now since interrupts can start happening after the atomic store
     300                processor   * proc = __cfaabi_tls.this_processor;
     301                /* paranoid */ verify( proc );
     302
     303                with( __cfaabi_tls.preemption_state ){
    192304                        unsigned short prev = disable_count;
    193305                        disable_count -= 1;
    194                         verify( prev != 0u );                     // If this triggers someone is enabled already enabled interruptsverify( prev != 0u );
     306
     307                        // If this triggers someone is enabled already enabled interruptsverify( prev != 0u );
     308                        /* paranoid */ verify( prev != 0u );
    195309
    196310                        // Check if we need to prempt the thread because an interrupt was missed
    197311                        if( prev == 1 ) {
    198312                                #if GCC_VERSION > 50000
    199                                 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     313                                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
    200314                                #endif
    201315
     
    209323                                if( proc->pending_preemption ) {
    210324                                        proc->pending_preemption = false;
    211                                         BlockInternal( thrd );
     325                                        force_yield( __POLL_PREEMPTION );
    212326                                }
    213327                        }
     
    219333
    220334        // Disable interrupts by incrementint the counter
    221         // Don't execute any pending CtxSwitch even if counter reaches 0
     335        // Don't execute any pending __cfactx_switch even if counter reaches 0
    222336        void enable_interrupts_noPoll() {
    223                 unsigned short prev = kernelTLS.preemption_state.disable_count;
    224                 kernelTLS.preemption_state.disable_count -= 1;
    225                 verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
     337                unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
     338                __cfaabi_tls.preemption_state.disable_count -= 1;
     339                // If this triggers someone is enabled already enabled interrupts
     340                /* paranoid */ verifyf( prev != 0u, "Incremented from %u\n", prev );
    226341                if( prev == 1 ) {
    227342                        #if GCC_VERSION > 50000
    228                         static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
     343                                static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
    229344                        #endif
    230345                        // Set enabled flag to true
    231346                        // should be atomic to avoid preemption in the middle of the operation.
    232347                        // use memory order RELAXED since there is no inter-thread on this variable requirements
    233                         __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
     348                        __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
    234349
    235350                        // Signal the compiler that a fence is needed but only for signal handlers
     
    238353        }
    239354}
     355
     356//-----------------------------------------------------------------------------
     357// Kernel Signal Debug
     358void __cfaabi_check_preemption() {
     359        bool ready = __preemption_enabled();
     360        if(!ready) { abort("Preemption should be ready"); }
     361
     362        __cfaasm_label(debug, before);
     363
     364                sigset_t oldset;
     365                int ret;
     366                ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
     367                if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
     368
     369                ret = sigismember(&oldset, SIGUSR1);
     370                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     371                if(ret == 1) { abort("ERROR SIGUSR1 is disabled"); }
     372
     373                ret = sigismember(&oldset, SIGALRM);
     374                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     375                if(ret == 0) { abort("ERROR SIGALRM is enabled"); }
     376
     377                ret = sigismember(&oldset, SIGTERM);
     378                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     379                if(ret == 1) { abort("ERROR SIGTERM is disabled"); }
     380
     381        __cfaasm_label(debug, after);
     382}
     383
     384#ifdef __CFA_WITH_VERIFY__
     385bool __cfaabi_dbg_in_kernel() {
     386        return !__preemption_enabled();
     387}
     388#endif
     389
     390#undef __cfaasm_label
     391
     392//-----------------------------------------------------------------------------
     393// Signal handling
    240394
    241395// sigprocmask wrapper : unblock a single signal
     
    257411
    258412        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
    259             abort( "internal error, pthread_sigmask" );
     413                abort( "internal error, pthread_sigmask" );
    260414        }
    261415}
     
    268422
    269423// reserved for future use
    270 static void timeout( thread_desc * this ) {
    271         //TODO : implement waking threads
    272 }
     424static void timeout( $thread * this ) {
     425        unpark( this );
     426}
     427
     428//-----------------------------------------------------------------------------
     429// Some assembly required
     430#if defined( __i386 )
     431        #ifdef __PIC__
     432                #define RELOC_PRELUDE( label ) \
     433                        "calll   .Lcfaasm_prelude_" #label "$pb\n\t" \
     434                        ".Lcfaasm_prelude_" #label "$pb:\n\t" \
     435                        "popl    %%eax\n\t" \
     436                        ".Lcfaasm_prelude_" #label "_end:\n\t" \
     437                        "addl    $_GLOBAL_OFFSET_TABLE_+(.Lcfaasm_prelude_" #label "_end-.Lcfaasm_prelude_" #label "$pb), %%eax\n\t"
     438                #define RELOC_PREFIX ""
     439                #define RELOC_SUFFIX "@GOT(%%eax)"
     440        #else
     441                #define RELOC_PREFIX "$"
     442                #define RELOC_SUFFIX ""
     443        #endif
     444        #define __cfaasm_label( label ) struct asm_region label = \
     445                ({ \
     446                        struct asm_region region; \
     447                        asm( \
     448                                RELOC_PRELUDE( label ) \
     449                                "movl " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     450                                "movl " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     451                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     452                        ); \
     453                        region; \
     454                });
     455#elif defined( __x86_64 )
     456        #ifdef __PIC__
     457                #define RELOC_PREFIX ""
     458                #define RELOC_SUFFIX "@GOTPCREL(%%rip)"
     459        #else
     460                #define RELOC_PREFIX "$"
     461                #define RELOC_SUFFIX ""
     462        #endif
     463        #define __cfaasm_label( label ) struct asm_region label = \
     464                ({ \
     465                        struct asm_region region; \
     466                        asm( \
     467                                "movq " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     468                                "movq " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     469                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     470                        ); \
     471                        region; \
     472                });
     473#elif defined( __aarch64__ )
     474        #ifdef __PIC__
     475                // Note that this works only for gcc
     476                #define __cfaasm_label( label ) struct asm_region label = \
     477                ({ \
     478                        struct asm_region region; \
     479                        asm( \
     480                                "adrp %[vb], _GLOBAL_OFFSET_TABLE_"                              "\n\t" \
     481                                "ldr  %[vb], [%[vb], #:gotpage_lo15:__cfaasm_" #label "_before]" "\n\t" \
     482                                "adrp %[va], _GLOBAL_OFFSET_TABLE_"                              "\n\t" \
     483                                "ldr  %[va], [%[va], #:gotpage_lo15:__cfaasm_" #label "_after]"  "\n\t" \
     484                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     485                        ); \
     486                        region; \
     487                });
     488        #else
     489                #error this is not the right thing to do
     490                /*
     491                #define __cfaasm_label( label ) struct asm_region label = \
     492                ({ \
     493                        struct asm_region region; \
     494                        asm( \
     495                                "adrp %[vb], __cfaasm_" #label "_before"              "\n\t" \
     496                                "add  %[vb], %[vb], :lo12:__cfaasm_" #label "_before" "\n\t" \
     497                                "adrp %[va], :got:__cfaasm_" #label "_after"          "\n\t" \
     498                                "add  %[va], %[va], :lo12:__cfaasm_" #label "_after"  "\n\t" \
     499                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     500                        ); \
     501                        region; \
     502                });
     503                */
     504        #endif
     505#else
     506        #error unknown hardware architecture
     507#endif
    273508
    274509// KERNEL ONLY
    275 // Check if a CtxSwitch signal handler shoud defer
     510// Check if a __cfactx_switch signal handler shoud defer
    276511// If true  : preemption is safe
    277512// If false : preemption is unsafe and marked as pending
    278 static inline bool preemption_ready() {
     513static inline bool preemption_ready( void * ip ) {
     514        // Get all the region for which it is not safe to preempt
     515        __cfaasm_label( get    );
     516        __cfaasm_label( check  );
     517        __cfaasm_label( dsable );
     518        __cfaasm_label( debug  );
     519
    279520        // Check if preemption is safe
    280         bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
    281 
     521        bool ready = true;
     522        if( __cfaasm_in( ip, get    ) ) { ready = false; goto EXIT; };
     523        if( __cfaasm_in( ip, check  ) ) { ready = false; goto EXIT; };
     524        if( __cfaasm_in( ip, dsable ) ) { ready = false; goto EXIT; };
     525        if( __cfaasm_in( ip, debug  ) ) { ready = false; goto EXIT; };
     526        if( !__cfaabi_tls.preemption_state.enabled) { ready = false; goto EXIT; };
     527        if( __cfaabi_tls.preemption_state.in_progress ) { ready = false; goto EXIT; };
     528
     529EXIT:
    282530        // Adjust the pending flag accordingly
    283         kernelTLS.this_processor->pending_preemption = !ready;
     531        __cfaabi_tls.this_processor->pending_preemption = !ready;
    284532        return ready;
    285533}
     
    291539// Startup routine to activate preemption
    292540// Called from kernel_startup
    293 void kernel_start_preemption() {
     541void __kernel_alarm_startup() {
    294542        __cfaabi_dbg_print_safe( "Kernel : Starting preemption\n" );
    295543
    296544        // Start with preemption disabled until ready
    297         kernelTLS.preemption_state.enabled = false;
    298         kernelTLS.preemption_state.disable_count = 1;
     545        __cfaabi_tls.preemption_state.enabled = false;
     546        __cfaabi_tls.preemption_state.disable_count = 1;
    299547
    300548        // Initialize the event kernel
     
    303551
    304552        // Setup proper signal handlers
    305         __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // CtxSwitch handler
     553        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
     554        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
    306555
    307556        signal_block( SIGALRM );
    308557
    309         alarm_stack = create_pthread( &alarm_thread, alarm_loop, 0p );
     558        alarm_stack = __create_pthread( &alarm_thread, alarm_loop, 0p );
    310559}
    311560
    312561// Shutdown routine to deactivate preemption
    313562// Called from kernel_shutdown
    314 void kernel_stop_preemption() {
     563void __kernel_alarm_shutdown() {
    315564        __cfaabi_dbg_print_safe( "Kernel : Preemption stopping\n" );
    316565
     
    326575        // Wait for the preemption thread to finish
    327576
    328         pthread_join( alarm_thread, 0p );
    329         free( alarm_stack );
     577        __destroy_pthread( alarm_thread, alarm_stack, 0p );
    330578
    331579        // Preemption is now fully stopped
     
    353601// Kernel Signal Handlers
    354602//=============================================================================================
     603__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
    355604
    356605// Context switch signal handler
    357606// Receives SIGUSR1 signal and causes the current thread to yield
    358607static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
    359         __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
     608        void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
     609        __cfaabi_dbg_debug_do( last_interrupt = ip; )
    360610
    361611        // SKULLDUGGERY: if a thread creates a processor and the immediately deletes it,
    362612        // the interrupt that is supposed to force the kernel thread to preempt might arrive
    363         // before the kernel thread has even started running. When that happens an iterrupt
    364         // we a null 'this_processor' will be caught, just ignore it.
    365         if(! kernelTLS.this_processor ) return;
     613        // before the kernel thread has even started running. When that happens, an interrupt
     614        // with a null 'this_processor' will be caught, just ignore it.
     615        if(! __cfaabi_tls.this_processor ) return;
    366616
    367617        choose(sfp->si_value.sival_int) {
    368618                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
    369                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
     619                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
    370620                default:
    371621                        abort( "internal error, signal value is %d", sfp->si_value.sival_int );
     
    373623
    374624        // Check if it is safe to preempt here
    375         if( !preemption_ready() ) { return; }
    376 
    377         __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
     625        if( !preemption_ready( ip ) ) { return; }
     626
     627        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
    378628
    379629        // Sync flag : prevent recursive calls to the signal handler
    380         kernelTLS.preemption_state.in_progress = true;
     630        __cfaabi_tls.preemption_state.in_progress = true;
    381631
    382632        // Clear sighandler mask before context switching.
     
    388638        }
    389639
    390         // TODO: this should go in finish action
    391640        // Clear the in progress flag
    392         kernelTLS.preemption_state.in_progress = false;
     641        __cfaabi_tls.preemption_state.in_progress = false;
    393642
    394643        // Preemption can occur here
    395644
    396         BlockInternal( kernelTLS.this_thread ); // Do the actual CtxSwitch
    397 }
     645        force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
     646}
     647
     648static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
     649        abort("SIGALRM should never reach the signal handler");
     650}
     651
     652#if !defined(__CFA_NO_STATISTICS__)
     653        int __print_alarm_stats = 0;
     654#endif
    398655
    399656// Main of the alarm thread
    400657// Waits on SIGALRM and send SIGUSR1 to whom ever needs it
    401658static void * alarm_loop( __attribute__((unused)) void * args ) {
     659        __processor_id_t id;
     660        id.full_proc = false;
     661        id.id = doregister(&id);
     662        __cfaabi_tls.this_proc_id = &id;
     663
     664        #if !defined(__CFA_NO_STATISTICS__)
     665                struct __stats_t local_stats;
     666                __cfaabi_tls.this_stats = &local_stats;
     667                __init_stats( &local_stats );
     668        #endif
     669
    402670        // Block sigalrms to control when they arrive
    403671        sigset_t mask;
     
    457725EXIT:
    458726        __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
     727        unregister(&id);
     728
     729        #if !defined(__CFA_NO_STATISTICS__)
     730                if( 0 != __print_alarm_stats ) {
     731                        __print_stats( &local_stats, __print_alarm_stats, "Alarm", "Thread", 0p );
     732                }
     733        #endif
    459734        return 0p;
    460735}
    461 
    462 //=============================================================================================
    463 // Kernel Signal Debug
    464 //=============================================================================================
    465 
    466 void __cfaabi_check_preemption() {
    467         bool ready = kernelTLS.preemption_state.enabled;
    468         if(!ready) { abort("Preemption should be ready"); }
    469 
    470         sigset_t oldset;
    471         int ret;
    472         ret = pthread_sigmask(0, 0p, &oldset);
    473         if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
    474 
    475         ret = sigismember(&oldset, SIGUSR1);
    476         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    477         if(ret == 1) { abort("ERROR SIGUSR1 is disabled"); }
    478 
    479         ret = sigismember(&oldset, SIGALRM);
    480         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    481         if(ret == 0) { abort("ERROR SIGALRM is enabled"); }
    482 
    483         ret = sigismember(&oldset, SIGTERM);
    484         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    485         if(ret == 1) { abort("ERROR SIGTERM is disabled"); }
    486 }
    487 
    488 #ifdef __CFA_WITH_VERIFY__
    489 bool __cfaabi_dbg_in_kernel() {
    490         return !kernelTLS.preemption_state.enabled;
    491 }
    492 #endif
    493736
    494737// Local Variables: //
  • libcfa/src/concurrency/preemption.hfa

    rbdfc032 reef8dfb  
    1616#pragma once
    1717
     18#include "bits/locks.hfa"
    1819#include "alarm.hfa"
    19 #include "kernel_private.hfa"
    2020
    21 void kernel_start_preemption();
    22 void kernel_stop_preemption();
     21struct event_kernel_t {
     22        alarm_list_t alarms;
     23        __spinlock_t lock;
     24};
     25
     26extern event_kernel_t * event_kernel;
     27
    2328void update_preemption( processor * this, Duration duration );
    2429
  • libcfa/src/concurrency/thread.cfa

    rbdfc032 reef8dfb  
    1919
    2020#include "kernel_private.hfa"
     21#include "exception.hfa"
    2122
    2223#define __CFA_INVOKE_PRIVATE__
    2324#include "invoke.h"
    2425
    25 extern "C" {
    26         #include <fenv.h>
    27         #include <stddef.h>
    28 }
    29 
    30 //extern volatile thread_local processor * this_processor;
    31 
    3226//-----------------------------------------------------------------------------
    3327// Thread ctors and dtors
    34 void ?{}(thread_desc & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
     28void ?{}($thread & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
    3529        context{ 0p, 0p };
    3630        self_cor{ name, storage, storageSize };
     31        ticket = TICKET_RUNNING;
    3732        state = Start;
     33        preempted = __NO_PREEMPTION;
    3834        curr_cor = &self_cor;
    3935        self_mon.owner = &this;
     
    4137        self_mon_p = &self_mon;
    4238        curr_cluster = &cl;
    43         next = 0p;
     39        link.next = 0p;
     40        link.prev = 0p;
     41        link.preferred = -1;
     42        #if defined( __CFA_WITH_VERIFY__ )
     43                canary = 0x0D15EA5E0D15EA5Ep;
     44        #endif
     45
     46        seqable.next = 0p;
     47        seqable.back = 0p;
    4448
    4549        node.next = 0p;
     
    5054}
    5155
    52 void ^?{}(thread_desc& this) with( this ) {
     56void ^?{}($thread& this) with( this ) {
     57        #if defined( __CFA_WITH_VERIFY__ )
     58                canary = 0xDEADDEADDEADDEADp;
     59        #endif
    5360        unregister(curr_cluster, this);
    5461        ^self_cor{};
    5562}
    5663
     64FORALL_DATA_INSTANCE(ThreadCancelled, (dtype thread_t), (thread_t))
     65
     66forall(dtype T)
     67void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src) {
     68        dst->virtual_table = src->virtual_table;
     69        dst->the_thread = src->the_thread;
     70        dst->the_exception = src->the_exception;
     71}
     72
     73forall(dtype T)
     74const char * msg(ThreadCancelled(T) *) {
     75        return "ThreadCancelled";
     76}
     77
     78forall(dtype T)
     79static void default_thread_cancel_handler(ThreadCancelled(T) & ) {
     80        abort( "Unhandled thread cancellation.\n" );
     81}
     82
     83forall(dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)))
     84void ?{}( thread_dtor_guard_t & this,
     85                T & thrd, void(*defaultResumptionHandler)(ThreadCancelled(T) &)) {
     86        $monitor * m = get_monitor(thrd);
     87        $thread * desc = get_thread(thrd);
     88
     89        // Setup the monitor guard
     90        void (*dtor)(T& mutex this) = ^?{};
     91        bool join = defaultResumptionHandler != (void(*)(ThreadCancelled(T)&))0;
     92        (this.mg){&m, (void(*)())dtor, join};
     93
     94
     95        /* paranoid */ verifyf( Halted == desc->state || Cancelled == desc->state, "Expected thread to be Halted or Cancelled, was %d\n", (int)desc->state );
     96
     97        // After the guard set-up and any wait, check for cancellation.
     98        struct _Unwind_Exception * cancellation = desc->self_cor.cancellation;
     99        if ( likely( 0p == cancellation ) ) {
     100                return;
     101        } else if ( Cancelled == desc->state ) {
     102                return;
     103        }
     104        desc->state = Cancelled;
     105        if (!join) {
     106                defaultResumptionHandler = default_thread_cancel_handler;
     107        }
     108
     109        ThreadCancelled(T) except;
     110        // TODO: Remove explitate vtable set once trac#186 is fixed.
     111        except.virtual_table = &get_exception_vtable(&except);
     112        except.the_thread = &thrd;
     113        except.the_exception = __cfaehm_cancellation_exception( cancellation );
     114        throwResume except;
     115
     116        except.the_exception->virtual_table->free( except.the_exception );
     117        free( cancellation );
     118        desc->self_cor.cancellation = 0p;
     119}
     120
     121void ^?{}( thread_dtor_guard_t & this ) {
     122        ^(this.mg){};
     123}
     124
     125//-----------------------------------------------------------------------------
     126// Starting and stopping threads
     127forall( dtype T | is_thread(T) )
     128void __thrd_start( T & this, void (*main_p)(T &) ) {
     129        $thread * this_thrd = get_thread(this);
     130
     131        disable_interrupts();
     132        __cfactx_start(main_p, get_coroutine(this), this, __cfactx_invoke_thread);
     133
     134        this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
     135        /* paranoid */ verify( this_thrd->context.SP );
     136
     137        __schedule_thread( this_thrd );
     138        enable_interrupts( __cfaabi_dbg_ctx );
     139}
     140
     141//-----------------------------------------------------------------------------
     142// Support for threads that don't ues the thread keyword
    57143forall( dtype T | sized(T) | is_thread(T) | { void ?{}(T&); } )
    58144void ?{}( scoped(T)& this ) with( this ) {
     
    73159
    74160//-----------------------------------------------------------------------------
    75 // Starting and stopping threads
    76 forall( dtype T | is_thread(T) )
    77 void __thrd_start( T & this, void (*main_p)(T &) ) {
    78         thread_desc * this_thrd = get_thread(this);
    79         thread_desc * curr_thrd = TL_GET( this_thread );
    80 
    81         disable_interrupts();
    82         CtxStart(main_p, get_coroutine(this), this, CtxInvokeThread);
    83 
    84         this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
    85         verify( this_thrd->context.SP );
    86         // CtxSwitch( &curr_thrd->context, &this_thrd->context );
    87 
    88         ScheduleThread(this_thrd);
    89         enable_interrupts( __cfaabi_dbg_ctx );
     161forall(dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)))
     162T & join( T & this ) {
     163        thread_dtor_guard_t guard = { this, defaultResumptionHandler };
     164        return this;
    90165}
    91166
    92 void yield( void ) {
    93         // Safety note : This could cause some false positives due to preemption
    94       verify( TL_GET( preemption_state.enabled ) );
    95         BlockInternal( TL_GET( this_thread ) );
    96         // Safety note : This could cause some false positives due to preemption
    97       verify( TL_GET( preemption_state.enabled ) );
    98 }
    99 
    100 void yield( unsigned times ) {
    101         for( unsigned i = 0; i < times; i++ ) {
    102                 yield();
    103         }
     167uint64_t thread_rand() {
     168        disable_interrupts();
     169        uint64_t ret = __tls_rand();
     170        enable_interrupts( __cfaabi_dbg_ctx );
     171        return ret;
    104172}
    105173
  • libcfa/src/concurrency/thread.hfa

    rbdfc032 reef8dfb  
    2222#include "kernel.hfa"
    2323#include "monitor.hfa"
     24#include "exception.hfa"
    2425
    2526//-----------------------------------------------------------------------------
    2627// thread trait
    2728trait is_thread(dtype T) {
    28       void ^?{}(T& mutex this);
    29       void main(T& this);
    30       thread_desc* get_thread(T& this);
     29        void ^?{}(T& mutex this);
     30        void main(T& this);
     31        $thread* get_thread(T& this);
    3132};
    3233
    33 #define DECL_THREAD(X) thread_desc* get_thread(X& this) { return &this.__thrd; } void main(X& this)
     34FORALL_DATA_EXCEPTION(ThreadCancelled, (dtype thread_t), (thread_t)) (
     35        thread_t * the_thread;
     36        exception_t * the_exception;
     37);
     38
     39forall(dtype T)
     40void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src);
     41
     42forall(dtype T)
     43const char * msg(ThreadCancelled(T) *);
     44
     45// define that satisfies the trait without using the thread keyword
     46#define DECL_THREAD(X) $thread* get_thread(X& this) __attribute__((const)) { return &this.__thrd; } void main(X& this)
     47
     48// Inline getters for threads/coroutines/monitors
     49forall( dtype T | is_thread(T) )
     50static inline $coroutine* get_coroutine(T & this) __attribute__((const)) { return &get_thread(this)->self_cor; }
    3451
    3552forall( dtype T | is_thread(T) )
    36 static inline coroutine_desc* get_coroutine(T & this) {
    37         return &get_thread(this)->self_cor;
    38 }
     53static inline $monitor  * get_monitor  (T & this) __attribute__((const)) { return &get_thread(this)->self_mon; }
    3954
    40 forall( dtype T | is_thread(T) )
    41 static inline monitor_desc* get_monitor(T & this) {
    42         return &get_thread(this)->self_mon;
    43 }
     55static inline $coroutine* get_coroutine($thread * this) __attribute__((const)) { return &this->self_cor; }
     56static inline $monitor  * get_monitor  ($thread * this) __attribute__((const)) { return &this->self_mon; }
    4457
    45 static inline coroutine_desc* get_coroutine(thread_desc * this) {
    46         return &this->self_cor;
    47 }
    48 
    49 static inline monitor_desc* get_monitor(thread_desc * this) {
    50         return &this->self_mon;
    51 }
    52 
     58//-----------------------------------------------------------------------------
     59// forward declarations needed for threads
    5360extern struct cluster * mainCluster;
    5461
     
    5865//-----------------------------------------------------------------------------
    5966// Ctors and dtors
    60 void ?{}(thread_desc & this, const char * const name, struct cluster & cl, void * storage, size_t storageSize );
    61 void ^?{}(thread_desc & this);
     67void ?{}($thread & this, const char * const name, struct cluster & cl, void * storage, size_t storageSize );
     68void ^?{}($thread & this);
    6269
    63 static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; }
    64 static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; }
    65 static inline void ?{}(thread_desc & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
    66 static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, 0p, 65000 }; }
    67 static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0p, stackSize }; }
    68 static inline void ?{}(thread_desc & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
    69 static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, 0p, 65000 }; }
    70 static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
    71 static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
     70static inline void ?{}($thread & this)                                                                  { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; }
     71static inline void ?{}($thread & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; }
     72static inline void ?{}($thread & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
     73static inline void ?{}($thread & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, 0p, 65000 }; }
     74static inline void ?{}($thread & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0p, stackSize }; }
     75static inline void ?{}($thread & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
     76static inline void ?{}($thread & this, const char * const name)                                         { this{ name, *mainCluster, 0p, 65000 }; }
     77static inline void ?{}($thread & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
     78static inline void ?{}($thread & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
     79
     80struct thread_dtor_guard_t {
     81        monitor_dtor_guard_t mg;
     82};
     83
     84forall( dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) )
     85void ?{}( thread_dtor_guard_t & this, T & thrd, void(*)(ThreadCancelled(T) &) );
     86void ^?{}( thread_dtor_guard_t & this );
    7287
    7388//-----------------------------------------------------------------------------
     
    88103void ^?{}( scoped(T)& this );
    89104
    90 void yield();
    91 void yield( unsigned times );
     105//-----------------------------------------------------------------------------
     106// Scheduler API
    92107
    93 static inline struct thread_desc * active_thread () { return TL_GET( this_thread ); }
     108//----------
     109// Park thread: block until corresponding call to unpark, won't block if unpark is already called
     110void park( void );
     111
     112//----------
     113// Unpark a thread, if the thread is already blocked, schedule it
     114//                  if the thread is not yet block, signal that it should rerun immediately
     115void unpark( $thread * this );
     116
     117forall( dtype T | is_thread(T) )
     118static inline void unpark( T & this ) { if(!&this) return; unpark( get_thread( this ) );}
     119
     120//----------
     121// Yield: force thread to block and be rescheduled
     122bool force_yield( enum __Preemption_Reason );
     123
     124//----------
     125// sleep: force thread to block and be rescheduled after Duration duration
     126void sleep( Duration duration );
     127
     128//----------
     129// join
     130forall( dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) )
     131T & join( T & this );
    94132
    95133// Local Variables: //
Note: See TracChangeset for help on using the changeset viewer.