Changes in / [2cbfe92:b7d6a36]


Files: 1 added, 15 edited

Legend:

      unmodified
    - removed
    + added
  • libcfa/src/Makefile.am

    r2cbfe92 → rb7d6a36

      thread_headers_nosrc = concurrency/invoke.h
      thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    - thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
    + thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
      else
      headers =
  • libcfa/src/Makefile.in

    r2cbfe92 → rb7d6a36

              concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
              concurrency/invoke.c concurrency/preemption.cfa \
    -         concurrency/coroutine.cfa concurrency/thread.cfa \
    -         concurrency/kernel.cfa concurrency/monitor.cfa \
    -         concurrency/mutex.cfa
    +         concurrency/ready_queue.cfa concurrency/coroutine.cfa \
    +         concurrency/thread.cfa concurrency/kernel.cfa \
    +         concurrency/monitor.cfa concurrency/mutex.cfa
      @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
      @BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
    …
      @BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \
      @BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
    - @BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3)
    + @BUILDLIB_TRUE@ concurrency/preemption.lo \
    + @BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3)
      am_libcfathread_la_OBJECTS = $(am__objects_4)
      libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS)
    …
      @BUILDLIB_FALSE@thread_headers =
      @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    - @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
    + @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}

      #----------------------------------------------------------------------------------------------------------------
    …
              concurrency/$(DEPDIR)/$(am__dirstamp)
      concurrency/preemption.lo: concurrency/$(am__dirstamp) \
    +         concurrency/$(DEPDIR)/$(am__dirstamp)
    + concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \
              concurrency/$(DEPDIR)/$(am__dirstamp)
      concurrency/coroutine.lo: concurrency/$(am__dirstamp) \
  • libcfa/src/bits/debug.hfa

    r2cbfe92 → rb7d6a36

              #include <stdarg.h>
              #include <stdio.h>
    +         #include <unistd.h>

              extern void __cfaabi_bits_write( int fd, const char buffer[], int len );
    …
      #endif

    + // #define __CFA_DEBUG_PRINT__
    +
      #ifdef __CFA_DEBUG_PRINT__
              #define __cfaabi_dbg_write( buffer, len )         __cfaabi_bits_write( STDERR_FILENO, buffer, len )
              #define __cfaabi_dbg_acquire()                    __cfaabi_bits_acquire()
              #define __cfaabi_dbg_release()                    __cfaabi_bits_release()
    -         #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   (__VA_ARGS__)
    -         #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock (__VA_ARGS__)
    -         #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer (__VA_ARGS__)
    -         #define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len );
    -         #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( __dbg_text, __dbg_len );
    +         #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe  ( STDERR_FILENO, __VA_ARGS__ )
    +         #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock( STDERR_FILENO, __VA_ARGS__ )
    +         #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer( STDERR_FILENO, __VA_ARGS__ )
    +         #define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
    +         #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
      #else
              #define __cfaabi_dbg_write(...)               ((void)0)
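
    One subtlety in the rewritten macros: __cfaabi_dbg_print_buffer_decl both declares and uses the __dbg_text/__dbg_len locals, so __cfaabi_dbg_print_buffer_local is only valid in a scope where a _decl expansion has already run. A minimal C sketch of that decl/local pattern, with illustrative dbg_print_* names rather than the library's:

        #include <stdio.h>
        #include <unistd.h>

        // Sketch of the decl/local macro pattern: the *_decl macro introduces
        // the buffer and length variables that every later *_local expansion
        // in the same scope reuses.
        #define dbg_print_decl(...)  char __dbg_text[256]; int __dbg_len = \
                snprintf( __dbg_text, 256, __VA_ARGS__ ); \
                write( STDERR_FILENO, __dbg_text, __dbg_len );
        #define dbg_print_local(...) __dbg_len = \
                snprintf( __dbg_text, 256, __VA_ARGS__ ); \
                write( STDERR_FILENO, __dbg_text, __dbg_len );

        int main(void) {
                dbg_print_decl ( "step %d\n", 1 );  // declares __dbg_text / __dbg_len
                dbg_print_local( "step %d\n", 2 );  // reuses them; invalid without a prior _decl
                return 0;
        }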
  • libcfa/src/bits/defs.hfa

    r2cbfe92 → rb7d6a36

          return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
      }
    +
    + // #define __CFA_NO_BIT_TEST_AND_SET__
    +
    + #if defined( __i386 )
    + static inline bool __atomic_bts(volatile unsigned long int * target, unsigned long int bit ) {
    +         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    +         unsigned long int mask = 1ul << bit;
    +         unsigned long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
    +         return (ret & mask) != 0;
    +     #else
    +         int result = 0;
    +         asm volatile(
    +             "LOCK btsl %[bit], %[target]\n\t"
    +             : "=@ccc" (result)
    +             : [target] "m" (*target), [bit] "r" (bit)
    +         );
    +         return result != 0;
    +     #endif
    + }
    +
    + static inline bool __atomic_btr(volatile unsigned long int * target, unsigned long int bit ) {
    +         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    +         unsigned long int mask = 1ul << bit;
    +         unsigned long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
    +         return (ret & mask) != 0;
    +         #else
    +         int result = 0;
    +         asm volatile(
    +             "LOCK btrl %[bit], %[target]\n\t"
    +             :"=@ccc" (result)
    +             : [target] "m" (*target), [bit] "r" (bit)
    +         );
    +         return result != 0;
    +     #endif
    + }
    + #elif defined( __x86_64 )
    + static inline bool __atomic_bts(volatile unsigned long long int * target, unsigned long long int bit ) {
    +         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    +         unsigned long long int mask = 1ul << bit;
    +         unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
    +         return (ret & mask) != 0;
    +     #else
    +         int result = 0;
    +         asm volatile(
    +             "LOCK btsq %[bit], %[target]\n\t"
    +             : "=@ccc" (result)
    +             : [target] "m" (*target), [bit] "r" (bit)
    +         );
    +         return result != 0;
    +     #endif
    + }
    +
    + static inline bool __atomic_btr(volatile unsigned long long int * target, unsigned long long int bit ) {
    +         #if defined(__CFA_NO_BIT_TEST_AND_SET__)
    +         unsigned long long int mask = 1ul << bit;
    +         unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
    +         return (ret & mask) != 0;
    +         #else
    +         int result = 0;
    +         asm volatile(
    +             "LOCK btrq %[bit], %[target]\n\t"
    +             :"=@ccc" (result)
    +             : [target] "m" (*target), [bit] "r" (bit)
    +         );
    +         return result != 0;
    +     #endif
    + }
    + #elif defined( __ARM_ARCH )
    +     #error __atomic_bts and __atomic_btr not implemented for arm
    + #else
    +         #error uknown hardware architecture
    + #endif
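
    Both branches of each function implement the same contract: atomically set (bts) or clear (btr) the requested bit and report whether it was previously set. A small self-contained check of the portable fallback logic, assuming GCC/Clang's __atomic builtins (standalone names, not the library's):

        #include <assert.h>
        #include <stdbool.h>

        // Portable equivalents of the fallback branches above: atomically
        // set/clear one bit and return its previous value.
        static bool bts(volatile unsigned long long * target, unsigned bit) {
                unsigned long long mask = 1ull << bit;
                unsigned long long old = __atomic_fetch_or(target, mask, __ATOMIC_RELAXED);
                return (old & mask) != 0;
        }

        static bool btr(volatile unsigned long long * target, unsigned bit) {
                unsigned long long mask = 1ull << bit;
                unsigned long long old = __atomic_fetch_and(target, ~mask, __ATOMIC_RELAXED);
                return (old & mask) != 0;
        }

        int main(void) {
                volatile unsigned long long word = 0;
                assert( !bts(&word, 3) );  // bit 3 was clear, is now set
                assert(  bts(&word, 3) );  // setting again reports it was set
                assert(  btr(&word, 3) );  // clearing reports it was set
                assert( !btr(&word, 3) );  // already clear
                assert( word == 0 );
                return 0;
        }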
  • libcfa/src/concurrency/invoke.h

    r2cbfe92 → rb7d6a36

              };

    +         // Link lists fields
    +         // instrusive link field for threads
    +         struct __thread_desc_link {
    +                 struct thread_desc * next;
    +                 struct thread_desc * prev;
    +                 unsigned long long ts;
    +         };
    +
              struct thread_desc {
                      // Core threading fields
    …
                      // Link lists fields
                      // instrusive link field for threads
    -                 struct thread_desc * next;
    +                 struct __thread_desc_link link;

                      struct {
    …
              extern "Cforall" {
                      static inline thread_desc *& get_next( thread_desc & this ) {
    -                         return this.next;
    +                         return this.link.next;
                      }

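
    Factoring the links into __thread_desc_link lets a queue's sentinel nodes embed exactly the same link layout as a real thread_desc, so head and tail are never null and a push never branches on emptiness. A hedged C sketch of that sentinel technique, with simplified stand-in types rather than the library's:

        #include <assert.h>
        #include <stddef.h>

        // Nodes and sentinels share one link struct, mirroring how the lanes'
        // before/after sentinels reuse __thread_desc_link.
        struct link { struct node * next, * prev; };
        struct node { struct link link; int value; };

        struct lane { struct node before, after; };  // only the sentinels' links are used

        static void lane_init(struct lane * l) {
                l->before.link.next = &l->after;
                l->after.link.prev  = &l->before;
        }

        // push never tests for emptiness: the sentinels are always there
        static void push_tail(struct lane * l, struct node * n) {
                struct node * prev = l->after.link.prev;
                prev->link.next    = n;
                n->link.prev       = prev;
                n->link.next       = &l->after;
                l->after.link.prev = n;
        }

        static struct node * pop_head(struct lane * l) {
                struct node * n = l->before.link.next;
                if (n == &l->after) return NULL;         // list is empty
                l->before.link.next     = n->link.next;
                n->link.next->link.prev = &l->before;
                return n;
        }

        int main(void) {
                struct lane l; lane_init(&l);
                struct node a = { .value = 1 }, b = { .value = 2 };
                push_tail(&l, &a); push_tail(&l, &b);
                assert( pop_head(&l)->value == 1 );
                assert( pop_head(&l)->value == 2 );
                assert( pop_head(&l) == NULL );
                return 0;
        }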
  • libcfa/src/concurrency/kernel.cfa

    r2cbfe92 → rb7d6a36

              self_mon.recursion = 1;
              self_mon_p = &self_mon;
    -         next = 0p;
    +         link.next = 0p;
    +         link.prev = 0p;

              node.next = 0p;
    …
              this.name = name;
              this.cltr = &cltr;
    +         id = -1u;
              terminated{ 0 };
              do_terminate = false;
    …
              this.preemption_rate = preemption_rate;
              ready_queue{};
    -         ready_queue_lock{};
    -
    -         procs{ __get };
    +         ready_lock{};
    +
              idles{ __get };
              threads{ __get };
    …
              __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);

    -         doregister(this->cltr, this);
    +         // register the processor unless it's the main thread which is handled in the boot sequence
    +         if(this != mainProcessor) {
    +                 this->id = doregister(this->cltr, this);
    +                 ready_queue_grow( this->cltr );
    +         }
    +

              {
    …
              }

    -         unregister(this->cltr, this);
    -
              V( this->terminated );

    +
    +         // unregister the processor unless it's the main thread which is handled in the boot sequence
    +         if(this != mainProcessor) {
    +                 ready_queue_shrink( this->cltr );
    +                 unregister(this->cltr, this);
    +         }
    +
              __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
    +
    +         stats_tls_tally(this->cltr);
      }

    …
              verify( ! kernelTLS.preemption_state.enabled );

    -         verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
    +         verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
    +
    +
    +         ready_schedule_lock(thrd->curr_cluster, kernelTLS.this_processor);
    +                 bool was_empty = push( thrd->curr_cluster, thrd );
    +         ready_schedule_unlock(thrd->curr_cluster, kernelTLS.this_processor);

              with( *thrd->curr_cluster ) {
    -                 lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
    -                 bool was_empty = !(ready_queue != 0);
    -                 append( ready_queue, thrd );
    -                 unlock( ready_queue_lock );
    -
    -                 if(was_empty) {
    -                         lock      (proc_list_lock __cfaabi_dbg_ctx2);
    -                         if(idles) {
    -                                 wake_fast(idles.head);
    -                         }
    -                         unlock    (proc_list_lock);
    -                 }
    -                 else if( struct processor * idle = idles.head ) {
    -                         wake_fast(idle);
    -                 }
    -
    +                 // if(was_empty) {
    +                 //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
    +                 //      if(idles) {
    +                 //              wake_fast(idles.head);
    +                 //      }
    +                 //      unlock    (proc_list_lock);
    +                 // }
    +                 // else if( struct processor * idle = idles.head ) {
    +                 //      wake_fast(idle);
    +                 // }
              }

    …
      thread_desc * nextThread(cluster * this) with( *this ) {
              verify( ! kernelTLS.preemption_state.enabled );
    -         lock( ready_queue_lock __cfaabi_dbg_ctx2 );
    -         thread_desc * head = pop_head( ready_queue );
    -         unlock( ready_queue_lock );
    +
    +         ready_schedule_lock(this, kernelTLS.this_processor);
    +                 thread_desc * head = pop( this );
    +         ready_schedule_unlock(this, kernelTLS.this_processor);
    +
              verify( ! kernelTLS.preemption_state.enabled );
              return head;
    …
                      pending_preemption = false;
                      kernel_thread = pthread_self();
    +                 id = -1u;

                      runner{ &this };
    …
              mainProcessor = (processor *)&storage_mainProcessor;
              (*mainProcessor){};
    +
    +         mainProcessor->id = doregister(mainCluster, mainProcessor);

              //initialize the global state variables
    …
              kernel_stop_preemption();

    +         unregister(mainCluster, mainProcessor);
    +
              // Destroy the main processor and its context in reverse order of construction
              // These were manually constructed so we need manually destroy them
    -         ^(mainProcessor->runner){};
    -         ^(mainProcessor){};
    +         void ^?{}(processor & this) with( this ) {
    +                 //don't join the main thread here, that wouldn't make any sense
    +                 __cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
    +         }
    +
    +         ^(*mainProcessor){};

              // Final step, destroy the main thread since it is no longer needed
    -         // Since we provided a stack to this taxk it will not destroy anything
    -         ^(mainThread){};
    +         // Since we provided a stack to this task it will not destroy anything
    +         ^(*mainThread){};
    +
    +         ^(*mainCluster){};

              ^(__cfa_dbg_global_clusters.list){};
    …
      //=============================================================================================
      static void halt(processor * this) with( *this ) {
    -         // verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
    -
    -         with( *cltr ) {
    -                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    -                 remove    (procs, *this);
    -                 push_front(idles, *this);
    -                 unlock    (proc_list_lock);
    -         }
    -
    -         __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
    -
    -         wait( idleLock );
    -
    -         __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
    -
    -         with( *cltr ) {
    -                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    -                 remove    (idles, *this);
    -                 push_front(procs, *this);
    -                 unlock    (proc_list_lock);
    -         }
    +         // // verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
    +
    +         // with( *cltr ) {
    +         //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
    +         //      push_front(idles, *this);
    +         //      unlock    (proc_list_lock);
    +         // }
    +
    +         // __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
    +
    +         // wait( idleLock );
    +
    +         // __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
    +
    +         // with( *cltr ) {
    +         //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
    +         //      remove    (idles, *this);
    +         //      unlock    (proc_list_lock);
    +         // }
      }

    …
      }

    - void doregister( cluster * cltr, processor * proc ) {
    -         lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    -         cltr->nprocessors += 1;
    -         push_front(cltr->procs, *proc);
    -         unlock    (cltr->proc_list_lock);
    - }
    -
    - void unregister( cluster * cltr, processor * proc ) {
    -         lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    -         remove(cltr->procs, *proc );
    -         cltr->nprocessors -= 1;
    -         unlock(cltr->proc_list_lock);
    - }
    -
      //-----------------------------------------------------------------------------
      // Debug
  • libcfa/src/concurrency/kernel.hfa

    r2cbfe92 → rb7d6a36

              // Cluster from which to get threads
              struct cluster * cltr;
    +         unsigned int id;

              // Name of the processor
    …
      }

    +
    + //-----------------------------------------------------------------------------
    + // Cluster Tools
    +
    + // Cells use by the reader writer lock
    + // while not generic it only relies on a opaque pointer
    + struct __processor_id;
    +
    + // Reader-Writer lock protecting the ready-queue
    + // while this lock is mostly generic some aspects
    + // have been hard-coded to for the ready-queue for
    + // simplicity and performance
    + struct __clusterRWLock_t {
    +         // total cachelines allocated
    +         unsigned int max;
    +
    +         // cachelines currently in use
    +         volatile unsigned int alloc;
    +
    +         // cachelines ready to itereate over
    +         // (!= to alloc when thread is in second half of doregister)
    +         volatile unsigned int ready;
    +
    +         // writer lock
    +         volatile bool lock;
    +
    +         // data pointer
    +         __processor_id * data;
    + };
    +
    + void  ?{}(__clusterRWLock_t & this);
    + void ^?{}(__clusterRWLock_t & this);
    +
    + // Intrusives lanes which are used by the relaxed ready queue
    + struct __attribute__((aligned(128))) __intrusive_lane_t {
    +         // spin lock protecting the queue
    +         volatile bool lock;
    +
    +         // anchor for the head and the tail of the queue
    +         struct __sentinel_t {
    +                 // Link lists fields
    +                 // instrusive link field for threads
    +                 // must be exactly as in thread_desc
    +                 __thread_desc_link link;
    +         } before, after;
    +
    + #if defined(__CFA_WITH_VERIFY__)
    +         // id of last processor to acquire the lock
    +         // needed only to check for mutual exclusion violations
    +         unsigned int last_id;
    +
    +         // number of items on this list
    +         // needed only to check for deadlocks
    +         unsigned int count;
    + #endif
    +
    +         // Optional statistic counters
    +         #if !defined(__CFA_NO_SCHED_STATS__)
    +                 struct __attribute__((aligned(64))) {
    +                         // difference between number of push and pops
    +                         ssize_t diff;
    +
    +                         // total number of pushes and pops
    +                         size_t  push;
    +                         size_t  pop ;
    +                 } stat;
    +         #endif
    + };
    +
    + void  ?{}(__intrusive_lane_t & this);
    + void ^?{}(__intrusive_lane_t & this);
    +
    + typedef unsigned long long __cfa_readyQ_mask_t;
    +
    + // enum {
    + //      __cfa_ready_queue_mask_size = (64 - sizeof(size_t)) / sizeof(size_t),
    + //      __cfa_max_ready_queues = __cfa_ready_queue_mask_size * 8 * sizeof(size_t)
    + // };
    +
    + #define __cfa_lane_mask_size ((64 - sizeof(size_t)) / sizeof(__cfa_readyQ_mask_t))
    + #define __cfa_max_lanes (__cfa_lane_mask_size * 8 * sizeof(__cfa_readyQ_mask_t))
    +
    + //TODO adjust cache size to ARCHITECTURE
    + // Structure holding the relaxed ready queue
    + struct __attribute__((aligned(128))) __ready_queue_t {
    +         // Data tracking how many/which lanes are used
    +         // Aligned to 128 for cache locality
    +         struct {
    +                 // number of non-empty lanes
    +                 volatile size_t count;
    +
    +                 // bit mask, set bits indentify which lanes are non-empty
    +                 volatile __cfa_readyQ_mask_t mask[ __cfa_lane_mask_size ];
    +         } used;
    +
    +         // Data tracking the actual lanes
    +         // On a seperate cacheline from the used struct since
    +         // used can change on each push/pop but this data
    +         // only changes on shrink/grow
    +         struct __attribute__((aligned(64))) {
    +                 // Arary of lanes
    +                 __intrusive_lane_t * volatile data;
    +
    +                 // Number of lanes (empty or not)
    +                 volatile size_t count;
    +         } lanes;
    +
    +         // Statistics
    +         #if !defined(__CFA_NO_STATISTICS__)
    +                 __attribute__((aligned(64))) struct {
    +                         struct {
    +                                 // Push statistic
    +                                 struct {
    +                                         // number of attemps at pushing something
    +                                         volatile size_t attempt;
    +
    +                                         // number of successes at pushing
    +                                         volatile size_t success;
    +                                 } push;
    +
    +                                 // Pop statistic
    +                                 struct {
    +                                         // number of reads of the mask
    +                                         // picking an empty __cfa_readyQ_mask_t counts here
    +                                         // but not as an attempt
    +                                         volatile size_t maskrds;
    +
    +                                         // number of attemps at poping something
    +                                         volatile size_t attempt;
    +
    +                                         // number of successes at poping
    +                                         volatile size_t success;
    +                                 } pop;
    +                         } pick;
    +
    +                         // stats on the "used" struct of the queue
    +                         // tracks average number of queues that are not empty
    +                         // when pushing / poping
    +                         struct {
    +                                 volatile size_t value;
    +                                 volatile size_t count;
    +                         } used;
    +                 } global_stats;
    +
    +         #endif
    + };
    +
    + void  ?{}(__ready_queue_t & this);
    + void ^?{}(__ready_queue_t & this);
    +
      //-----------------------------------------------------------------------------
      // Cluster
      struct cluster {
              // Ready queue locks
    -         __spinlock_t ready_queue_lock;
    +         __clusterRWLock_t ready_lock;

              // Ready queue for threads
    -         __queue_t(thread_desc) ready_queue;
    +         __ready_queue_t ready_queue;

              // Name of the cluster
    …
              // List of processors
              __spinlock_t proc_list_lock;
    -         __dllist_t(struct processor) procs;
              __dllist_t(struct processor) idles;
    -         unsigned int nprocessors;

              // List of threads
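
    With one bit per lane in used.mask, a pop does not have to scan every lane: it can pick a random set bit, then lock that lane and re-check it, since the mask can go stale between the read and the lock. A rough standalone C sketch of that selection step (the helper and constants here are illustrative, not the library's):

        #include <stdio.h>
        #include <stdlib.h>

        typedef unsigned long long mask_t;
        #define MASK_WORDS 4  // illustrative; the header derives the real size from the cache line

        // Pick a random set bit across the mask words; returns -1 if all are zero.
        // A real pop would then lock that lane and re-check non-emptiness,
        // because the snapshot of the mask may already be stale.
        static int pick_nonempty_lane(const mask_t mask[MASK_WORDS], unsigned rnd) {
                for (unsigned i = 0; i < MASK_WORDS; i++) {
                        unsigned w = (rnd + i) % MASK_WORDS;      // random starting word
                        mask_t word = mask[w];
                        if (word == 0) continue;                  // empty word, try the next
                        // rotate so the bit picked inside the word is also randomized
                        unsigned s = rnd % 64;
                        mask_t rot = (word >> s) | (word << ((64 - s) % 64));
                        unsigned bit = (__builtin_ctzll(rot) + s) % 64;
                        return (int)(w * 64 + bit);
                }
                return -1;
        }

        int main(void) {
                mask_t mask[MASK_WORDS] = { 0 };
                mask[1] |= 1ull << 7;  // pretend lane 71 is non-empty
                printf("picked lane %d\n", pick_nonempty_lane(mask, (unsigned)rand()));
                return 0;
        }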
  • libcfa/src/concurrency/kernel_private.hfa

    r2cbfe92 → rb7d6a36

      //-----------------------------------------------------------------------------
      // Utils
    - #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
    + #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]

      static inline uint32_t tls_rand() {
    …
      void unregister( struct cluster * cltr, struct thread_desc & thrd );

    - void doregister( struct cluster * cltr, struct processor * proc );
    - void unregister( struct cluster * cltr, struct processor * proc );
    + //=======================================================================
    + // Cluster lock API
    + //=======================================================================
    + struct __attribute__((aligned(64))) __processor_id {
    +         processor * volatile handle;
    +         volatile bool lock;
    + };
    +
    + // Lock-Free registering/unregistering of threads
    + // Register a processor to a given cluster and get its unique id in return
    + unsigned doregister( struct cluster * cltr, struct processor * proc );
    +
    + // Unregister a processor from a given cluster using its id, getting back the original pointer
    + void     unregister( struct cluster * cltr, struct processor * proc );
    +
    + //=======================================================================
    + // Reader-writer lock implementation
    + // Concurrent with doregister/unregister,
    + //    i.e., threads can be added at any point during or between the entry/exit
    +
    + //-----------------------------------------------------------------------
    + // simple spinlock underlying the RWLock
    + // Blocking acquire
    + static inline void __atomic_acquire(volatile bool * ll) {
    +         while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
    +                 while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
    +                         asm volatile("pause");
    +         }
    +         /* paranoid */ verify(*ll);
    + }
    +
    + // Non-Blocking acquire
    + static inline bool __atomic_try_acquire(volatile bool * ll) {
    +         return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
    + }
    +
    + // Release
    + static inline void __atomic_unlock(volatile bool * ll) {
    +         /* paranoid */ verify(*ll);
    +         __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
    + }
    +
    + //-----------------------------------------------------------------------
    + // Reader side : acquire when using the ready queue to schedule but not
    + //  creating/destroying queues
    + static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
    +         unsigned iproc = proc->id;
    +         /*paranoid*/ verify(data[iproc].handle == proc);
    +         /*paranoid*/ verify(iproc < ready);
    +
    +         // Step 1 : make sure no writer are in the middle of the critical section
    +         while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
    +                 asm volatile("pause");
    +
    +         // Fence needed because we don't want to start trying to acquire the lock
    +         // before we read a false.
    +         // Not needed on x86
    +         // std::atomic_thread_fence(std::memory_order_seq_cst);
    +
    +         // Step 2 : acquire our local lock
    +         __atomic_acquire( &data[iproc].lock );
    +         /*paranoid*/ verify(data[iproc].lock);
    + }
    +
    + static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
    +         unsigned iproc = proc->id;
    +         /*paranoid*/ verify(data[iproc].handle == proc);
    +         /*paranoid*/ verify(iproc < ready);
    +         /*paranoid*/ verify(data[iproc].lock);
    +         __atomic_unlock(&data[iproc].lock);
    + }
    +
    + //-----------------------------------------------------------------------
    + // Writer side : acquire when changing the ready queue, e.g. adding more
    + //  queues or removing them.
    + uint_fast32_t ready_mutate_lock( struct cluster & cltr );
    +
    + void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
    +
    + //=======================================================================
    + // Ready-Queue API
    + //-----------------------------------------------------------------------
    + // push thread onto a ready queue for a cluster
    + // returns true if the list was previously empty, false otherwise
    + __attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);
    +
    + //-----------------------------------------------------------------------
    + // pop thread from the ready queue of a cluster
    + // returns 0p if empty
    + __attribute__((hot)) thread_desc * pop(struct cluster * cltr);
    +
    + //-----------------------------------------------------------------------
    + // Increase the width of the ready queue (number of lanes) by 4
    + void ready_queue_grow  (struct cluster * cltr);
    +
    + //-----------------------------------------------------------------------
    + // Decrease the width of the ready queue (number of lanes) by 4
    + void ready_queue_shrink(struct cluster * cltr);
    +
    + //-----------------------------------------------------------------------
    + // Statics call at the end of each thread to register statistics
    + #if !defined(__CFA_NO_STATISTICS__)
    + void stats_tls_tally(struct cluster * cltr);
    + #else
    + static inline void stats_tls_tally(struct cluster * cltr) {}
    + #endif

      // Local Variables: //
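
    Concretely, the discipline these declarations describe is: each registered processor owns one lock slot, a reader waits out the writer flag and then takes only its own slot, and the writer (grow/shrink) takes the global flag and then drains every slot. A hedged C sketch of that scheme, using a fixed-size array in place of the allocator-backed data array (names are illustrative):

        #include <stdbool.h>
        #include <stdio.h>

        #define NREADERS 4

        static volatile bool reader_lock[NREADERS];  // one slot per processor id
        static volatile bool writer_lock;

        static void spin_acquire(volatile bool * l) {
                while (__atomic_exchange_n(l, true, __ATOMIC_SEQ_CST))
                        while (__atomic_load_n(l, __ATOMIC_RELAXED)) ;  // spin on reads
        }
        static void spin_release(volatile bool * l) {
                __atomic_store_n(l, false, __ATOMIC_RELEASE);
        }

        // Reader side: wait out any writer, then take only your own slot.
        // If a writer sneaks in after step 1, it still blocks on this slot,
        // so mutual exclusion holds either way.
        static void read_lock(unsigned id) {
                while (__atomic_load_n(&writer_lock, __ATOMIC_RELAXED)) ;  // step 1
                spin_acquire(&reader_lock[id]);                            // step 2
        }
        static void read_unlock(unsigned id) { spin_release(&reader_lock[id]); }

        // Writer side: block new readers, then drain each existing one.
        static void write_lock(void) {
                spin_acquire(&writer_lock);
                for (unsigned i = 0; i < NREADERS; i++) spin_acquire(&reader_lock[i]);
        }
        static void write_unlock(void) {
                for (unsigned i = 0; i < NREADERS; i++) spin_release(&reader_lock[i]);
                spin_release(&writer_lock);
        }

        int main(void) {
                read_lock(2);   // e.g. processor 2 scheduling a thread
                read_unlock(2);
                write_lock();   // e.g. ready_queue_grow
                write_unlock();
                puts("ok");
                return 0;
        }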
  • libcfa/src/concurrency/monitor.cfa

    r2cbfe92 → rb7d6a36

              }

    -         __cfaabi_dbg_print_safe( "Kernel :  Runing %i (%p)\n", ready2run, ready2run ? node->waiting_thread : 0p );
    +         __cfaabi_dbg_print_safe( "Kernel :  Runing %i (%p)\n", ready2run, ready2run ? (thread_desc*)node->waiting_thread : (thread_desc*)0p );
              return ready2run ? node->waiting_thread : 0p;
      }
    …
              for(    thread_desc ** thrd_it = &entry_queue.head;
                      *thrd_it;
    -                 thrd_it = &(*thrd_it)->next
    +                 thrd_it = &(*thrd_it)->link.next
              ) {
                      // For each acceptable check if it matches
  • libcfa/src/concurrency/preemption.cfa

    r2cbfe92 → rb7d6a36

              // If there are still alarms pending, reset the timer
              if( alarms->head ) {
    -                 __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
    +                 // __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
                      Duration delta = alarms->head->alarm - currtime;
                      Duration caped = max(delta, 50`us);
  • libcfa/src/concurrency/thread.cfa

    r2cbfe92 → rb7d6a36

              self_mon_p = &self_mon;
              curr_cluster = &cl;
    -         next = 0p;
    +         link.next = 0p;
    +         link.prev = 0p;

              node.next = 0p;
  • libcfa/src/stdhdr/assert.h

    r2cbfe92 → rb7d6a36

              #define verify(x) assert(x)
              #define verifyf(x, ...) assertf(x, __VA_ARGS__)
    +         #define verifyfail(...)
              #define __CFA_WITH_VERIFY__
      #else
              #define verify(x)
              #define verifyf(x, ...)
    +         #define verifyfail(...)
      #endif

  • tests/concurrent/examples/.expect/datingService.txt

    r2cbfe92 → rb7d6a36

    - Girl:17 is dating Boy at 2 with ccode 17
    -  Boy:2 is dating Girl 17 with ccode 17
    -  Boy:14 is dating Girl 5 with ccode 5
    - Girl:5 is dating Boy at 14 with ccode 5
    -  Boy:9 is dating Girl 10 with ccode 10
    - Girl:10 is dating Boy at 9 with ccode 10
    -  Boy:1 is dating Girl 18 with ccode 18
    - Girl:18 is dating Boy at 1 with ccode 18
    -  Boy:16 is dating Girl 3 with ccode 3
    - Girl:3 is dating Boy at 16 with ccode 3
    -  Boy:5 is dating Girl 14 with ccode 14
    - Girl:14 is dating Boy at 5 with ccode 14
    -  Boy:15 is dating Girl 4 with ccode 4
    - Girl:4 is dating Boy at 15 with ccode 4
    - Girl:0 is dating Boy at 19 with ccode 0
    -  Boy:19 is dating Girl 0 with ccode 0
    - Girl:9 is dating Boy at 10 with ccode 9
    -  Boy:10 is dating Girl 9 with ccode 9
    - Girl:11 is dating Boy at 8 with ccode 11
    -  Boy:8 is dating Girl 11 with ccode 11
    -  Boy:12 is dating Girl 7 with ccode 7
    - Girl:7 is dating Boy at 12 with ccode 7
    -  Boy:11 is dating Girl 8 with ccode 8
    - Girl:8 is dating Boy at 11 with ccode 8
    - Girl:16 is dating Boy at 3 with ccode 16
    -  Boy:3 is dating Girl 16 with ccode 16
    - Girl:15 is dating Boy at 4 with ccode 15
    -  Boy:4 is dating Girl 15 with ccode 15
    - Girl:19 is dating Boy at 0 with ccode 19
    -  Boy:0 is dating Girl 19 with ccode 19
    - Girl:2 is dating Boy at 17 with ccode 2
    -  Boy:17 is dating Girl 2 with ccode 2
    -  Boy:13 is dating Girl 6 with ccode 6
    - Girl:6 is dating Boy at 13 with ccode 6
    -  Boy:7 is dating Girl 12 with ccode 12
    - Girl:12 is dating Boy at 7 with ccode 12
    - Girl:13 is dating Boy at 6 with ccode 13
    -  Boy:6 is dating Girl 13 with ccode 13
    - Girl:1 is dating Boy at 18 with ccode 1
    -  Boy:18 is dating Girl 1 with ccode 1
  • tests/concurrent/examples/datingService.cfa

    r2cbfe92 → rb7d6a36

      //
      // Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
    - // 
    + //
      // The contents of this file are covered under the licence agreement in the
      // file "LICENCE" distributed with Cforall.
    …
                      signal_block( Boys[ccode] );                                    // restart boy to set phone number
              } // if
    -         sout | "Girl:" | PhoneNo | "is dating Boy at" | BoyPhoneNo | "with ccode" | ccode;
    +         // sout | "Girl:" | PhoneNo | "is dating Boy at" | BoyPhoneNo | "with ccode" | ccode;
              return BoyPhoneNo;
      } // DatingService girl
    …
                      signal_block( Girls[ccode] );                                   // restart girl to set phone number
              } // if
    -         sout | " Boy:" | PhoneNo | "is dating Girl" | GirlPhoneNo | "with ccode" | ccode;
    +         // sout | " Boy:" | PhoneNo | "is dating Girl" | GirlPhoneNo | "with ccode" | ccode;
              return GirlPhoneNo;
      } // DatingService boy
  • tests/concurrent/waitfor/when.cfa

    r2cbfe92 → rb7d6a36


      void arbiter( global_t & mutex this ) {
    +         // There is a race at start where callers can get in before the arbiter.
    +         // It doesn't really matter here so just restart the loop correctly and move on
    +         this.last_call = 6;
    +
              for( int i = 0; i < N; i++ ) {
                         when( this.last_call == 6 ) waitfor( call1 : this ) { if( this.last_call != 1) { serr | "Expected last_call to be 1 got" | this.last_call; } }