//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// kernel --
//
// Author           : Thierry Delisle
// Created On       : Tue Jan 17 12:27:26 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Tue Feb  4 12:29:26 2020
// Update Count     : 22
//

#pragma once

// NOTE(review): the original include targets were stripped during extraction;
// reconstructed from usage (bool, size_t/ssize_t, pthread_t, kernel semaphore) -- confirm against upstream.
#include <stdbool.h>
#include <stdint.h>

#include "invoke.h"
#include "time_t.hfa"
#include "coroutine.hfa"

extern "C" {
	#include <pthread.h>
	#include <semaphore.h>
}

//-----------------------------------------------------------------------------
// Locks

// Counting semaphore usable from user threads; protected by its own spinlock.
struct semaphore {
	__spinlock_t lock;
	int count;
	__queue_t($thread) waiting;
};

void  ?{}(semaphore & this, int count = 1);
void ^?{}(semaphore & this);
bool   P (semaphore & this);
bool   V (semaphore & this);
bool   V (semaphore & this, unsigned count);


//-----------------------------------------------------------------------------
// Processor
extern struct cluster * mainCluster;

// Coroutine wrapping the per-processor scheduling context
coroutine processorCtx_t {
	struct processor * proc;
};

// Wrapper around kernel threads
struct processor {
	// Main state
	// Coroutine ctx that keeps the state of the processor
	struct processorCtx_t runner;

	// Cluster from which to get threads
	struct cluster * cltr;
	unsigned int id;

	// Name of the processor
	const char * name;

	// Handle to pthreads
	pthread_t kernel_thread;

	// RunThread data
	// Action to do after a thread is ran
	$thread * destroyer;

	// Preemption data
	// Node which is added in the discrete event simulation
	struct alarm_node_t * preemption_alarm;

	// If true, a preemption was triggered in an unsafe region, the processor must preempt as soon as possible
	bool pending_preemption;

	// Idle lock (kernel semaphore)
	__bin_sem_t idle;

	// Termination
	// Set to true to notify the processor should terminate
	volatile bool do_terminate;

	// Termination synchronisation (user semaphore)
	semaphore terminated;

	// pthread Stack
	void * stack;

	// Link lists fields
	// Tag renamed from __dbg_node_cltr: that tag is also defined inside
	// struct cluster with different members, which conflicts since struct
	// tags escape to the enclosing scope.
	struct __dbg_node_proc {
		processor * next;
		processor * prev;
	} node;

#ifdef __CFA_DEBUG__
	// Last function to enable preemption on this processor
	const char * last_enable;
#endif
};

void  ?{}(processor & this, const char name[], struct cluster & cltr);
void ^?{}(processor & this);

static inline void  ?{}(processor & this)                        { this{ "Anonymous Processor", *mainCluster}; }
static inline void  ?{}(processor & this, struct cluster & cltr) { this{ "Anonymous Processor", cltr}; }
static inline void  ?{}(processor & this, const char name[])     { this{name, *mainCluster }; }

// Intrusive-list accessor: exposes the prev/next links of the debug list node.
static inline [processor *&, processor *& ] __get( processor & this ) __attribute__((const)) { return this.node.[next, prev]; }

//-----------------------------------------------------------------------------
// I/O
struct __io_data;

// Flag macros parenthesized so they compose safely with &, | and comparisons.
#define CFA_CLUSTER_IO_POLLER_USER_THREAD    (1 << 0) // 0x1
#define CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS (1 << 1) // 0x2
// #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE (1 << 2) // 0x4
#define CFA_CLUSTER_IO_BUFFLEN_OFFSET        16


//-----------------------------------------------------------------------------
// Cluster Tools

// Cells used by the reader writer lock
// while not generic it only relies on an opaque pointer
struct __processor_id;

// Reader-Writer lock protecting the ready-queue
// while this lock is mostly generic some aspects
// have been hard-coded for the ready-queue for
// simplicity and performance
struct __clusterRWLock_t {
	// total cachelines allocated
	unsigned int max;

	// cachelines currently in use
	volatile unsigned int alloc;

	// cachelines ready to iterate over
	// (!= to alloc when thread is in second half of doregister)
	volatile unsigned int ready;

	// writer lock
	volatile bool lock;

	// data pointer
	__processor_id * data;
};

void  ?{}(__clusterRWLock_t & this);
void ^?{}(__clusterRWLock_t & this);

// Intrusive lanes which are used by the relaxed ready queue
struct __attribute__((aligned(128))) __intrusive_lane_t {
	// spin lock protecting the queue
	volatile bool lock;

	// anchor for the head and the tail of the queue
	struct __sentinel_t {
		// Link lists fields
		// intrusive link field for threads
		// must be exactly as in $thread
		__thread_desc_link link;
	} before, after;

#if defined(__CFA_WITH_VERIFY__)
	// id of last processor to acquire the lock
	// needed only to check for mutual exclusion violations
	unsigned int last_id;

	// number of items on this list
	// needed only to check for deadlocks
	unsigned int count;
#endif

	// Optional statistic counters
	#if !defined(__CFA_NO_SCHED_STATS__)
		struct __attribute__((aligned(64))) {
			// difference between number of push and pops
			ssize_t diff;

			// total number of pushes and pops
			size_t push;
			size_t pop ;
		} stat;
	#endif
};

void  ?{}(__intrusive_lane_t & this);
void ^?{}(__intrusive_lane_t & this);

typedef unsigned long long __cfa_readyQ_mask_t;

// enum {
// 	__cfa_ready_queue_mask_size = (64 - sizeof(size_t)) / sizeof(size_t),
// 	__cfa_max_ready_queues = __cfa_ready_queue_mask_size * 8 * sizeof(size_t)
// };

#define __cfa_lane_mask_size ((64 - sizeof(size_t)) / sizeof(__cfa_readyQ_mask_t))
#define __cfa_max_lanes (__cfa_lane_mask_size * 8 * sizeof(__cfa_readyQ_mask_t))

//TODO adjust cache size to ARCHITECTURE
// Structure holding the relaxed ready queue
struct __attribute__((aligned(128))) __ready_queue_t {
	// Data tracking how many/which lanes are used
	// Aligned to 128 for cache locality
	struct {
		// number of non-empty lanes
		volatile size_t count;

		// bit mask, set bits identify which lanes are non-empty
		volatile __cfa_readyQ_mask_t mask[ __cfa_lane_mask_size ];
	} used;

	// Data tracking the actual lanes
	// On a separate cacheline from the used struct since
	// used can change on each push/pop but this data
	// only changes on shrink/grow
	struct __attribute__((aligned(64))) {
		// Array of lanes
		__intrusive_lane_t * volatile data;

		// Number of lanes (empty or not)
		volatile size_t count;
	} lanes;

	// Statistics
	#if !defined(__CFA_NO_STATISTICS__)
		__attribute__((aligned(64))) struct {
			struct {
				// Push statistic
				struct {
					// number of attempts at pushing something
					volatile size_t attempt;

					// number of successes at pushing
					volatile size_t success;
				} push;

				// Pop statistic
				struct {
					// number of reads of the mask
					// picking an empty __cfa_readyQ_mask_t counts here
					// but not as an attempt
					volatile size_t maskrds;

					// number of attempts at popping something
					volatile size_t attempt;

					// number of successes at popping
					volatile size_t success;
				} pop;
			} pick;

			// stats on the "used" struct of the queue
			// tracks average number of queues that are not empty
			// when pushing / popping
			struct {
				volatile size_t value;
				volatile size_t count;
			} used;
		} global_stats;
	#endif
};

void  ?{}(__ready_queue_t & this);
void ^?{}(__ready_queue_t & this);

//-----------------------------------------------------------------------------
// Cluster
struct cluster {
	// Ready queue locks
	__clusterRWLock_t ready_lock;

	// Ready queue for threads
	__ready_queue_t ready_queue;

	// Name of the cluster
	const char * name;

	// Preemption rate on this cluster
	Duration preemption_rate;

	// List of processors
	__spinlock_t idle_lock;
	__dllist_t(struct processor) procs;
	__dllist_t(struct processor) idles;
	unsigned int nprocessors;

	// List of threads
	__spinlock_t thread_list_lock;
	__dllist_t(struct $thread) threads;
	unsigned int nthreads;

	// Link lists fields
	struct __dbg_node_cltr {
		cluster * next;
		cluster * prev;
	} node;

	struct __io_data * io;

	#if !defined(__CFA_NO_STATISTICS__)
		bool print_stats;
	#endif
};

extern Duration default_preemption();

void ?{} (cluster & this, const char name[], Duration preemption_rate, unsigned flags);
void ^?{}(cluster & this);

static inline void ?{} (cluster & this)                                           { this{"Anonymous Cluster", default_preemption(), 0}; }
static inline void ?{} (cluster & this, Duration preemption_rate)                 { this{"Anonymous Cluster", preemption_rate, 0}; }
static inline void ?{} (cluster & this, const char name[])                        { this{name, default_preemption(), 0}; }
static inline void ?{} (cluster & this, unsigned flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
static inline void ?{} (cluster & this, Duration preemption_rate, unsigned flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
static inline void ?{} (cluster & this, const char name[], unsigned flags)        { this{name, default_preemption(), flags}; }

// Intrusive-list accessor: exposes the prev/next links of the debug list node.
static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }

static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }

#if !defined(__CFA_NO_STATISTICS__)
	static inline void print_stats_at_exit( cluster & this ) {
		this.print_stats = true;
	}
#endif

// Local Variables: //
// mode: c //
// tab-width: 4 //
// End: //