Context Navigation

Reverse Diff

kernel_private.hfa [dca5802:09f357ec]

File:

: 1 edited

libcfa/src/concurrency/kernel_private.hfa (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

libcfa/src/concurrency/kernel_private.hfa

-              rdca5802
+              r09f357ec
 // Threads
 extern "C" {
+      forall(dtype T | is_thread(T))
+      void CtxInvokeThread(T * this);
+      void CtxInvokeThread(void (*main)(void *), void * this);
+}
 …
 //-----------------------------------------------------------------------------
 // Utils
 #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
+#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
 static inline uint32_t tls_rand() {
 …
 void unregister( struct cluster * cltr, struct thread_desc & thrd );
+//=======================================================================
+// Cluster lock API
+//=======================================================================
+struct __attribute__((aligned(64))) __processor_id {
+        processor * volatile handle;
+        volatile bool lock;
+};
+// Lock-Free registering/unregistering of threads
+// Register a processor to a given cluster and get its unique id in return
+unsigned doregister( struct cluster * cltr, struct processor * proc );
+// Unregister a processor from a given cluster using its id, getting back the original pointer
+void     unregister( struct cluster * cltr, struct processor * proc );
+//=======================================================================
+// Reader-writer lock implementation
+// Concurrent with doregister/unregister,
+//    i.e., threads can be added at any point during or between the entry/exit
+//-----------------------------------------------------------------------
+// simple spinlock underlying the RWLock
+// Blocking acquire
+static inline void __atomic_acquire(volatile bool * ll) {
+        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
+                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
+                        asm volatile("pause");
+        }
+        /* paranoid */ verify(*ll);
+}
+// Non-Blocking acquire
+static inline bool __atomic_try_acquire(volatile bool * ll) {
+        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
+}
+// Release
+static inline void __atomic_unlock(volatile bool * ll) {
+        /* paranoid */ verify(*ll);
+        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
+}
+//-----------------------------------------------------------------------
+// Reader side : acquire when using the ready queue to schedule but not
+//  creating/destroying queues
+static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
+        unsigned iproc = proc->id;
+        /*paranoid*/ verify(data[iproc].handle == proc);
+        /*paranoid*/ verify(iproc < ready);
+        // Step 1 : make sure no writer are in the middle of the critical section
+        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+                asm volatile("pause");
+        // Fence needed because we don't want to start trying to acquire the lock
+        // before we read a false.
+        // Not needed on x86
+        // std::atomic_thread_fence(std::memory_order_seq_cst);
+        // Step 2 : acquire our local lock
+        __atomic_acquire( &data[iproc].lock );
+        /*paranoid*/ verify(data[iproc].lock);
+}
+static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
+        unsigned iproc = proc->id;
+        /*paranoid*/ verify(data[iproc].handle == proc);
+        /*paranoid*/ verify(iproc < ready);
+        /*paranoid*/ verify(data[iproc].lock);
+        __atomic_unlock(&data[iproc].lock);
+}
+//-----------------------------------------------------------------------
+// Writer side : acquire when changing the ready queue, e.g. adding more
+//  queues or removing them.
+uint_fast32_t ready_mutate_lock( struct cluster & cltr );
+void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
+//=======================================================================
+// Ready-Queue API
+//-----------------------------------------------------------------------
+// push thread onto a ready queue for a cluster
+// returns true if the list was previously empty, false otherwise
+__attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);
+//-----------------------------------------------------------------------
+// pop thread from the ready queue of a cluster
+// returns 0p if empty
+__attribute__((hot)) thread_desc * pop(struct cluster * cltr);
+//-----------------------------------------------------------------------
+// Increase the width of the ready queue (number of lanes) by 4
+void ready_queue_grow  (struct cluster * cltr);
+//-----------------------------------------------------------------------
+// Decrease the width of the ready queue (number of lanes) by 4
+void ready_queue_shrink(struct cluster * cltr);
+//-----------------------------------------------------------------------
+// Statics call at the end of each thread to register statistics
+#if !defined(__CFA_NO_STATISTICS__)
+void stats_tls_tally(struct cluster * cltr);
+#else
+static inline void stats_tls_tally(struct cluster * cltr) {}
+#endif
+void doregister( struct cluster * cltr, struct processor * proc );
+void unregister( struct cluster * cltr, struct processor * proc );
 // Local Variables: //

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changes in libcfa/src/concurrency/kernel_private.hfa [dca5802:09f357ec]

Legend:

libcfa/src/concurrency/kernel_private.hfa

Download in other formats: