Changes in / [00e9be9:99b2407]
- Location: libcfa/src/concurrency
- Files: 10 edited
Legend: unmodified lines have no prefix, added lines are marked "+", removed lines are marked "-", and "…" marks elided lines between hunks.
libcfa/src/concurrency/clib/cfathread.cfa
   // Mutex
   struct cfathread_mutex {
-     single_acquisition_lock impl;
+     fast_lock impl;
   };
   int cfathread_mutex_init(cfathread_mutex_t *restrict mut, const cfathread_mutexattr_t *restrict) __attribute__((nonnull (1))) { *mut = new(); return 0; }
   …
   // Condition
   struct cfathread_condition {
-     condition_variable( single_acquisition_lock ) impl;
+     condition_variable( fast_lock ) impl;
   };
   int cfathread_cond_init(cfathread_cond_t *restrict cond, const cfathread_condattr_t *restrict) __attribute__((nonnull (1))) { *cond = new(); return 0; }
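The only change here is the lock type backing the C-compatible API: single_acquisition_lock is replaced by fast_lock in both the mutex and the condition wrapper. The surrounding shim pattern is unchanged; below is a minimal sketch of how the remaining operations presumably delegate to the embedded lock (the lock/unlock calls on impl are an assumption, those functions are not part of this hunk):

    // sketch (assumed, not in this changeset): the C API forwards to the
    // CFA lock stored inside the wrapper object
    int cfathread_mutex_lock( cfathread_mutex_t * mut ) __attribute__((nonnull (1))) {
        lock( (*mut)->impl );      // assumed CFA lock interface
        return 0;
    }
    int cfathread_mutex_unlock( cfathread_mutex_t * mut ) __attribute__((nonnull (1))) {
        unlock( (*mut)->impl );
        return 0;
    }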
libcfa/src/concurrency/invoke.h
   struct $thread * prev;
   volatile unsigned long long ts;
+  unsigned preferred;
   };
   …
   } node;

+  struct processor * last_proc;
+
   #if defined( __CFA_WITH_VERIFY__ )
      void * canary;
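Two scheduler hints are added to the thread descriptor: preferred (in the intrusive link node) records a ready-queue lane preference, and last_proc records the processor that last ran the thread. A condensed sketch of how this changeset uses them (the extunpark logic is taken from the kernel.cfa hunks below; how the ready queue consumes link.preferred is not shown in this changeset and is only indicated):

    // detect cross-processor unparks, as in __schedule_thread below
    __STATS( bool outside = thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )
    __STATS( if(outside) __tls_stats()->ready.threads.extunpark++; )

    // -1u is the "no preference" sentinel set at thread initialization;
    // a lane value would let the queue push the thread back where it came from
    if( thrd->link.preferred != -1u ) { /* try that lane first (assumed use) */ }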
libcfa/src/concurrency/io.cfa
   #include "kernel.hfa"
   #include "kernel/fwd.hfa"
+  #include "kernel_private.hfa"
   #include "io/types.hfa"
   …
   static inline unsigned __flush( struct $io_context & );
   static inline __u32 __release_sqes( struct $io_context & );
+  extern void __kernel_unpark( $thread * thrd );

   bool __cfa_io_drain( processor * proc ) {
      /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify( ready_schedule_islocked() );
      /* paranoid */ verify( proc );
      /* paranoid */ verify( proc->io.ctx );
   …
      __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );

-     fulfil( *future, cqe.res );
+     __kernel_unpark( fulfil( *future, cqe.res, false ) );
   …
      __atomic_store_n( ctx->cq.head, head + count, __ATOMIC_SEQ_CST );

+     /* paranoid */ verify( ready_schedule_islocked() );
      /* paranoid */ verify( ! __preemption_enabled() );
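The I/O drain path no longer unparks completed futures directly. fulfil now takes an extra boolean and returns the thread that was waiting, if any (its exact semantics are inferred from this hunk), and the caller wakes that thread through the new __kernel_unpark while the ready-scheduler lock is already held, so a batch of completions pays for the lock once. A sketch of the resulting shape (the completion accessors and cqes array are hypothetical placeholders for the ring-walking code elided above):

    // sketch: batch wakeups under one scheduler-lock acquisition
    ready_schedule_lock();
    for( unsigned i = 0; i < count; i++ ) {
        io_future_t * future = future_of( cqes[i] );   // hypothetical accessor
        // fulfil(..., false) returns the parked waiter instead of waking it
        __kernel_unpark( fulfil( *future, cqes[i].res, false ) );
    }
    ready_schedule_unlock();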
libcfa/src/concurrency/kernel.cfa
   #include "invoke.h"

+  #if !defined(__CFA_NO_STATISTICS__)
+     #define __STATS( ...) __VA_ARGS__
+  #else
+     #define __STATS( ...)
+  #endif

   //-----------------------------------------------------------------------------
   …
   preemption_scope scope = { this };

-  #if !defined(__CFA_NO_STATISTICS__)
-     unsigned long long last_tally = rdtscl();
-  #endif
+  __STATS( unsigned long long last_tally = rdtscl(); )

   // if we need to run some special setup, now is the time to do it.
   …
      __cfa_io_flush( this );
   }

+  // SEARCH: {
+  //    /* paranoid */ verify( ! __preemption_enabled() );
+  //    /* paranoid */ verify( kernelTLS().this_proc_id );
+
+  //    // First, lock the scheduler since we are searching for a thread
+
+  //    // Try to get the next thread
+  //    ready_schedule_lock();
+  //    readyThread = pop_fast( this->cltr );
+  //    ready_schedule_unlock();
+  //    if(readyThread) { break SEARCH; }
+
+  //    // If we can't find a thread, might as well flush any outstanding I/O
+  //    if(this->io.pending) { __cfa_io_flush( this ); }
+
+  //    // Spin a little on I/O, just in case
+  //    for(25) {
+  //       __maybe_io_drain( this );
+  //       ready_schedule_lock();
+  //       readyThread = pop_fast( this->cltr );
+  //       ready_schedule_unlock();
+  //       if(readyThread) { break SEARCH; }
+  //    }
+
+  //    // no luck, try stealing a few times
+  //    for(25) {
+  //       if( __maybe_io_drain( this ) ) {
+  //          ready_schedule_lock();
+  //          readyThread = pop_fast( this->cltr );
+  //       } else {
+  //          ready_schedule_lock();
+  //          readyThread = pop_slow( this->cltr );
+  //       }
+  //       ready_schedule_unlock();
+  //       if(readyThread) { break SEARCH; }
+  //    }
+
+  //    // still no luck, search for a thread
+  //    ready_schedule_lock();
+  //    readyThread = pop_search( this->cltr );
+  //    ready_schedule_unlock();
+  //    if(readyThread) { break SEARCH; }
+
+  //    // Don't block if we are done
+  //    if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+
+  //    __STATS( __tls_stats()->ready.sleep.halts++; )
+
+  //    // Push self to idle stack
+  //    mark_idle(this->cltr->procs, * this);
+
+  //    // Confirm the ready-queue is empty
+  //    __maybe_io_drain( this );
+  //    ready_schedule_lock();
+  //    readyThread = pop_search( this->cltr );
+  //    ready_schedule_unlock();
+
+  //    if( readyThread ) {
+  //       // A thread was found, cancel the halt
+  //       mark_awake(this->cltr->procs, * this);
+
+  //       __STATS( __tls_stats()->ready.sleep.cancels++; )
+
+  //       // continue the main loop
+  //       break SEARCH;
+  //    }
+
+  //    __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl()); )
+  //    __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
+
+  //    // __disable_interrupts_hard();
+  //    eventfd_t val;
+  //    eventfd_read( this->idle, &val );
+  //    // __enable_interrupts_hard();
+
+  //    __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl()); )
+
+  //    // We were woken up, remove self from idle
+  //    mark_awake(this->cltr->procs, * this);
+
+  //    // DON'T just proceed, start looking again
+  //    continue MAIN_LOOP;
+  // }
+
+  // RUN_THREAD:
+  //    /* paranoid */ verify( kernelTLS().this_proc_id );
+  //    /* paranoid */ verify( ! __preemption_enabled() );
+  //    /* paranoid */ verify( readyThread );
+
+  //    // Reset io dirty bit
+  //    this->io.dirty = false;
+
+  //    // We found a thread run it
+  //    __run_thread(this, readyThread);
+
+  //    // Are we done?
+  //    if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+
+  //    #if !defined(__CFA_NO_STATISTICS__)
+  //       unsigned long long curr = rdtscl();
+  //       if(curr > (last_tally + 500000000)) {
+  //          __tally_stats(this->cltr->stats, __cfaabi_tls.this_stats);
+  //          last_tally = curr;
+  //       }
+  //    #endif
+
+  //    if(this->io.pending && !this->io.dirty) {
+  //       __cfa_io_flush( this );
+  //    }
+
+  //    // Check if there is pending io
+  //    __maybe_io_drain( this );
   }
   …
   $thread * thrd_src = kernelTLS().this_thread;

-  #if !defined(__CFA_NO_STATISTICS__)
-     struct processor * last_proc = kernelTLS().this_processor;
-  #endif
+  __STATS( thrd_src->last_proc = kernelTLS().this_processor; )

   // Run the thread on this processor
   …
   #if !defined(__CFA_NO_STATISTICS__)
-     if(last_proc != kernelTLS().this_processor) {
+     /* paranoid */ verify( thrd_src->last_proc != 0p );
+     if(thrd_src->last_proc != kernelTLS().this_processor) {
         __tls_stats()->ready.threads.migration++;
      }
   …
   // Scheduler routines
   // KERNEL ONLY
-  void __schedule_thread( $thread * thrd ) {
+  static void __schedule_thread( $thread * thrd ) {
      /* paranoid */ verify( ! __preemption_enabled() );
      /* paranoid */ verify( kernelTLS().this_proc_id );
   …
   // Dereference the thread now because once we push it, there is not guaranteed it's still valid.
   struct cluster * cl = thrd->curr_cluster;
+  __STATS( bool outside = thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )

   // push the thread to the cluster ready-queue
   …
   if( kernelTLS().this_stats ) {
      __tls_stats()->ready.threads.threads++;
+     if(outside) {
+        __tls_stats()->ready.threads.extunpark++;
+     }
      __push_stat( __tls_stats(), __tls_stats()->ready.threads.threads, false, "Processor", kernelTLS().this_processor );
   }
   else {
      __atomic_fetch_add(&cl->stats->ready.threads.threads, 1, __ATOMIC_RELAXED);
+     __atomic_fetch_add(&cl->stats->ready.threads.extunpark, 1, __ATOMIC_RELAXED);
      __push_stat( cl->stats, cl->stats->ready.threads.threads, true, "Cluster", cl );
   }
   …
   ready_schedule_lock();
-  $thread * thrd = pop_slow( this );
+  $thread * thrd;
+  for(25) {
+     thrd = pop_slow( this );
+     if(thrd) goto RET;
+  }
+  thrd = pop_search( this );
+
+  RET:
   ready_schedule_unlock();
   …
   }

+  void __kernel_unpark( $thread * thrd ) {
+     /* paranoid */ verify( ! __preemption_enabled() );
+     /* paranoid */ verify( ready_schedule_islocked());
+
+     if( !thrd ) return;
+
+     if(__must_unpark(thrd)) {
+        // Wake lost the race,
+        __schedule_thread( thrd );
+     }
+
+     /* paranoid */ verify( ready_schedule_islocked());
+     /* paranoid */ verify( ! __preemption_enabled() );
+  }
+
   void unpark( $thread * thrd ) {
      if( !thrd ) return;
   …
   static inline bool __maybe_io_drain( processor * proc ) {
+     bool ret = false;
      #if defined(CFA_HAVE_LINUX_IO_URING_H)
         __cfadbg_print_safe(runtime_core, "Kernel : core %p checking io for ring %d\n", proc, proc->io.ctx->fd);
   …
         unsigned tail = *ctx->cq.tail;
         if(head == tail) return false;
-        return __cfa_io_drain( proc );
+        ready_schedule_lock();
+        ret = __cfa_io_drain( proc );
+        ready_schedule_unlock();
      #endif
+     return ret;
   }
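Aside from the large commented-out block above (an inert sketch of a future SEARCH/RUN_THREAD restructuring of the main loop), the recurring change in this file is the new __STATS convenience macro, which collapses the old three-line conditional-compilation pattern into a single line that vanishes entirely in non-statistics builds:

    #if !defined(__CFA_NO_STATISTICS__)
        #define __STATS( ...) __VA_ARGS__
    #else
        #define __STATS( ...)
    #endif

    // before: every counter update wrapped in #if/#endif
    #if !defined(__CFA_NO_STATISTICS__)
        unsigned long long last_tally = rdtscl();
    #endif

    // after: one readable line, compiled out when statistics are disabled
    __STATS( unsigned long long last_tally = rdtscl(); )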
libcfa/src/concurrency/kernel/startup.cfa
   link.next = 0p;
   link.prev = 0p;
+  link.preferred = -1u;
+  last_proc = 0p;
   #if defined( __CFA_WITH_VERIFY__ )
      canary = 0x0D15EA5E0D15EA5Ep;
libcfa/src/concurrency/kernel_private.hfa

   //-----------------------------------------------------------------------
-  // pop thread from the ready queue of a cluster
+  // pop thread from the local queues of a cluster
   // returns 0p if empty
   // May return 0p spuriously
   …

   //-----------------------------------------------------------------------
-  // pop thread from the ready queue of a cluster
+  // pop thread from any ready queue of a cluster
+  // returns 0p if empty
+  // May return 0p spuriously
+  __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
+
+  //-----------------------------------------------------------------------
+  // search all ready queues of a cluster for any thread
   // returns 0p if empty
   // guaranteed to find any threads added before this call
-  __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
+  __attribute__((hot)) struct $thread * pop_search(struct cluster * cltr);
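The header now documents three pop operations with increasing cost and strengthening guarantees: pop_fast (local queues only), pop_slow (any ready queue, still allowed to fail spuriously), and pop_search (exhaustive, guaranteed to find any thread added before the call). The intended cascade, as used by the idle path in kernel.cfa above, looks roughly like this:

    // sketch: escalate from cheapest to most thorough under the scheduler lock
    ready_schedule_lock();
    $thread * thrd;
    for(25) {                      // CFA bounded loop: a few cheap attempts
        thrd = pop_slow( this );
        if(thrd) goto RET;
    }
    thrd = pop_search( this );     // exhaustive last resort
    RET:
    ready_schedule_unlock();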
libcfa/src/concurrency/ready_queue.cfa
   }

-  __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) {
+  __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) { return pop_fast(cltr); }
+  __attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) {
      return search(cltr);
   }
   …

   __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
-     for(25) {
-        unsigned i = __tls_rand() % lanes.count;
-        $thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
-        if(t) return t;
-     }
-
+     unsigned i = __tls_rand() % lanes.count;
+     return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
+  }
+
+  __attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) with (cltr->ready_queue) {
      return search(cltr);
   }
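Both ready-queue variants in this file now implement that same triple. Because the pop_slow contract allows spurious failure, the variant without a distinct stealing path can satisfy it by simple delegation, while the work-stealing variant reduces pop_slow to a single randomized steal attempt and leaves retry policy to the caller:

    // variant 1: no separate slow path; delegation satisfies the weak contract
    __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) { return pop_fast(cltr); }

    // variant 2: one uniformly random victim lane per call; the caller's
    // for(25) loop (see kernel.cfa above) decides how persistently to retry
    __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
        unsigned i = __tls_rand() % lanes.count;
        return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
    }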
libcfa/src/concurrency/stats.cfa
   stats->ready.pop.search.espec = 0;
   stats->ready.threads.migration = 0;
+  stats->ready.threads.extunpark = 0;
   stats->ready.threads.threads = 0;
   stats->ready.sleep.halts = 0;
   …
   __atomic_fetch_add( &cltr->ready.pop.search.espec , proc->ready.pop.search.espec , __ATOMIC_SEQ_CST ); proc->ready.pop.search.espec = 0;
   __atomic_fetch_add( &cltr->ready.threads.migration , proc->ready.threads.migration , __ATOMIC_SEQ_CST ); proc->ready.threads.migration = 0;
+  __atomic_fetch_add( &cltr->ready.threads.extunpark , proc->ready.threads.extunpark , __ATOMIC_SEQ_CST ); proc->ready.threads.extunpark = 0;
   __atomic_fetch_add( &cltr->ready.threads.threads , proc->ready.threads.threads , __ATOMIC_SEQ_CST ); proc->ready.threads.threads = 0;
   __atomic_fetch_add( &cltr->ready.sleep.halts , proc->ready.sleep.halts , __ATOMIC_SEQ_CST ); proc->ready.sleep.halts = 0;
   …
   uint64_t totalR = ready.pop.local.success + ready.pop.help.success + ready.pop.steal.success + ready.pop.search.success;
   uint64_t totalS = ready.push.local.success + ready.push.share.success + ready.push.extrn.success;
-  sstr | "- totals   : " | eng3(totalR) | "run," | eng3(totalS) | "schd (" | eng3(ready.push.extrn.success) | "ext," | eng3(ready.threads.migration) | "mig )";
+  sstr | "- totals   : " | eng3(totalR) | "run," | eng3(totalS) | "schd (" | eng3(ready.push.extrn.success) | "ext," | eng3(ready.threads.migration) | "mig," | eng3(ready.threads.extunpark) | "eupk)";

   double push_len = ((double)ready.push.local.attempt + ready.push.share.attempt + ready.push.extrn.attempt) / totalS;
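The new extunpark counter follows the same lifecycle as the existing ones: zeroed at initialization, accumulated per processor, and folded into the cluster totals during the tally. The flush idiom in isolation:

    // sketch: only the cross-processor accumulation needs atomicity;
    // the local counter is owned by this processor, so a plain reset suffices
    __atomic_fetch_add( &cltr->ready.threads.extunpark,
                        proc->ready.threads.extunpark, __ATOMIC_SEQ_CST );
    proc->ready.threads.extunpark = 0;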
libcfa/src/concurrency/stats.hfa
   struct {
      volatile uint64_t migration;
+     volatile uint64_t extunpark;
      volatile int64_t threads; // number of threads in the system, includes only local change
   } threads;
libcfa/src/concurrency/thread.cfa
   link.next = 0p;
   link.prev = 0p;
+  link.preferred = -1u;
+  last_proc = 0p;
   #if defined( __CFA_WITH_VERIFY__ )
      canary = 0x0D15EA5E0D15EA5Ep;