Context Navigation

rfcd65ca	r24e321c
175	175	struct cluster * curr_cluster;
176	176
177		// preferred ready-queue
	177	// preferred ready-queue or CPU
178	178	unsigned preferred;
179	179

libcfa/src/concurrency/io.cfa

-              rfcd65ca
+              r24e321c
         static inline unsigned __flush( struct $io_context & );
         static inline __u32 __release_sqes( struct $io_context & );
         extern void __kernel_unpark( thread$ * thrd );
+        extern void __kernel_unpark( thread$ * thrd, unpark_hint );
         bool __cfa_io_drain( processor * proc ) {
 …
                         __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );
                         __kernel_unpark( fulfil( *future, cqe.res, false ) );
+                        __kernel_unpark( fulfil( *future, cqe.res, false ), UNPARK_LOCAL );
+                }

libcfa/src/concurrency/kernel.cfa

-              rfcd65ca
+              r24e321c
                 if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
                         // The thread was preempted, reschedule it and reset the flag
                         schedule_thread$( thrd_dst );
+                        schedule_thread$( thrd_dst, UNPARK_LOCAL );
                         break RUNNING;
+                }
 …
 // Scheduler routines
 // KERNEL ONLY
 static void __schedule_thread( thread$ * thrd ) {
+static void __schedule_thread( thread$ * thrd, unpark_hint hint ) {
         /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( ready_schedule_islocked());
 …
         // Dereference the thread now because once we push it, there is no guarantee it's still valid.
         struct cluster * cl = thrd->curr_cluster;
         __STATS(bool outside = thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )
+        __STATS(bool outside = hint == UNPARK_LOCAL && thrd->last_proc && thrd->last_proc != kernelTLS().this_processor; )
         // push the thread to the cluster ready-queue
         push( cl, thrd, local );
+        push( cl, thrd, hint );
         // variable thrd is no longer safe to use
 …
+}
 void schedule_thread$( thread$ * thrd ) {
+void schedule_thread$( thread$ * thrd, unpark_hint hint ) {
         ready_schedule_lock();
                 __schedule_thread( thrd );
+                __schedule_thread( thrd, hint );
         ready_schedule_unlock();
+}
 …
+}
 void __kernel_unpark( thread$ * thrd ) {
+void __kernel_unpark( thread$ * thrd, unpark_hint hint ) {
         /* paranoid */ verify( ! __preemption_enabled() );
         /* paranoid */ verify( ready_schedule_islocked());
 …
         if(__must_unpark(thrd)) {
                 // Wake lost the race,
                 __schedule_thread( thrd );
+                __schedule_thread( thrd, hint );
+        }
 …
+}
 void unpark( thread$ * thrd ) {
+void unpark( thread$ * thrd, unpark_hint hint ) {
         if( !thrd ) return;
 …
                 disable_interrupts();
                         // Wake lost the race,
                         schedule_thread$( thrd );
+                        schedule_thread$( thrd, hint );
                 enable_interrupts(false);
+        }

libcfa/src/concurrency/kernel/fwd.hfa

-              rfcd65ca
+              r24e321c
         extern "Cforall" {
+                enum unpark_hint { UNPARK_LOCAL, UNPARK_REMOTE };
                 extern void park( void );
+                extern void unpark( struct thread$ * this );
+                extern void unpark( struct thread$ *, unpark_hint );
+                static inline void unpark( struct thread$ * thrd ) { unpark(thrd, UNPARK_LOCAL); }
                 static inline struct thread$ * active_thread () {
                         struct thread$ * t = publicTLS_get( this_thread );

libcfa/src/concurrency/kernel/startup.cfa

-              rfcd65ca
+              r24e321c
         __cfadbg_print_safe(runtime_core, "Kernel : Main cluster ready\n");
+        // Construct the processor context of the main processor
+        void ?{}(processorCtx_t & this, processor * proc) {
+                (this.__cor){ "Processor" };
+                this.__cor.starter = 0p;
+                this.proc = proc;
+        }
+        void ?{}(processor & this) with( this ) {
+                ( this.terminated ){};
+                ( this.runner ){};
+                init( this, "Main Processor", *mainCluster, 0p );
+                kernel_thread = pthread_self();
+                runner{ &this };
+                __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
+        }
+        // Initialize the main processor and the main processor ctx
+        // (the coroutine that contains the processing control flow)
+        mainProcessor = (processor *)&storage_mainProcessor;
+        (*mainProcessor){};
+        register_tls( mainProcessor );
         // Start by initializing the main thread
         // SKULLDUGGERY: the mainThread steals the process main thread
 …
         __cfadbg_print_safe(runtime_core, "Kernel : Main thread ready\n");
-        // Construct the processor context of the main processor
-        void ?{}(processorCtx_t & this, processor * proc) {
-                (this.__cor){ "Processor" };
-                this.__cor.starter = 0p;
-                this.proc = proc;
+        }
-        void ?{}(processor & this) with( this ) {
-                ( this.terminated ){};
-                ( this.runner ){};
-                init( this, "Main Processor", *mainCluster, 0p );
-                kernel_thread = pthread_self();
-                runner{ &this };
-                __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
+        }
-        // Initialize the main processor and the main processor ctx
-        // (the coroutine that contains the processing control flow)
-        mainProcessor = (processor *)&storage_mainProcessor;
-        (*mainProcessor){};
-        register_tls( mainProcessor );
         //initialize the global state variables
         __cfaabi_tls.this_processor = mainProcessor;
 …
         // Add the main thread to the ready queue
         // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
         schedule_thread$(mainThread);
+        schedule_thread$(mainThread, UNPARK_LOCAL);
         // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
 …
         link.next = 0p;
         link.ts   = -1llu;
         preferred = -1u;
+        preferred = ready_queue_new_preferred();
         last_proc = 0p;
         #if defined( __CFA_WITH_VERIFY__ )

libcfa/src/concurrency/kernel_private.hfa

-              rfcd65ca
+              r24e321c
+}
 void schedule_thread$( thread$ * ) __attribute__((nonnull (1)));
+void schedule_thread$( thread$ *, unpark_hint hint ) __attribute__((nonnull (1)));
 extern bool __preemption_enabled();
 …
 // push thread onto a ready queue for a cluster
 // returns true if the list was previously empty, false otherwise
 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, bool local);
+__attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint);
 //-----------------------------------------------------------------------
 …
 //-----------------------------------------------------------------------
+// get preferred ready-queue for new thread
+unsigned ready_queue_new_preferred();
+//-----------------------------------------------------------------------
 // Increase the width of the ready queue (number of lanes) by 4
 void ready_queue_grow  (struct cluster * cltr);

libcfa/src/concurrency/ready_queue.cfa

-              rfcd65ca
+              r24e321c
 //-----------------------------------------------------------------------
 #if defined(USE_CPU_WORK_STEALING)
         __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, bool push_local) with (cltr->ready_queue) {
+        __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
                 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
 …
+        }
         __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, bool push_local) with (cltr->ready_queue) {
+        __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
                 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
                 const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
+                const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
                 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
 …
 #endif
 #if defined(USE_WORK_STEALING)
         __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, bool push_local) with (cltr->ready_queue) {
+        __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->ready_queue) {
                 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
                 // #define USE_PREFERRED
                 #if !defined(USE_PREFERRED)
                 const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
+                const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
                 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
                 #else
                         unsigned preferred = thrd->preferred;
                         const bool external = push_local || (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr;
+                        const bool external = (hint != UNPARK_LOCAL) || (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr;
                         /* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count );
 …
         #endif
+        thrd->preferred = w;
+        #if defined(USE_CPU_WORK_STEALING)
+                thrd->preferred = w / READYQ_SHARD_FACTOR;
+        #else
+                thrd->preferred = w;
+        #endif
         // return the popped thread
 …
 //-----------------------------------------------------------------------
+// Get the preferred ready-queue value for a new thread: taken from the thread found in TLS when there is one, otherwise 0 (or the __kernel_getcpu() result under USE_CPU_WORK_STEALING).
+unsigned ready_queue_new_preferred() {
+        unsigned pref = 0; // fallback when no thread is found in TLS and CPU work stealing is disabled
+        if(struct thread$ * thrd = publicTLS_get( this_thread )) {
+                pref = thrd->preferred; // reuse the preference stored on the thread found in TLS
+        }
+        else {
+                #if defined(USE_CPU_WORK_STEALING)
+                        pref = __kernel_getcpu(); // no thread in TLS: use the value reported by __kernel_getcpu()
+                #endif
+        }
+        #if defined(USE_CPU_WORK_STEALING)
+                /* paranoid */ verify(pref >= 0); // NOTE(review): pref is unsigned, so this check is always true
+                /* paranoid */ verify(pref < cpu_info.hthrd_count); // bound check against cpu_info.hthrd_count
+        #endif
+        return pref;
+}
+//-----------------------------------------------------------------------
 // Check that all the intrusive queues in the data structure are still consistent
 static void check( __ready_queue_t & q ) with (q) {

libcfa/src/concurrency/thread.cfa

-              rfcd65ca
+              r24e321c
         link.next = 0p;
         link.ts   = -1llu;
         preferred = thread_rand() % cl.ready_queue.lanes.count;
+        preferred = ready_queue_new_preferred();
         last_proc = 0p;
         #if defined( __CFA_WITH_VERIFY__ )
 …
         /* paranoid */ verify( this_thrd->context.SP );
         schedule_thread$( this_thrd );
+        schedule_thread$( this_thrd, UNPARK_LOCAL );
         enable_interrupts();
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 24e321c

Legend:

Download in other formats: