Changes in / [30763fd:58e280f4]
- Location:
- libcfa/src
- Files:
-
- 1 deleted
- 8 edited
-
Makefile.am (modified) (1 diff)
-
Makefile.in (modified) (4 diffs)
-
bits/defs.hfa (modified) (1 diff)
-
concurrency/invoke.h (modified) (1 diff)
-
concurrency/kernel.cfa (modified) (12 diffs)
-
concurrency/kernel.hfa (modified) (3 diffs)
-
concurrency/kernel_private.hfa (modified) (2 diffs)
-
concurrency/ready_queue.cfa (deleted)
-
concurrency/thread.cfa (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/Makefile.am
r30763fd r58e280f4 48 48 thread_headers_nosrc = concurrency/invoke.h 49 49 thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa 50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa${thread_headers:.hfa=.cfa}50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa} 51 51 else 52 52 headers = -
libcfa/src/Makefile.in
r30763fd r58e280f4 165 165 concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \ 166 166 concurrency/invoke.c concurrency/preemption.cfa \ 167 concurrency/ ready_queue.cfa concurrency/coroutine.cfa \168 concurrency/ thread.cfa concurrency/kernel.cfa \169 concurrency/m onitor.cfa concurrency/mutex.cfa167 concurrency/coroutine.cfa concurrency/thread.cfa \ 168 concurrency/kernel.cfa concurrency/monitor.cfa \ 169 concurrency/mutex.cfa 170 170 @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \ 171 171 @BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \ … … 174 174 @BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \ 175 175 @BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \ 176 @BUILDLIB_TRUE@ concurrency/preemption.lo \ 177 @BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3) 176 @BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3) 178 177 am_libcfathread_la_OBJECTS = $(am__objects_4) 179 178 libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS) … … 464 463 @BUILDLIB_FALSE@thread_headers = 465 464 @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa 466 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa${thread_headers:.hfa=.cfa}465 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa} 467 466 468 467 #---------------------------------------------------------------------------------------------------------------- … … 600 599 concurrency/$(DEPDIR)/$(am__dirstamp) 601 600 concurrency/preemption.lo: concurrency/$(am__dirstamp) \ 602 concurrency/$(DEPDIR)/$(am__dirstamp)603 concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \604 601 concurrency/$(DEPDIR)/$(am__dirstamp) 605 602 
concurrency/coroutine.lo: concurrency/$(am__dirstamp) \ -
libcfa/src/bits/defs.hfa
r30763fd r58e280f4 53 53 return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 54 54 } 55 56 #define __CFA_NO_BIT_TEST_AND_SET__57 58 static inline bool bts(volatile unsigned long long int * target, unsigned long long int bit ) {59 #if defined(__CFA_NO_BIT_TEST_AND_SET__)60 unsigned long long int mask = 1ul << bit;61 unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);62 return (ret & mask) != 0;63 #else64 int result = 0;65 asm volatile(66 "LOCK btsq %[bit], %[target]\n\t"67 :"=@ccc" (result)68 : [target] "m" (*target), [bit] "r" (bit)69 );70 return result != 0;71 #endif72 }73 74 static inline bool btr(volatile unsigned long long int * target, unsigned long long int bit ) {75 #if defined(__CFA_NO_BIT_TEST_AND_SET__)76 unsigned long long int mask = 1ul << bit;77 unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);78 return (ret & mask) != 0;79 #else80 int result = 0;81 asm volatile(82 "LOCK btrq %[bit], %[target]\n\t"83 :"=@ccc" (result)84 : [target] "m" (*target), [bit] "r" (bit)85 );86 return result != 0;87 #endif88 } -
libcfa/src/concurrency/invoke.h
r30763fd r58e280f4 189 189 // instrusive link field for threads 190 190 struct thread_desc * next; 191 struct thread_desc * prev;192 unsigned long long ts;193 191 194 192 struct { -
libcfa/src/concurrency/kernel.cfa
r30763fd r58e280f4 210 210 this.name = name; 211 211 this.cltr = &cltr; 212 id = -1u;213 212 terminated{ 0 }; 214 213 do_terminate = false; … … 240 239 this.preemption_rate = preemption_rate; 241 240 ready_queue{}; 242 ready_lock{}; 243 241 ready_queue_lock{}; 242 243 procs{ __get }; 244 244 idles{ __get }; 245 245 threads{ __get }; … … 270 270 __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this); 271 271 272 // register the processor unless it's the main thread which is handled in the boot sequence 273 if(this != mainProcessor) 274 this->id = doregister(this->cltr, this); 272 doregister(this->cltr, this); 275 273 276 274 { … … 308 306 } 309 307 308 unregister(this->cltr, this); 309 310 310 V( this->terminated ); 311 312 // unregister the processor unless it's the main thread which is handled in the boot sequence313 if(this != mainProcessor)314 unregister(this->cltr, this);315 311 316 312 __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this); … … 509 505 510 506 with( *thrd->curr_cluster ) { 511 ready_schedule_lock(*thrd->curr_cluster, kernelTLS.this_processor); 512 __atomic_acquire(&ready_queue.lock); 513 thrd->ts = rdtscl(); 514 bool was_empty = push( ready_queue, thrd ); 515 __atomic_unlock(&ready_queue.lock); 516 ready_schedule_unlock(*thrd->curr_cluster, kernelTLS.this_processor); 507 lock ( ready_queue_lock __cfaabi_dbg_ctx2 ); 508 bool was_empty = !(ready_queue != 0); 509 append( ready_queue, thrd ); 510 unlock( ready_queue_lock ); 517 511 518 512 if(was_empty) { … … 535 529 thread_desc * nextThread(cluster * this) with( *this ) { 536 530 verify( ! 
kernelTLS.preemption_state.enabled ); 537 538 ready_schedule_lock(*this, kernelTLS.this_processor); 539 __atomic_acquire(&ready_queue.lock); 540 thread_desc * head; 541 __attribute__((unused)) bool _; 542 [head, _] = pop( ready_queue ); 543 __atomic_unlock(&ready_queue.lock); 544 ready_schedule_unlock(*this, kernelTLS.this_processor); 545 531 lock( ready_queue_lock __cfaabi_dbg_ctx2 ); 532 thread_desc * head = pop_head( ready_queue ); 533 unlock( ready_queue_lock ); 546 534 verify( ! kernelTLS.preemption_state.enabled ); 547 535 return head; … … 705 693 pending_preemption = false; 706 694 kernel_thread = pthread_self(); 707 id = -1u;708 695 709 696 runner{ &this }; … … 715 702 mainProcessor = (processor *)&storage_mainProcessor; 716 703 (*mainProcessor){}; 717 718 mainProcessor->id = doregister(mainCluster, mainProcessor);719 704 720 705 //initialize the global state variables … … 763 748 kernel_stop_preemption(); 764 749 765 unregister(mainCluster, mainProcessor);766 767 750 // Destroy the main processor and its context in reverse order of construction 768 751 // These were manually constructed so we need manually destroy them 769 752 ^(mainProcessor->runner){}; 770 ^( *mainProcessor){};753 ^(mainProcessor){}; 771 754 772 755 // Final step, destroy the main thread since it is no longer needed 773 // Since we provided a stack to this task it will not destroy anything 774 ^(*mainThread){}; 775 776 ^(*mainCluster){}; 756 // Since we provided a stack to this taxk it will not destroy anything 757 ^(mainThread){}; 777 758 778 759 ^(__cfa_dbg_global_clusters.list){}; … … 790 771 with( *cltr ) { 791 772 lock (proc_list_lock __cfaabi_dbg_ctx2); 773 remove (procs, *this); 792 774 push_front(idles, *this); 793 775 unlock (proc_list_lock); … … 803 785 lock (proc_list_lock __cfaabi_dbg_ctx2); 804 786 remove (idles, *this); 787 push_front(procs, *this); 805 788 unlock (proc_list_lock); 806 789 } … … 943 926 } 944 927 928 void doregister( cluster * cltr, processor * proc ) { 929 
lock (cltr->proc_list_lock __cfaabi_dbg_ctx2); 930 cltr->nprocessors += 1; 931 push_front(cltr->procs, *proc); 932 unlock (cltr->proc_list_lock); 933 } 934 935 void unregister( cluster * cltr, processor * proc ) { 936 lock (cltr->proc_list_lock __cfaabi_dbg_ctx2); 937 remove(cltr->procs, *proc ); 938 cltr->nprocessors -= 1; 939 unlock(cltr->proc_list_lock); 940 } 941 945 942 //----------------------------------------------------------------------------- 946 943 // Debug -
libcfa/src/concurrency/kernel.hfa
r30763fd r58e280f4 106 106 // Cluster from which to get threads 107 107 struct cluster * cltr; 108 unsigned int id;109 108 110 109 // Name of the processor … … 158 157 } 159 158 160 161 //-----------------------------------------------------------------------------162 // Cluster Tools163 struct __processor_id;164 165 // Reader-Writer lock protecting the ready-queue166 struct __clusterRWLock_t {167 // total cachelines allocated168 unsigned int max;169 170 // cachelines currently in use171 volatile unsigned int alloc;172 173 // cachelines ready to itereate over174 // (!= to alloc when thread is in second half of doregister)175 volatile unsigned int ready;176 177 // writer lock178 volatile bool lock;179 180 // data pointer181 __processor_id * data;182 };183 184 void ?{}(__clusterRWLock_t & this);185 void ^?{}(__clusterRWLock_t & this);186 187 // Underlying sub quues of the ready queue188 struct __attribute__((aligned(128))) __intrusive_ready_queue_t {189 // spin lock protecting the queue190 volatile bool lock;191 192 // anchor for the head and the tail of the queue193 struct __sentinel_t {194 struct thread_desc * next;195 struct thread_desc * prev;196 unsigned long long ts;197 } before, after;198 199 // Optional statistic counters200 #ifndef __CFA_NO_SCHED_STATS__201 struct __attribute__((aligned(64))) {202 // difference between number of push and pops203 ssize_t diff;204 205 // total number of pushes and pops206 size_t push;207 size_t pop ;208 } stat;209 #endif210 };211 212 void ?{}(__intrusive_ready_queue_t & this);213 void ^?{}(__intrusive_ready_queue_t & this);214 215 159 //----------------------------------------------------------------------------- 216 160 // Cluster 217 161 struct cluster { 218 162 // Ready queue locks 219 __ clusterRWLock_t ready_lock;163 __spinlock_t ready_queue_lock; 220 164 221 165 // Ready queue for threads 222 __ intrusive_ready_queue_tready_queue;166 __queue_t(thread_desc) ready_queue; 223 167 224 168 // Name of the cluster … … 230 174 
// List of processors 231 175 __spinlock_t proc_list_lock; 176 __dllist_t(struct processor) procs; 232 177 __dllist_t(struct processor) idles; 178 unsigned int nprocessors; 233 179 234 180 // List of threads -
libcfa/src/concurrency/kernel_private.hfa
r30763fd r58e280f4 99 99 //----------------------------------------------------------------------------- 100 100 // Utils 101 #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T))))static char storage_##X[sizeof(T)]101 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)] 102 102 103 103 static inline uint32_t tls_rand() { … … 115 115 void unregister( struct cluster * cltr, struct thread_desc & thrd ); 116 116 117 //======================================================================= 118 // Cluster lock API 119 //======================================================================= 120 struct __attribute__((aligned(64))) __processor_id { 121 processor * volatile handle; 122 volatile bool lock; 123 }; 124 125 // Lock-Free registering/unregistering of threads 126 // Register a processor to a given cluster and get its unique id in return 127 unsigned doregister( struct cluster * cltr, struct processor * proc ); 128 129 // Unregister a processor from a given cluster using its id, getting back the original pointer 130 void unregister( struct cluster * cltr, struct processor * proc ); 131 132 //======================================================================= 133 // Reader-writer lock implementation 134 // Concurrent with doregister/unregister, 135 // i.e., threads can be added at any point during or between the entry/exit 136 static inline void __atomic_acquire(volatile bool * ll) { 137 while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) { 138 while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED)) 139 asm volatile("pause"); 140 } 141 /* paranoid */ verify(*ll); 142 } 143 144 static inline bool __atomic_try_acquire(volatile bool * ll) { 145 return __atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST); 146 } 147 148 static inline void __atomic_unlock(volatile bool * ll) { 149 /* paranoid */ verify(*ll); 150 __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE); 151 } 152 153 
//----------------------------------------------------------------------- 154 // Reader side : acquire when using the ready queue to schedule but not 155 // creating/destroying queues 156 static inline void ready_schedule_lock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) { 157 unsigned iproc = proc->id; 158 /*paranoid*/ verify(data[iproc].handle == proc); 159 /*paranoid*/ verify(iproc < ready); 160 161 // Step 1 : make sure no writer are in the middle of the critical section 162 while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED)) 163 asm volatile("pause"); 164 165 // Fence needed because we don't want to start trying to acquire the lock 166 // before we read a false. 167 // Not needed on x86 168 // std::atomic_thread_fence(std::memory_order_seq_cst); 169 170 // Step 2 : acquire our local lock 171 __atomic_acquire( &data[iproc].lock ); 172 /*paranoid*/ verify(data[iproc].lock); 173 } 174 175 static inline void ready_schedule_unlock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) { 176 unsigned iproc = proc->id; 177 /*paranoid*/ verify(data[iproc].handle == proc); 178 /*paranoid*/ verify(iproc < ready); 179 /*paranoid*/ verify(data[iproc].lock); 180 __atomic_store_n(&data[iproc].lock, false, __ATOMIC_RELEASE); 181 } 182 183 //----------------------------------------------------------------------- 184 // Writer side : acquire when changing the ready queue, e.g. adding more 185 // queues or removing them. 186 uint_fast32_t ready_mutate_lock( struct cluster & cltr ); 187 188 void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t ); 189 190 bool push(__intrusive_ready_queue_t & this, thread_desc * node); 191 [thread_desc *, bool] pop(__intrusive_ready_queue_t & this); 117 void doregister( struct cluster * cltr, struct processor * proc ); 118 void unregister( struct cluster * cltr, struct processor * proc ); 192 119 193 120 // Local Variables: // -
libcfa/src/concurrency/thread.cfa
r30763fd r58e280f4 41 41 self_mon_p = &self_mon; 42 42 curr_cluster = &cl; 43 next = 0p; 44 prev = 0p; 43 next = NULL; 45 44 46 45 node.next = NULL;
Note: See TracChangeset for help on using the changeset viewer.