Changes in / [397c101a:d4f1521]

Location: libcfa/src
Files: 1 added, 8 edited
Legend: unmodified lines are shown as context, added lines are prefixed with "+", removed lines with "-".
libcfa/src/Makefile.am
r397c101a → rd4f1521

@@ -48,5 +48,5 @@
 thread_headers_nosrc = concurrency/invoke.h
 thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
-thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
+thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
 else
 headers =
libcfa/src/Makefile.in
r397c101a → rd4f1521

@@ -165,7 +165,7 @@
 	concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
 	concurrency/invoke.c concurrency/preemption.cfa \
-	concurrency/coroutine.cfa concurrency/thread.cfa \
-	concurrency/kernel.cfa concurrency/monitor.cfa \
-	concurrency/mutex.cfa
+	concurrency/ready_queue.cfa concurrency/coroutine.cfa \
+	concurrency/thread.cfa concurrency/kernel.cfa \
+	concurrency/monitor.cfa concurrency/mutex.cfa
 @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
 @BUILDLIB_TRUE@	concurrency/thread.lo concurrency/kernel.lo \
@@ -174,5 +174,6 @@
 @BUILDLIB_TRUE@	concurrency/CtxSwitch-@ARCHITECTURE@.lo \
 @BUILDLIB_TRUE@	concurrency/alarm.lo concurrency/invoke.lo \
-@BUILDLIB_TRUE@	concurrency/preemption.lo $(am__objects_3)
+@BUILDLIB_TRUE@	concurrency/preemption.lo \
+@BUILDLIB_TRUE@	concurrency/ready_queue.lo $(am__objects_3)
 am_libcfathread_la_OBJECTS = $(am__objects_4)
 libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS)
@@ -463,5 +464,5 @@
 @BUILDLIB_FALSE@thread_headers =
 @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
-@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
+@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
 
 #----------------------------------------------------------------------------------------------------------------
@@ -599,4 +600,6 @@
 	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/preemption.lo: concurrency/$(am__dirstamp) \
+	concurrency/$(DEPDIR)/$(am__dirstamp)
+concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \
 	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/coroutine.lo: concurrency/$(am__dirstamp) \
libcfa/src/bits/defs.hfa
r397c101a → rd4f1521

@@ -53,2 +53,36 @@
 	return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
 }
+
+#define __CFA_NO_BIT_TEST_AND_SET__
+
+static inline bool bts(volatile unsigned long long int * target, unsigned long long int bit ) {
+	#if defined(__CFA_NO_BIT_TEST_AND_SET__)
+	unsigned long long int mask = 1ul << bit;
+	unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
+	return (ret & mask) != 0;
+	#else
+	int result = 0;
+	asm volatile(
+		"LOCK btsq %[bit], %[target]\n\t"
+		:"=@ccc" (result)
+		: [target] "m" (*target), [bit] "r" (bit)
+	);
+	return result != 0;
+	#endif
+}
+
+static inline bool btr(volatile unsigned long long int * target, unsigned long long int bit ) {
+	#if defined(__CFA_NO_BIT_TEST_AND_SET__)
+	unsigned long long int mask = 1ul << bit;
+	unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
+	return (ret & mask) != 0;
+	#else
+	int result = 0;
+	asm volatile(
+		"LOCK btrq %[bit], %[target]\n\t"
+		:"=@ccc" (result)
+		: [target] "m" (*target), [bit] "r" (bit)
+	);
+	return result != 0;
+	#endif
+}
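Note: bts and btr atomically test-and-set / test-and-reset one bit of a 64-bit word and return the bit's previous value, either through the portable __atomic_fetch_or/__atomic_fetch_and builtins or through the x86 LOCK btsq/btrq instructions. The changeset does not show how they are used (that is in the new concurrency/ready_queue.cfa, which is not included above), so the following is only a minimal plain-C sketch of the kind of bookkeeping such primitives enable, e.g. a 64-bit occupancy mask with one bit per sub-queue; the mask and helper names are illustrative and not part of the patch.

// Minimal sketch (plain C, GCC builtins); not part of the changeset.
// 'occupied' is a hypothetical 64-bit mask: bit i set <=> sub-queue i is non-empty.
static volatile unsigned long long occupied = 0;

// Atomically set bit 'i' and report whether it was already set.
static inline _Bool mask_set( volatile unsigned long long * m, unsigned i ) {
	unsigned long long bit = 1ull << i;
	return (__atomic_fetch_or( m, bit, __ATOMIC_RELAXED ) & bit) != 0;
}

// Atomically clear bit 'i' and report whether it was set before.
static inline _Bool mask_clear( volatile unsigned long long * m, unsigned i ) {
	unsigned long long bit = 1ull << i;
	return (__atomic_fetch_and( m, ~bit, __ATOMIC_RELAXED ) & bit) != 0;
}

// Example: the first push into sub-queue 7 flips its bit from 0 to 1.
//   if( !mask_set( &occupied, 7 ) ) { /* queue 7 just became non-empty */ }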
libcfa/src/concurrency/invoke.h
r397c101a → rd4f1521

@@ -189,4 +189,6 @@
 	// instrusive link field for threads
 	struct thread_desc * next;
+	struct thread_desc * prev;
+	unsigned long long ts;
 
 	struct {
libcfa/src/concurrency/kernel.cfa
r397c101a → rd4f1521

@@ -210,4 +210,5 @@
 	this.name = name;
 	this.cltr = &cltr;
+	id = -1u;
 	terminated{ 0 };
 	do_terminate = false;
@@ -239,7 +240,6 @@
 	this.preemption_rate = preemption_rate;
 	ready_queue{};
-	ready_queue_lock{};
-
-	procs{ __get };
+	ready_lock{};
+
 	idles{ __get };
 	threads{ __get };
@@ -270,4 +270,6 @@
 	__cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
 
-	doregister(this->cltr, this);
+	// register the processor unless it's the main thread which is handled in the boot sequence
+	if(this != mainProcessor)
+		this->id = doregister(this->cltr, this);
 
@@ -306,7 +308,9 @@
 	}
 
-	unregister(this->cltr, this);
-
 	V( this->terminated );
+
+	// unregister the processor unless it's the main thread which is handled in the boot sequence
+	if(this != mainProcessor)
+		unregister(this->cltr, this);
 
 	__cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
@@ -505,8 +509,10 @@
 
 	with( *thrd->curr_cluster ) {
-		lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
-		bool was_empty = !(ready_queue != 0);
-		append( ready_queue, thrd );
-		unlock( ready_queue_lock );
+		ready_schedule_lock(*thrd->curr_cluster, kernelTLS.this_processor);
+		__atomic_acquire(&ready_queue.lock);
+		thrd->ts = rdtscl();
+		bool was_empty = push( ready_queue, thrd );
+		__atomic_unlock(&ready_queue.lock);
+		ready_schedule_unlock(*thrd->curr_cluster, kernelTLS.this_processor);
 
 		if(was_empty) {
@@ -529,7 +535,13 @@
 thread_desc * nextThread(cluster * this) with( *this ) {
 	verify( ! kernelTLS.preemption_state.enabled );
-	lock( ready_queue_lock __cfaabi_dbg_ctx2 );
-	thread_desc * head = pop_head( ready_queue );
-	unlock( ready_queue_lock );
+
+	ready_schedule_lock(*this, kernelTLS.this_processor);
+	__atomic_acquire(&ready_queue.lock);
+	thread_desc * head;
+	__attribute__((unused)) bool _;
+	[head, _] = pop( ready_queue );
+	__atomic_unlock(&ready_queue.lock);
+	ready_schedule_unlock(*this, kernelTLS.this_processor);
+
 	verify( ! kernelTLS.preemption_state.enabled );
 	return head;
@@ -693,4 +705,5 @@
 	pending_preemption = false;
 	kernel_thread = pthread_self();
+	id = -1u;
 
 	runner{ &this };
@@ -702,4 +715,6 @@
 	mainProcessor = (processor *)&storage_mainProcessor;
 	(*mainProcessor){};
+
+	mainProcessor->id = doregister(mainCluster, mainProcessor);
 
 	//initialize the global state variables
@@ -748,12 +763,16 @@
 	kernel_stop_preemption();
 
+	unregister(mainCluster, mainProcessor);
+
 	// Destroy the main processor and its context in reverse order of construction
 	// These were manually constructed so we need manually destroy them
 	^(mainProcessor->runner){};
-	^(mainProcessor){};
+	^(*mainProcessor){};
 
 	// Final step, destroy the main thread since it is no longer needed
-	// Since we provided a stack to this taxk it will not destroy anything
-	^(mainThread){};
+	// Since we provided a stack to this task it will not destroy anything
+	^(*mainThread){};
+
+	^(*mainCluster){};
 
 	^(__cfa_dbg_global_clusters.list){};
@@ -771,5 +790,4 @@
 	with( *cltr ) {
 		lock      (proc_list_lock __cfaabi_dbg_ctx2);
-		remove    (procs, *this);
 		push_front(idles, *this);
 		unlock    (proc_list_lock);
@@ -785,4 +803,3 @@
 		lock      (proc_list_lock __cfaabi_dbg_ctx2);
 		remove    (idles, *this);
-		push_front(procs, *this);
 		unlock    (proc_list_lock);
@@ -926,18 +943,4 @@
 }
 
-void doregister( cluster * cltr, processor * proc ) {
-	lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
-	cltr->nprocessors += 1;
-	push_front(cltr->procs, *proc);
-	unlock    (cltr->proc_list_lock);
-}
-
-void unregister( cluster * cltr, processor * proc ) {
-	lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
-	remove(cltr->procs, *proc );
-	cltr->nprocessors -= 1;
-	unlock(cltr->proc_list_lock);
-}
-
 //-----------------------------------------------------------------------------
 // Debug
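Note: ScheduleThread and nextThread now nest two locks: the cluster-wide reader lock (ready_schedule_lock/unlock), which keeps the set of ready queues stable, and the sub-queue's own spinlock (__atomic_acquire/__atomic_unlock); the thread is also stamped with rdtscl() before being pushed. The sketch below (plain C) only illustrates that nesting on a single, simplified queue; the reader lock, the wake-up of an idle processor on the empty-to-non-empty transition, and all names are stand-ins, since the hunk above does not show what the was_empty branch actually does.

// Simplified sketch (plain C) of the locking order in the new schedule path; not the CFA code.
#include <stdbool.h>
#include <stdint.h>

struct thread   { uint64_t ts; struct thread * next; };
struct subqueue { volatile bool lock; struct thread * head, * tail; };

static void spin_lock( volatile bool * l )   { while( __atomic_exchange_n( l, true, __ATOMIC_ACQUIRE ) ); }
static void spin_unlock( volatile bool * l ) { __atomic_store_n( l, false, __ATOMIC_RELEASE ); }

// Stand-ins for the cluster-wide reader lock and the idle-processor wake-up.
static void reader_lock(void)   { /* ready_schedule_lock() in the patch */ }
static void reader_unlock(void) { /* ready_schedule_unlock() in the patch */ }
static void wake_one_idle(void) { /* assumed purpose of the was_empty branch */ }

static void schedule( struct subqueue * q, struct thread * t, uint64_t now ) {
	reader_lock();                          // the queue set cannot change underneath us
	spin_lock( &q->lock );                  // protect this one sub-queue
	t->ts = now;                            // rdtscl() timestamp in the patch
	bool was_empty = (q->head == 0);        // push() returns this in the patch
	t->next = 0;
	if( was_empty ) q->head = t; else q->tail->next = t;
	q->tail = t;
	spin_unlock( &q->lock );
	reader_unlock();
	if( was_empty ) wake_one_idle();        // only when the queue went non-empty
}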
libcfa/src/concurrency/kernel.hfa
r397c101a → rd4f1521

@@ -107,4 +107,5 @@
 	// Cluster from which to get threads
 	struct cluster * cltr;
+	unsigned int id;
 
 	// Name of the processor
@@ -158,12 +159,67 @@
 }
 
+
+//-----------------------------------------------------------------------------
+// Cluster Tools
+struct __processor_id;
+
+// Reader-Writer lock protecting the ready-queue
+struct __clusterRWLock_t {
+	// total cachelines allocated
+	unsigned int max;
+
+	// cachelines currently in use
+	volatile unsigned int alloc;
+
+	// cachelines ready to itereate over
+	// (!= to alloc when thread is in second half of doregister)
+	volatile unsigned int ready;
+
+	// writer lock
+	volatile bool lock;
+
+	// data pointer
+	__processor_id * data;
+};
+
+void  ?{}(__clusterRWLock_t & this);
+void ^?{}(__clusterRWLock_t & this);
+
+// Underlying sub quues of the ready queue
+struct __attribute__((aligned(128))) __intrusive_ready_queue_t {
+	// spin lock protecting the queue
+	volatile bool lock;
+
+	// anchor for the head and the tail of the queue
+	struct __sentinel_t {
+		struct thread_desc * next;
+		struct thread_desc * prev;
+		unsigned long long ts;
+	} before, after;
+
+	// Optional statistic counters
+	#ifndef __CFA_NO_SCHED_STATS__
+		struct __attribute__((aligned(64))) {
+			// difference between number of push and pops
+			ssize_t diff;
+
+			// total number of pushes and pops
+			size_t push;
+			size_t pop ;
+		} stat;
+	#endif
+};
+
+void  ?{}(__intrusive_ready_queue_t & this);
+void ^?{}(__intrusive_ready_queue_t & this);
+
 //-----------------------------------------------------------------------------
 // Cluster
 struct cluster {
 	// Ready queue locks
-	__spinlock_t ready_queue_lock;
+	__clusterRWLock_t ready_lock;
 
 	// Ready queue for threads
-	__queue_t(thread_desc) ready_queue;
+	__intrusive_ready_queue_t ready_queue;
 
 	// Name of the cluster
@@ -175,7 +231,5 @@
 	// List of processors
 	__spinlock_t proc_list_lock;
-	__dllist_t(struct processor) procs;
 	__dllist_t(struct processor) idles;
-	unsigned int nprocessors;
 
 	// List of threads
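Note: __intrusive_ready_queue_t threads the ready queue through the thread_desc next/prev/ts fields themselves (hence the matching additions to invoke.h above), with before/after sentinel anchors so push and pop never have to special-case an empty list. The plain-C sketch below shows that sentinel technique on a generic node type; the bool returned by push is modelled on the declarations in kernel_private.hfa, everything else is illustrative rather than the actual CFA implementation.

// Minimal sketch (plain C) of an intrusive queue with sentinel anchors; not the CFA code.
#include <stdbool.h>
#include <stddef.h>

struct node { struct node * next, * prev; };

struct iqueue {
	struct node before;   // sentinel in front of the first element
	struct node after;    // sentinel behind the last element
};

static void iqueue_init( struct iqueue * q ) {
	q->before.prev = NULL;      q->after.next = NULL;
	q->before.next = &q->after; q->after.prev = &q->before;
}

static bool iqueue_empty( struct iqueue * q ) {
	return q->before.next == &q->after;
}

// Append at the tail; like push() in the patch, report whether the queue was empty before.
static bool iqueue_push( struct iqueue * q, struct node * n ) {
	bool was_empty = iqueue_empty( q );
	struct node * last = q->after.prev;
	last->next = n;      n->prev = last;
	n->next = &q->after; q->after.prev = n;
	return was_empty;
}

// Remove from the head; NULL when only the sentinels remain.
static struct node * iqueue_pop( struct iqueue * q ) {
	struct node * n = q->before.next;
	if( n == &q->after ) return NULL;
	q->before.next = n->next; n->next->prev = &q->before;
	n->next = n->prev = NULL;
	return n;
}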
libcfa/src/concurrency/kernel_private.hfa
r397c101a → rd4f1521

@@ -99,5 +99,5 @@
 //-----------------------------------------------------------------------------
 // Utils
-#define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
+#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
 
 static inline uint32_t tls_rand() {
@@ -115,6 +115,79 @@
 void unregister( struct cluster * cltr, struct thread_desc & thrd );
 
-void doregister( struct cluster * cltr, struct processor * proc );
-void unregister( struct cluster * cltr, struct processor * proc );
+//=======================================================================
+// Cluster lock API
+//=======================================================================
+struct __attribute__((aligned(64))) __processor_id {
+	processor * volatile handle;
+	volatile bool lock;
+};
+
+// Lock-Free registering/unregistering of threads
+// Register a processor to a given cluster and get its unique id in return
+unsigned doregister( struct cluster * cltr, struct processor * proc );
+
+// Unregister a processor from a given cluster using its id, getting back the original pointer
+void unregister( struct cluster * cltr, struct processor * proc );
+
+//=======================================================================
+// Reader-writer lock implementation
+// Concurrent with doregister/unregister,
+//    i.e., threads can be added at any point during or between the entry/exit
+static inline void __atomic_acquire(volatile bool * ll) {
+	while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
+		while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
+			asm volatile("pause");
+	}
+	/* paranoid */ verify(*ll);
+}
+
+static inline bool __atomic_try_acquire(volatile bool * ll) {
+	return __atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
+}
+
+static inline void __atomic_unlock(volatile bool * ll) {
+	/* paranoid */ verify(*ll);
+	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
+}
+
+//-----------------------------------------------------------------------
+// Reader side : acquire when using the ready queue to schedule but not
+//  creating/destroying queues
+static inline void ready_schedule_lock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) {
+	unsigned iproc = proc->id;
+	/*paranoid*/ verify(data[iproc].handle == proc);
+	/*paranoid*/ verify(iproc < ready);
+
+	// Step 1 : make sure no writer are in the middle of the critical section
+	while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+		asm volatile("pause");
+
+	// Fence needed because we don't want to start trying to acquire the lock
+	// before we read a false.
+	// Not needed on x86
+	// std::atomic_thread_fence(std::memory_order_seq_cst);
+
+	// Step 2 : acquire our local lock
+	__atomic_acquire( &data[iproc].lock );
+	/*paranoid*/ verify(data[iproc].lock);
+}
+
+static inline void ready_schedule_unlock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) {
+	unsigned iproc = proc->id;
+	/*paranoid*/ verify(data[iproc].handle == proc);
+	/*paranoid*/ verify(iproc < ready);
+	/*paranoid*/ verify(data[iproc].lock);
+	__atomic_store_n(&data[iproc].lock, false, __ATOMIC_RELEASE);
+}
+
+//-----------------------------------------------------------------------
+// Writer side : acquire when changing the ready queue, e.g. adding more
+//  queues or removing them.
+uint_fast32_t ready_mutate_lock( struct cluster & cltr );
+
+void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t );
+
+bool push(__intrusive_ready_queue_t & this, thread_desc * node);
+[thread_desc *, bool] pop(__intrusive_ready_queue_t & this);
 
 // Local Variables: //
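Note: the new cluster lock is a reader-writer lock specialised for this use: every registered processor owns one cache-line-aligned __processor_id slot, and the reader side (ready_schedule_lock/unlock) waits out any writer and then takes only its own slot, so uncontended readers never share a cache line. The writer side (ready_mutate_lock/unlock) is only declared here; its implementation lives in the new ready_queue.cfa, which is not shown, so the writer half of the plain-C sketch below is an assumption about how such a lock is typically completed (take the global flag, then every per-processor slot). All names are illustrative.

// Simplified sketch (plain C, GCC builtins) of the per-processor reader-writer pattern; not the CFA code.
#include <stdbool.h>

#define MAX_READERS 64

struct rwlock {
	volatile bool writer;                                            // global writer flag
	struct __attribute__((aligned(64))) { volatile bool lock; } slot[MAX_READERS]; // one cache line per reader
	unsigned nreaders;                                               // slots currently registered
};

static void spin_lock( volatile bool * l ) {
	while( __atomic_exchange_n( l, true, __ATOMIC_SEQ_CST ) )
		while( __atomic_load_n( l, __ATOMIC_RELAXED ) );             // wait while held
}
static void spin_unlock( volatile bool * l ) { __atomic_store_n( l, false, __ATOMIC_RELEASE ); }

// Reader side (cf. ready_schedule_lock/unlock): wait for any writer, then take only our own slot.
static void read_lock( struct rwlock * rw, unsigned id ) {
	while( __atomic_load_n( &rw->writer, __ATOMIC_RELAXED ) );       // a writer is in progress
	spin_lock( &rw->slot[id].lock );
}
static void read_unlock( struct rwlock * rw, unsigned id ) { spin_unlock( &rw->slot[id].lock ); }

// Writer side (assumed shape of ready_mutate_lock/unlock): exclude other writers,
// then drain every reader by taking each slot in turn.
static void write_lock( struct rwlock * rw ) {
	spin_lock( &rw->writer );
	for( unsigned i = 0; i < rw->nreaders; i++ ) spin_lock( &rw->slot[i].lock );
}
static void write_unlock( struct rwlock * rw ) {
	for( unsigned i = 0; i < rw->nreaders; i++ ) spin_unlock( &rw->slot[i].lock );
	spin_unlock( &rw->writer );
}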
libcfa/src/concurrency/thread.cfa
r397c101a → rd4f1521

@@ -41,5 +41,6 @@
 	self_mon_p = &self_mon;
 	curr_cluster = &cl;
-	next = NULL;
+	next = 0p;
+	prev = 0p;
 
 	node.next = NULL;