Changes in / [d4f1521:397c101a]
- Location: libcfa/src
- Files: 1 deleted, 8 edited
- Legend: Unmodified, Added, Removed
-
libcfa/src/Makefile.am
rd4f1521 r397c101a 48 48 thread_headers_nosrc = concurrency/invoke.h 49 49 thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa 50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa${thread_headers:.hfa=.cfa}50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa} 51 51 else 52 52 headers = -
libcfa/src/Makefile.in
rd4f1521 r397c101a 165 165 concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \ 166 166 concurrency/invoke.c concurrency/preemption.cfa \ 167 concurrency/ ready_queue.cfa concurrency/coroutine.cfa \168 concurrency/ thread.cfa concurrency/kernel.cfa \169 concurrency/m onitor.cfa concurrency/mutex.cfa167 concurrency/coroutine.cfa concurrency/thread.cfa \ 168 concurrency/kernel.cfa concurrency/monitor.cfa \ 169 concurrency/mutex.cfa 170 170 @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \ 171 171 @BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \ … … 174 174 @BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \ 175 175 @BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \ 176 @BUILDLIB_TRUE@ concurrency/preemption.lo \ 177 @BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3) 176 @BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3) 178 177 am_libcfathread_la_OBJECTS = $(am__objects_4) 179 178 libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS) … … 464 463 @BUILDLIB_FALSE@thread_headers = 465 464 @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa 466 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa${thread_headers:.hfa=.cfa}465 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa} 467 466 468 467 #---------------------------------------------------------------------------------------------------------------- … … 600 599 concurrency/$(DEPDIR)/$(am__dirstamp) 601 600 concurrency/preemption.lo: concurrency/$(am__dirstamp) \ 602 concurrency/$(DEPDIR)/$(am__dirstamp)603 concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \604 601 concurrency/$(DEPDIR)/$(am__dirstamp) 605 602 
concurrency/coroutine.lo: concurrency/$(am__dirstamp) \ -
libcfa/src/bits/defs.hfa
rd4f1521 r397c101a 53 53 return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 54 54 } 55 56 #define __CFA_NO_BIT_TEST_AND_SET__57 58 static inline bool bts(volatile unsigned long long int * target, unsigned long long int bit ) {59 #if defined(__CFA_NO_BIT_TEST_AND_SET__)60 unsigned long long int mask = 1ul << bit;61 unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);62 return (ret & mask) != 0;63 #else64 int result = 0;65 asm volatile(66 "LOCK btsq %[bit], %[target]\n\t"67 :"=@ccc" (result)68 : [target] "m" (*target), [bit] "r" (bit)69 );70 return result != 0;71 #endif72 }73 74 static inline bool btr(volatile unsigned long long int * target, unsigned long long int bit ) {75 #if defined(__CFA_NO_BIT_TEST_AND_SET__)76 unsigned long long int mask = 1ul << bit;77 unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);78 return (ret & mask) != 0;79 #else80 int result = 0;81 asm volatile(82 "LOCK btrq %[bit], %[target]\n\t"83 :"=@ccc" (result)84 : [target] "m" (*target), [bit] "r" (bit)85 );86 return result != 0;87 #endif88 } -
libcfa/src/concurrency/invoke.h
rd4f1521 r397c101a 189 189 // instrusive link field for threads 190 190 struct thread_desc * next; 191 struct thread_desc * prev;192 unsigned long long ts;193 191 194 192 struct { -
libcfa/src/concurrency/kernel.cfa
rd4f1521 r397c101a 210 210 this.name = name; 211 211 this.cltr = &cltr; 212 id = -1u;213 212 terminated{ 0 }; 214 213 do_terminate = false; … … 240 239 this.preemption_rate = preemption_rate; 241 240 ready_queue{}; 242 ready_lock{}; 243 241 ready_queue_lock{}; 242 243 procs{ __get }; 244 244 idles{ __get }; 245 245 threads{ __get }; … … 270 270 __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this); 271 271 272 // register the processor unless it's the main thread which is handled in the boot sequence 273 if(this != mainProcessor) 274 this->id = doregister(this->cltr, this); 272 doregister(this->cltr, this); 275 273 276 274 { … … 308 306 } 309 307 308 unregister(this->cltr, this); 309 310 310 V( this->terminated ); 311 312 // unregister the processor unless it's the main thread which is handled in the boot sequence313 if(this != mainProcessor)314 unregister(this->cltr, this);315 311 316 312 __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this); … … 509 505 510 506 with( *thrd->curr_cluster ) { 511 ready_schedule_lock(*thrd->curr_cluster, kernelTLS.this_processor); 512 __atomic_acquire(&ready_queue.lock); 513 thrd->ts = rdtscl(); 514 bool was_empty = push( ready_queue, thrd ); 515 __atomic_unlock(&ready_queue.lock); 516 ready_schedule_unlock(*thrd->curr_cluster, kernelTLS.this_processor); 507 lock ( ready_queue_lock __cfaabi_dbg_ctx2 ); 508 bool was_empty = !(ready_queue != 0); 509 append( ready_queue, thrd ); 510 unlock( ready_queue_lock ); 517 511 518 512 if(was_empty) { … … 535 529 thread_desc * nextThread(cluster * this) with( *this ) { 536 530 verify( ! 
kernelTLS.preemption_state.enabled ); 537 538 ready_schedule_lock(*this, kernelTLS.this_processor); 539 __atomic_acquire(&ready_queue.lock); 540 thread_desc * head; 541 __attribute__((unused)) bool _; 542 [head, _] = pop( ready_queue ); 543 __atomic_unlock(&ready_queue.lock); 544 ready_schedule_unlock(*this, kernelTLS.this_processor); 545 531 lock( ready_queue_lock __cfaabi_dbg_ctx2 ); 532 thread_desc * head = pop_head( ready_queue ); 533 unlock( ready_queue_lock ); 546 534 verify( ! kernelTLS.preemption_state.enabled ); 547 535 return head; … … 705 693 pending_preemption = false; 706 694 kernel_thread = pthread_self(); 707 id = -1u;708 695 709 696 runner{ &this }; … … 715 702 mainProcessor = (processor *)&storage_mainProcessor; 716 703 (*mainProcessor){}; 717 718 mainProcessor->id = doregister(mainCluster, mainProcessor);719 704 720 705 //initialize the global state variables … … 763 748 kernel_stop_preemption(); 764 749 765 unregister(mainCluster, mainProcessor);766 767 750 // Destroy the main processor and its context in reverse order of construction 768 751 // These were manually constructed so we need manually destroy them 769 752 ^(mainProcessor->runner){}; 770 ^( *mainProcessor){};753 ^(mainProcessor){}; 771 754 772 755 // Final step, destroy the main thread since it is no longer needed 773 // Since we provided a stack to this task it will not destroy anything 774 ^(*mainThread){}; 775 776 ^(*mainCluster){}; 756 // Since we provided a stack to this taxk it will not destroy anything 757 ^(mainThread){}; 777 758 778 759 ^(__cfa_dbg_global_clusters.list){}; … … 790 771 with( *cltr ) { 791 772 lock (proc_list_lock __cfaabi_dbg_ctx2); 773 remove (procs, *this); 792 774 push_front(idles, *this); 793 775 unlock (proc_list_lock); … … 803 785 lock (proc_list_lock __cfaabi_dbg_ctx2); 804 786 remove (idles, *this); 787 push_front(procs, *this); 805 788 unlock (proc_list_lock); 806 789 } … … 943 926 } 944 927 928 void doregister( cluster * cltr, processor * proc ) { 929 
lock (cltr->proc_list_lock __cfaabi_dbg_ctx2); 930 cltr->nprocessors += 1; 931 push_front(cltr->procs, *proc); 932 unlock (cltr->proc_list_lock); 933 } 934 935 void unregister( cluster * cltr, processor * proc ) { 936 lock (cltr->proc_list_lock __cfaabi_dbg_ctx2); 937 remove(cltr->procs, *proc ); 938 cltr->nprocessors -= 1; 939 unlock(cltr->proc_list_lock); 940 } 941 945 942 //----------------------------------------------------------------------------- 946 943 // Debug -
libcfa/src/concurrency/kernel.hfa
rd4f1521 r397c101a 107 107 // Cluster from which to get threads 108 108 struct cluster * cltr; 109 unsigned int id;110 109 111 110 // Name of the processor … … 159 158 } 160 159 161 162 //-----------------------------------------------------------------------------163 // Cluster Tools164 struct __processor_id;165 166 // Reader-Writer lock protecting the ready-queue167 struct __clusterRWLock_t {168 // total cachelines allocated169 unsigned int max;170 171 // cachelines currently in use172 volatile unsigned int alloc;173 174 // cachelines ready to itereate over175 // (!= to alloc when thread is in second half of doregister)176 volatile unsigned int ready;177 178 // writer lock179 volatile bool lock;180 181 // data pointer182 __processor_id * data;183 };184 185 void ?{}(__clusterRWLock_t & this);186 void ^?{}(__clusterRWLock_t & this);187 188 // Underlying sub quues of the ready queue189 struct __attribute__((aligned(128))) __intrusive_ready_queue_t {190 // spin lock protecting the queue191 volatile bool lock;192 193 // anchor for the head and the tail of the queue194 struct __sentinel_t {195 struct thread_desc * next;196 struct thread_desc * prev;197 unsigned long long ts;198 } before, after;199 200 // Optional statistic counters201 #ifndef __CFA_NO_SCHED_STATS__202 struct __attribute__((aligned(64))) {203 // difference between number of push and pops204 ssize_t diff;205 206 // total number of pushes and pops207 size_t push;208 size_t pop ;209 } stat;210 #endif211 };212 213 void ?{}(__intrusive_ready_queue_t & this);214 void ^?{}(__intrusive_ready_queue_t & this);215 216 160 //----------------------------------------------------------------------------- 217 161 // Cluster 218 162 struct cluster { 219 163 // Ready queue locks 220 __ clusterRWLock_t ready_lock;164 __spinlock_t ready_queue_lock; 221 165 222 166 // Ready queue for threads 223 __ intrusive_ready_queue_tready_queue;167 __queue_t(thread_desc) ready_queue; 224 168 225 169 // Name of the cluster … … 231 175 
// List of processors 232 176 __spinlock_t proc_list_lock; 177 __dllist_t(struct processor) procs; 233 178 __dllist_t(struct processor) idles; 179 unsigned int nprocessors; 234 180 235 181 // List of threads -
libcfa/src/concurrency/kernel_private.hfa
rd4f1521 r397c101a 99 99 //----------------------------------------------------------------------------- 100 100 // Utils 101 #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T))))static char storage_##X[sizeof(T)]101 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)] 102 102 103 103 static inline uint32_t tls_rand() { … … 115 115 void unregister( struct cluster * cltr, struct thread_desc & thrd ); 116 116 117 //======================================================================= 118 // Cluster lock API 119 //======================================================================= 120 struct __attribute__((aligned(64))) __processor_id { 121 processor * volatile handle; 122 volatile bool lock; 123 }; 124 125 // Lock-Free registering/unregistering of threads 126 // Register a processor to a given cluster and get its unique id in return 127 unsigned doregister( struct cluster * cltr, struct processor * proc ); 128 129 // Unregister a processor from a given cluster using its id, getting back the original pointer 130 void unregister( struct cluster * cltr, struct processor * proc ); 131 132 //======================================================================= 133 // Reader-writer lock implementation 134 // Concurrent with doregister/unregister, 135 // i.e., threads can be added at any point during or between the entry/exit 136 static inline void __atomic_acquire(volatile bool * ll) { 137 while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) { 138 while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED)) 139 asm volatile("pause"); 140 } 141 /* paranoid */ verify(*ll); 142 } 143 144 static inline bool __atomic_try_acquire(volatile bool * ll) { 145 return __atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST); 146 } 147 148 static inline void __atomic_unlock(volatile bool * ll) { 149 /* paranoid */ verify(*ll); 150 __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE); 151 } 152 153 
//----------------------------------------------------------------------- 154 // Reader side : acquire when using the ready queue to schedule but not 155 // creating/destroying queues 156 static inline void ready_schedule_lock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) { 157 unsigned iproc = proc->id; 158 /*paranoid*/ verify(data[iproc].handle == proc); 159 /*paranoid*/ verify(iproc < ready); 160 161 // Step 1 : make sure no writer are in the middle of the critical section 162 while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED)) 163 asm volatile("pause"); 164 165 // Fence needed because we don't want to start trying to acquire the lock 166 // before we read a false. 167 // Not needed on x86 168 // std::atomic_thread_fence(std::memory_order_seq_cst); 169 170 // Step 2 : acquire our local lock 171 __atomic_acquire( &data[iproc].lock ); 172 /*paranoid*/ verify(data[iproc].lock); 173 } 174 175 static inline void ready_schedule_unlock( struct cluster & cltr, struct processor * proc) with(cltr.ready_lock) { 176 unsigned iproc = proc->id; 177 /*paranoid*/ verify(data[iproc].handle == proc); 178 /*paranoid*/ verify(iproc < ready); 179 /*paranoid*/ verify(data[iproc].lock); 180 __atomic_store_n(&data[iproc].lock, false, __ATOMIC_RELEASE); 181 } 182 183 //----------------------------------------------------------------------- 184 // Writer side : acquire when changing the ready queue, e.g. adding more 185 // queues or removing them. 186 uint_fast32_t ready_mutate_lock( struct cluster & cltr ); 187 188 void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t ); 189 190 bool push(__intrusive_ready_queue_t & this, thread_desc * node); 191 [thread_desc *, bool] pop(__intrusive_ready_queue_t & this); 117 void doregister( struct cluster * cltr, struct processor * proc ); 118 void unregister( struct cluster * cltr, struct processor * proc ); 192 119 193 120 // Local Variables: // -
libcfa/src/concurrency/thread.cfa
rd4f1521 r397c101a 41 41 self_mon_p = &self_mon; 42 42 curr_cluster = &cl; 43 next = 0p; 44 prev = 0p; 43 next = NULL; 45 44 46 45 node.next = NULL;
Note: See TracChangeset
for help on using the changeset viewer.