Changes in / [e307e12:f80f840]
- Location: libcfa/src
- Files: 1 added, 9 edited
Legend:
- Unmodified: shown with both the old and the new line number
- Added: marked "+" with the new line number only
- Removed: marked "-" with the old line number only
libcfa/src/Makefile.am
   48   48  thread_headers_nosrc = concurrency/invoke.h
   49   49  thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
-  50       thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
+       50  thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
   51   51  else
   52   52  headers =
libcfa/src/Makefile.in
  165  165      concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
  166  166      concurrency/invoke.c concurrency/preemption.cfa \
- 167           concurrency/coroutine.cfa concurrency/thread.cfa \
- 168           concurrency/kernel.cfa concurrency/monitor.cfa \
- 169           concurrency/mutex.cfa
+      167      concurrency/ready_queue.cfa concurrency/coroutine.cfa \
+      168      concurrency/thread.cfa concurrency/kernel.cfa \
+      169      concurrency/monitor.cfa concurrency/mutex.cfa
  170  170  @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
  171  171  @BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
  … …
  174  174  @BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \
  175  175  @BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
- 176       @BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3)
+      176  @BUILDLIB_TRUE@ concurrency/preemption.lo \
+      177  @BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3)
  177  178  am_libcfathread_la_OBJECTS = $(am__objects_4)
  178  179  libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS)
  … …
  462  463  @BUILDLIB_FALSE@thread_headers =
  463  464  @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
- 464       @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
+      465  @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
  465  466
  466  467  #----------------------------------------------------------------------------------------------------------------
  … …
  598  599      concurrency/$(DEPDIR)/$(am__dirstamp)
  599  600  concurrency/preemption.lo: concurrency/$(am__dirstamp) \
+      601      concurrency/$(DEPDIR)/$(am__dirstamp)
+      602  concurrency/ready_queue.lo: concurrency/$(am__dirstamp) \
  600  603      concurrency/$(DEPDIR)/$(am__dirstamp)
  601  604  concurrency/coroutine.lo: concurrency/$(am__dirstamp) \
libcfa/src/bits/defs.hfa
   53   53      return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
   54   54  }
+       55
+       56  // #define __CFA_NO_BIT_TEST_AND_SET__
+       57
+       58  static inline bool bts(volatile unsigned long long int * target, unsigned long long int bit ) {
+       59      #if defined(__CFA_NO_BIT_TEST_AND_SET__)
+       60          unsigned long long int mask = 1ul << bit;
+       61          unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
+       62          return (ret & mask) != 0;
+       63      #else
+       64          int result = 0;
+       65          asm volatile(
+       66              "LOCK btsq %[bit], %[target]\n\t"
+       67              : "=@ccc" (result)
+       68              : [target] "m" (*target), [bit] "r" (bit)
+       69          );
+       70          return result != 0;
+       71      #endif
+       72  }
+       73
+       74  static inline bool btr(volatile unsigned long long int * target, unsigned long long int bit ) {
+       75      #if defined(__CFA_NO_BIT_TEST_AND_SET__)
+       76          unsigned long long int mask = 1ul << bit;
+       77          unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
+       78          return (ret & mask) != 0;
+       79      #else
+       80          int result = 0;
+       81          asm volatile(
+       82              "LOCK btrq %[bit], %[target]\n\t"
+       83              : "=@ccc" (result)
+       84              : [target] "m" (*target), [bit] "r" (bit)
+       85          );
+       86          return result != 0;
+       87      #endif
+       88  }
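The new bts/btr helpers return the previous state of the selected bit, so a caller can detect an empty-to-non-empty transition with a single atomic operation. Below is a minimal, self-contained C sketch of that usage pattern; the names nonempty_mask, mark_nonempty and mark_empty are illustrative and not part of the changeset, and the portable __atomic_fetch_or/__atomic_fetch_and fallback is inlined so the sketch compiles on its own (the header additionally provides the LOCK btsq/btrq fast path).

    #include <stdbool.h>

    static volatile unsigned long long nonempty_mask = 0;   // bit i set => sub-queue i has work

    // behaves like bts(): atomically set bit i, report whether this push made the
    // queue transition from empty to non-empty (i.e. the bit was previously clear)
    static bool mark_nonempty(unsigned long long i) {
        unsigned long long mask = 1ull << i;
        unsigned long long old = __atomic_fetch_or(&nonempty_mask, mask, __ATOMIC_RELAXED);
        return (old & mask) == 0;
    }

    // behaves like btr(): atomically clear bit i, report whether it was set before
    static bool mark_empty(unsigned long long i) {
        unsigned long long mask = 1ull << i;
        unsigned long long old = __atomic_fetch_and(&nonempty_mask, ~mask, __ATOMIC_RELAXED);
        return (old & mask) != 0;
    }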
libcfa/src/concurrency/invoke.h
  158  158  };
  159  159
+      160  // Link lists fields
+      161  // instrusive link field for threads
+      162  struct __thread_desc_link {
+      163      struct thread_desc * next;
+      164      struct thread_desc * prev;
+      165      unsigned long long ts;
+      166  };
+      167
  160  168  struct thread_desc {
  161  169      // Core threading fields
  … …
  188  196      // Link lists fields
  189  197      // instrusive link field for threads
- 190           struct thread_desc * next;
+      198      struct __thread_desc_link link;
  191  199
  192  200      struct {
  … …
  199  207  extern "Cforall" {
  200  208      static inline thread_desc *& get_next( thread_desc & this ) {
- 201               return this.next;
+      209          return this.link.next;
  202  210      }
  203  211
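Replacing the bare next pointer with a __thread_desc_link struct gives every thread an intrusive doubly-linked field plus a timestamp. The following short C sketch illustrates the intrusive-link idea only; elem, elem_link and insert_after are made-up names, not libcfa API. Because the element carries its own links, queues never allocate separate list nodes and insertion or removal is O(1) pointer surgery.

    #include <stddef.h>

    struct elem;

    struct elem_link {                 // same shape as __thread_desc_link
        struct elem *next, *prev;
        unsigned long long ts;         // e.g. a timestamp taken when the element is queued
    };

    struct elem {
        int payload;
        struct elem_link link;         // intrusive link lives inside the element itself
    };

    // doubly-linked insert of `n` after `pos`, purely through the embedded links
    static void insert_after(struct elem *pos, struct elem *n) {
        n->link.prev = pos;
        n->link.next = pos->link.next;
        if (pos->link.next) pos->link.next->link.prev = n;
        pos->link.next = n;
    }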
libcfa/src/concurrency/kernel.cfa
  187  187      self_mon.recursion = 1;
  188  188      self_mon_p = &self_mon;
- 189           next = 0p;
+      189      link.next = 0p;
+      190      link.prev = 0p;
  190  191
  191  192      node.next = 0p;
  … …
  212  213      this.name = name;
  213  214      this.cltr = &cltr;
+      215      id = -1u;
  214  216      terminated{ 0 };
  215  217      do_terminate = false;
  … …
  242  244      this.preemption_rate = preemption_rate;
  243  245      ready_queue{};
- 244           ready_queue_lock{};
- 245
- 246           procs{ __get };
+      246      ready_lock{};
+      247
  247  248      idles{ __get };
  248  249      threads{ __get };
  … …
  273  274      __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
  274  275
- 275           doregister(this->cltr, this);
+      276      // register the processor unless it's the main thread which is handled in the boot sequence
+      277      if(this != mainProcessor) {
+      278          this->id = doregister(this->cltr, this);
+      279          ready_queue_grow( this->cltr );
+      280      }
+      281
  276  282
  277  283      {
  … …
  305  311      }
  306  312
- 307           unregister(this->cltr, this);
- 308
  309  313      V( this->terminated );
  310  314
+      315
+      316      // unregister the processor unless it's the main thread which is handled in the boot sequence
+      317      if(this != mainProcessor) {
+      318          ready_queue_shrink( this->cltr );
+      319          unregister(this->cltr, this);
+      320      }
+      321
  311  322      __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
+      323
+      324      stats_tls_tally(this->cltr);
  312  325  }
  … …
  469  482      );
  470  483
- 471           Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
+      484      Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
  472  485
  473  486      Abort( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
  … …
  535  548      verify( ! kernelTLS.preemption_state.enabled );
  536  549
- 537           verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
+      550      verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
+      551
+      552
+      553      ready_schedule_lock(thrd->curr_cluster, kernelTLS.this_processor);
+      554      bool was_empty = push( thrd->curr_cluster, thrd );
+      555      ready_schedule_unlock(thrd->curr_cluster, kernelTLS.this_processor);
  538  556
  539  557      with( *thrd->curr_cluster ) {
- 540               lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
- 541               bool was_empty = !(ready_queue != 0);
- 542               append( ready_queue, thrd );
- 543               unlock( ready_queue_lock );
- 544
  545  558          if(was_empty) {
  546  559              lock      (proc_list_lock __cfaabi_dbg_ctx2);
  … …
  553  566              wake_fast(idle);
  554  567          }
- 555
  556  568  }
  557  569
  … …
  562  574  thread_desc * nextThread(cluster * this) with( *this ) {
  563  575      verify( ! kernelTLS.preemption_state.enabled );
- 564           lock( ready_queue_lock __cfaabi_dbg_ctx2 );
- 565           thread_desc * head = pop_head( ready_queue );
- 566           unlock( ready_queue_lock );
+      576
+      577      ready_schedule_lock(this, kernelTLS.this_processor);
+      578      thread_desc * head = pop( this );
+      579      ready_schedule_unlock(this, kernelTLS.this_processor);
+      580
  567  581      verify( ! kernelTLS.preemption_state.enabled );
  568  582      return head;
  … …
  726  740      pending_preemption = false;
  727  741      kernel_thread = pthread_self();
+      742      id = -1u;
  728  743
  729  744      runner{ &this };
  … …
  735  750      mainProcessor = (processor *)&storage_mainProcessor;
  736  751      (*mainProcessor){};
+      752
+      753      mainProcessor->id = doregister(mainCluster, mainProcessor);
  737  754
  738  755      //initialize the global state variables
  … …
  781  798      kernel_stop_preemption();
  782  799
+      800      unregister(mainCluster, mainProcessor);
+      801
  783  802      // Destroy the main processor and its context in reverse order of construction
  784  803      // These were manually constructed so we need manually destroy them
- 785           ^(mainProcessor->runner){};
- 786           ^(mainProcessor){};
+      804      void ^?{}(processor & this) with( this ) {
+      805          //don't join the main thread here, that wouldn't make any sense
+      806          __cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
+      807      }
+      808
+      809      ^(*mainProcessor){};
  787  810
  788  811      // Final step, destroy the main thread since it is no longer needed
- 789           // Since we provided a stack to this taxk it will not destroy anything
- 790           ^(mainThread){};
+      812      // Since we provided a stack to this task it will not destroy anything
+      813      ^(*mainThread){};
+      814
+      815      ^(*mainCluster){};
  791  816
  792  817      ^(__cfa_dbg_global_clusters.list){};
  … …
  804  829      with( *cltr ) {
  805  830          lock      (proc_list_lock __cfaabi_dbg_ctx2);
- 806               remove    (procs, *this);
  807  831          push_front(idles, *this);
  808  832          unlock    (proc_list_lock);
  … …
  818  842          lock      (proc_list_lock __cfaabi_dbg_ctx2);
  819  843          remove    (idles, *this);
- 820               push_front(procs, *this);
  821  844          unlock    (proc_list_lock);
  822  845      }
  … …
  959  982  }
  960  983
- 961       void doregister( cluster * cltr, processor * proc ) {
- 962           lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
- 963           cltr->nprocessors += 1;
- 964           push_front(cltr->procs, *proc);
- 965           unlock    (cltr->proc_list_lock);
- 966       }
- 967
- 968       void unregister( cluster * cltr, processor * proc ) {
- 969           lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
- 970           remove(cltr->procs, *proc );
- 971           cltr->nprocessors -= 1;
- 972           unlock(cltr->proc_list_lock);
- 973       }
- 974
  975  984  //-----------------------------------------------------------------------------
  976  985  // Debug
libcfa/src/concurrency/kernel.hfa
  107  107      // Cluster from which to get threads
  108  108      struct cluster * cltr;
+      109      unsigned int id;
  109  110
  110  111      // Name of the processor
  … …
  161  162  }
  162  163
+      164
+      165  //-----------------------------------------------------------------------------
+      166  // Cluster Tools
+      167  struct __processor_id;
+      168
+      169  // Reader-Writer lock protecting the ready-queue
+      170  struct __clusterRWLock_t {
+      171      // total cachelines allocated
+      172      unsigned int max;
+      173
+      174      // cachelines currently in use
+      175      volatile unsigned int alloc;
+      176
+      177      // cachelines ready to itereate over
+      178      // (!= to alloc when thread is in second half of doregister)
+      179      volatile unsigned int ready;
+      180
+      181      // writer lock
+      182      volatile bool lock;
+      183
+      184      // data pointer
+      185      __processor_id * data;
+      186  };
+      187
+      188  void  ?{}(__clusterRWLock_t & this);
+      189  void ^?{}(__clusterRWLock_t & this);
+      190
+      191  // Underlying sub quues of the ready queue
+      192  struct __attribute__((aligned(128))) __intrusive_ready_queue_t {
+      193      // spin lock protecting the queue
+      194      volatile bool lock;
+      195      unsigned int last_id;
+      196
+      197      // anchor for the head and the tail of the queue
+      198      struct __sentinel_t {
+      199          // Link lists fields
+      200          // instrusive link field for threads
+      201          // must be exactly as in thread_desc
+      202          __thread_desc_link link;
+      203      } before, after;
+      204
+      205      // Optional statistic counters
+      206      #if !defined(__CFA_NO_SCHED_STATS__)
+      207          struct __attribute__((aligned(64))) {
+      208              // difference between number of push and pops
+      209              ssize_t diff;
+      210
+      211              // total number of pushes and pops
+      212              size_t push;
+      213              size_t pop ;
+      214          } stat;
+      215      #endif
+      216  };
+      217
+      218  void  ?{}(__intrusive_ready_queue_t & this);
+      219  void ^?{}(__intrusive_ready_queue_t & this);
+      220
+      221  typedef unsigned long long __cfa_readyQ_mask_t;
+      222
+      223  // enum {
+      224  //     __cfa_ready_queue_mask_size = (64 - sizeof(size_t)) / sizeof(size_t),
+      225  //     __cfa_max_ready_queues = __cfa_ready_queue_mask_size * 8 * sizeof(size_t)
+      226  // };
+      227
+      228  #define __cfa_readyQ_mask_size ((64 - sizeof(size_t)) / sizeof(__cfa_readyQ_mask_t))
+      229  #define __cfa_max_readyQs (__cfa_readyQ_mask_size * 8 * sizeof(__cfa_readyQ_mask_t))
+      230
+      231  //TODO adjust cache size to ARCHITECTURE
+      232  struct __attribute__((aligned(128))) __ready_queue_t {
+      233      struct {
+      234          volatile size_t count;
+      235          volatile __cfa_readyQ_mask_t mask[ __cfa_readyQ_mask_size ];
+      236      } empty;
+      237
+      238      struct __attribute__((aligned(64))) {
+      239          __intrusive_ready_queue_t * volatile data;
+      240          volatile size_t count;
+      241      } list;
+      242
+      243      #if !defined(__CFA_NO_STATISTICS__)
+      244          __attribute__((aligned(64))) struct {
+      245              struct {
+      246                  struct {
+      247                      volatile size_t attempt;
+      248                      volatile size_t success;
+      249                  } push;
+      250                  struct {
+      251                      volatile size_t maskrds;
+      252                      volatile size_t attempt;
+      253                      volatile size_t success;
+      254                  } pop;
+      255              } pick;
+      256              struct {
+      257                  volatile size_t value;
+      258                  volatile size_t count;
+      259              } full;
+      260          } global_stats;
+      261
+      262      #endif
+      263  };
+      264
+      265  void  ?{}(__ready_queue_t & this);
+      266  void ^?{}(__ready_queue_t & this);
+      267
  163  268  //-----------------------------------------------------------------------------
  164  269  // Cluster
  165  270  struct cluster {
  166  271      // Ready queue locks
- 167           __spinlock_t ready_queue_lock;
+      272      __clusterRWLock_t ready_lock;
  168  273
  169  274      // Ready queue for threads
- 170           __queue_t(thread_desc) ready_queue;
+      275      __ready_queue_t ready_queue;
  171  276
  172  277      // Name of the cluster
  … …
  178  283      // List of processors
  179  284      __spinlock_t proc_list_lock;
- 180           __dllist_t(struct processor) procs;
  181  285      __dllist_t(struct processor) idles;
- 182           unsigned int nprocessors;
  183  286
  184  287      // List of threads
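The two macros size the emptiness bitmask so the mask plus its count fill one 64-byte block. A quick self-contained check of that arithmetic, assuming an LP64 target where sizeof(size_t) == sizeof(unsigned long long) == 8: seven 64-bit mask words, hence 7 * 64 = 448 addressable sub-queues.

    #include <stdio.h>

    typedef unsigned long long mask_t;   // stands in for __cfa_readyQ_mask_t

    int main(void) {
        size_t mask_words = (64 - sizeof(size_t)) / sizeof(mask_t);   // __cfa_readyQ_mask_size: 7 on LP64
        size_t max_queues = mask_words * 8 * sizeof(mask_t);          // __cfa_max_readyQs: 7 * 64 = 448
        printf("mask words: %zu, max ready sub-queues: %zu\n", mask_words, max_queues);
        return 0;
    }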
libcfa/src/concurrency/kernel_private.hfa
  101  101  //-----------------------------------------------------------------------------
  102  102  // Utils
- 103       #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
+      103  #define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
  104  104
  105  105  static inline uint32_t tls_rand() {
  … …
  117  117  void unregister( struct cluster * cltr, struct thread_desc & thrd );
  118  118
- 119       void doregister( struct cluster * cltr, struct processor * proc );
- 120       void unregister( struct cluster * cltr, struct processor * proc );
+      119  //=======================================================================
+      120  // Cluster lock API
+      121  //=======================================================================
+      122  struct __attribute__((aligned(64))) __processor_id {
+      123      processor * volatile handle;
+      124      volatile bool lock;
+      125  };
+      126
+      127  // Lock-Free registering/unregistering of threads
+      128  // Register a processor to a given cluster and get its unique id in return
+      129  unsigned doregister( struct cluster * cltr, struct processor * proc );
+      130
+      131  // Unregister a processor from a given cluster using its id, getting back the original pointer
+      132  void unregister( struct cluster * cltr, struct processor * proc );
+      133
+      134  //=======================================================================
+      135  // Reader-writer lock implementation
+      136  // Concurrent with doregister/unregister,
+      137  //    i.e., threads can be added at any point during or between the entry/exit
+      138  static inline void __atomic_acquire(volatile bool * ll) {
+      139      while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
+      140          while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
+      141              asm volatile("pause");
+      142      }
+      143      /* paranoid */ verify(*ll);
+      144  }
+      145
+      146  static inline bool __atomic_try_acquire(volatile bool * ll) {
+      147      return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
+      148  }
+      149
+      150  static inline void __atomic_unlock(volatile bool * ll) {
+      151      /* paranoid */ verify(*ll);
+      152      __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
+      153  }
+      154
+      155  //-----------------------------------------------------------------------
+      156  // Reader side : acquire when using the ready queue to schedule but not
+      157  //               creating/destroying queues
+      158  static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
+      159      unsigned iproc = proc->id;
+      160      /*paranoid*/ verify(data[iproc].handle == proc);
+      161      /*paranoid*/ verify(iproc < ready);
+      162
+      163      // Step 1 : make sure no writer are in the middle of the critical section
+      164      while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
+      165          asm volatile("pause");
+      166
+      167      // Fence needed because we don't want to start trying to acquire the lock
+      168      // before we read a false.
+      169      // Not needed on x86
+      170      // std::atomic_thread_fence(std::memory_order_seq_cst);
+      171
+      172      // Step 2 : acquire our local lock
+      173      __atomic_acquire( &data[iproc].lock );
+      174      /*paranoid*/ verify(data[iproc].lock);
+      175  }
+      176
+      177  static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
+      178      unsigned iproc = proc->id;
+      179      /*paranoid*/ verify(data[iproc].handle == proc);
+      180      /*paranoid*/ verify(iproc < ready);
+      181      /*paranoid*/ verify(data[iproc].lock);
+      182      __atomic_store_n(&data[iproc].lock, false, __ATOMIC_RELEASE);
+      183  }
+      184
+      185  //-----------------------------------------------------------------------
+      186  // Writer side : acquire when changing the ready queue, e.g. adding more
+      187  //               queues or removing them.
+      188  uint_fast32_t ready_mutate_lock( struct cluster & cltr );
+      189
+      190  void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t );
+      191
+      192  //=======================================================================
+      193  // Ready-Queue API
+      194
+      195  __attribute__((hot)) bool push(struct cluster * cltr, struct thread_desc * thrd);
+      196  __attribute__((hot)) thread_desc * pop(struct cluster * cltr);
+      197  void ready_queue_grow  (struct cluster * cltr);
+      198  void ready_queue_shrink(struct cluster * cltr);
+      199
+      200  #if !defined(__CFA_NO_STATISTICS__)
+      201      void stats_tls_tally(struct cluster * cltr);
+      202  #else
+      203      static inline void stats_tls_tally(struct cluster * cltr) {}
+      204  #endif
  121  205
  122  206  // Local Variables: //
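ready_schedule_lock/unlock are the reader side of the scheme the comments above describe: one flag per registered processor plus one global writer flag, so uncontended scheduling only touches the processor's own cache line. The writer side (ready_mutate_lock/ready_mutate_unlock) is only declared in this header, so the following self-contained C11 sketch of both sides illustrates the general technique with a guessed writer protocol; it is not the library's actual implementation, and all names are stand-ins.

    #include <stdatomic.h>
    #include <stdbool.h>

    #define NREADERS 64

    static atomic_bool writer_lock;              // plays the role of __clusterRWLock_t.lock
    static atomic_bool reader_lock[NREADERS];    // plays the role of __processor_id.lock (one per cache line in the real code)

    void reader_acquire(unsigned i) {            // analogous to ready_schedule_lock
        // wait for any in-progress writer, then take only our own flag
        while (atomic_load_explicit(&writer_lock, memory_order_relaxed)) { /* spin */ }
        bool expected;
        do { expected = false; }
        while (!atomic_compare_exchange_weak(&reader_lock[i], &expected, true));
    }

    void reader_release(unsigned i) {            // analogous to ready_schedule_unlock
        atomic_store_explicit(&reader_lock[i], false, memory_order_release);
    }

    void writer_acquire(unsigned nregistered) {  // plausible shape of ready_mutate_lock (assumption)
        bool expected = false;
        while (!atomic_compare_exchange_weak(&writer_lock, &expected, true)) expected = false;
        for (unsigned i = 0; i < nregistered; i++) {     // then take every reader flag in turn
            bool e;
            do { e = false; }
            while (!atomic_compare_exchange_weak(&reader_lock[i], &e, true));
        }
    }

    void writer_release(unsigned nregistered) {  // plausible shape of ready_mutate_unlock (assumption)
        for (unsigned i = 0; i < nregistered; i++)
            atomic_store_explicit(&reader_lock[i], false, memory_order_release);
        atomic_store_explicit(&writer_lock, false, memory_order_release);
    }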
libcfa/src/concurrency/monitor.cfa
  841  841      for(    thread_desc ** thrd_it = &entry_queue.head;
  842  842          *thrd_it;
- 843               thrd_it = &(*thrd_it)->next
+      843          thrd_it = &(*thrd_it)->link.next
  844  844      ) {
  845  845          // For each acceptable check if it matches
libcfa/src/concurrency/thread.cfa
   41   41      self_mon_p = &self_mon;
   42   42      curr_cluster = &cl;
-  43           next = 0p;
+       43      link.next = 0p;
+       44      link.prev = 0p;
   44   45
   45   46      node.next = 0p;