Context Navigation

actor.hfa@ b28ce93

Visit:

Last change on this file since b28ce93 was 6a6e205, checked in by Michael Brooks <mlbrooks@…>, 8 months ago
Clean some warnings from unused variables and pointer-to-int casts.
Property mode set to `100644`
File size: 25.1 KB

Line
1	#pragma once
2
3	#include <locks.hfa>
4	#include <limits.hfa>
5	#include <kernel.hfa>
6	#include <iofwd.hfa>
7	#include <virtual_dtor.hfa>
8
9	#ifdef __CFA_DEBUG__
10	#define CFA_DEBUG( stmt ) stmt
11	#else
12	#define CFA_DEBUG( stmt )
13	#endif // CFA_DEBUG
14
15	#define DEBUG_ABORT( cond, string ) CFA_DEBUG( if ( cond ) abort( string ) )
16
17	// Define the default number of processors created in the executor. Must be greater than 0.
18	#define __DEFAULT_EXECUTOR_PROCESSORS__ 2
19
20	// Define the default number of threads created in the executor. Must be greater than 0.
21	#define __DEFAULT_EXECUTOR_WORKERS__ 2
22
23	// Define the default number of executor request-queues (mailboxes) written to by actors and serviced by the
24	// actor-executor threads. Must be greater than 0.
25	#define __DEFAULT_EXECUTOR_RQUEUES__ 4
26
27	// Define if executor is created in a separate cluster
28	#define __DEFAULT_EXECUTOR_SEPCLUS__ false
29
30	#define __DEFAULT_EXECUTOR_BUFSIZE__ 10
31
32	#define __STEAL 1 // workstealing toggle. Disjoint from toggles above
33
34	// workstealing heuristic selection (only set one to be 1)
35	// #define RAND 0
36	#define SEARCH 1
37
38	// show stats
39	// #define ACTOR_STATS
40
41	// used to run and only track missed queue gulps
42	#ifdef ACTOR_STATS
43	#define ACTOR_STATS_QUEUE_MISSED
44	#endif
45
46	// forward decls
47	struct actor;
48	struct message;
49	struct executor;
50
51	enum allocation { Nodelete, Delete, Destroy, Finished }; // allocation status
52
// Pointer to the type-erased receive routine stored in each request. The two reference parameters are the
// receiver and the message being delivered; the two out-parameters are filled in by the routine (deliver_request
// passes the addresses of a local actor pointer and message pointer and reads both back after the call).
typedef allocation (*__receive_fn)(actor &, message &, actor **, message **);

// A unit of work placed in a mailbox: who receives, what is received, and the routine that performs the delivery.
struct request {
	actor * receiver;
	message * msg;
	__receive_fn fn;
};
59
// Plain struct holding a single int. It is not referenced anywhere else in the visible part of this file.
struct a_msg {
	int m;
};
// Empty default constructor: performs no field initialization.
static inline void ?{}( request & this ) {}

// Field-wise constructor.
static inline void ?{}( request & this, actor * receiver, message * msg, __receive_fn fn ) {
	this.fn = fn;
	this.msg = msg;
	this.receiver = receiver;
}

// Copy constructor: copies the three fields of `copy` one by one.
static inline void ?{}( request & this, request & copy ) {
	this.fn = copy.fn;
	this.msg = copy.msg;
	this.receiver = copy.receiver;
}
74
// Vector-like data structure that supports O(1) queue operations with no bound on size.
// Assumes gulping behaviour (once a remove occurs, removes happen until empty before the next insert).
struct copy_queue {
	request * buffer;		// backing array of requests
	size_t count;			// number of requests currently stored
	size_t buffer_size;		// capacity of buffer, in requests
	size_t index;			// position of the next request handed out by remove()
	size_t utilized;		// value of count recorded by transfer(); consulted by reclaim()
	size_t last_size;		// capacity before the most recent growth in insert()
};
// Empty default constructor: performs no field initialization.
static inline void ?{}( copy_queue & this ) {}

// Construct an empty queue whose buffer has room for buf_size requests.
static inline void ?{}( copy_queue & this, size_t buf_size ) with(this) {
	count = 0;
	index = 0;
	utilized = 0;
	last_size = 0;
	buffer_size = buf_size;
	buffer = aalloc( buffer_size );
}

// Destructor: releases the buffer. Debug builds abort first if requests are still queued.
static inline void ^?{}( copy_queue & this ) with(this) {
	DEBUG_ABORT( count != 0, "Actor system terminated with messages sent but not received\n" );
	adelete(buffer);
}
94
// Append a bitwise copy of elem to the queue, growing the buffer first when it is full.
// Growth doubles the capacity. A zero-capacity queue grows to one element instead: doubling zero would leave
// the buffer empty and the memcpy below would then write past its end.
static inline void insert( copy_queue & this, request & elem ) with(this) {
	if ( count >= buffer_size ) {						// full: increase array size
		last_size = buffer_size;						// remembered so reclaim() can compare against it
		buffer_size = buffer_size == 0 ? 1 : 2 * buffer_size;
		buffer = realloc( buffer, sizeof( request ) * buffer_size );
		/* paranoid */ verify( buffer );
	}
	memcpy( &buffer[count], &elem, sizeof(request) );
	count++;
}
105
// Hand out the next queued request, or a null reference (*0p) when the queue is empty.
// Once you start removing you need to remove all elements:
// it is not supported to call insert() before the array is fully empty.
static inline request & remove( copy_queue & this ) with(this) {
	if ( count == 0 ) return *0p;			// nothing queued

	count--;
	size_t taken = index;
	index = count == 0 ? 0 : index + 1;		// rewind to the front once the last element is handed out
	return buffer[taken];
}
117
// Try to give some memory back: shrink the buffer by one element when the last recorded gulp size (utilized) is
// below the capacity the buffer had before its most recent growth (last_size). Buffers of 4 or fewer elements
// are never shrunk. The utilized counter is reset in every case.
static inline void reclaim( copy_queue & this ) with(this) {
	if ( utilized >= last_size || buffer_size <= 4 ) { utilized = 0; return; }
	utilized = 0;
	buffer_size--;
	buffer = realloc( buffer, sizeof( request ) * buffer_size ); // try to reclaim some memory
}
125
// True when no requests are stored in the queue.
static inline bool is_empty( copy_queue & this ) with(this) {
	return count == 0;
}
127
// A mailbox: a spinlock plus a pointer to the copy_queue that currently receives inserted requests.
struct work_queue {
	__spinlock_t mutex_lock;				// held by insert() and transfer()
	copy_queue * owned_queue;				// copy queue allocated and cleaned up by this work_queue
	copy_queue * c_queue;					// current queue
	volatile bool being_processed;			// flag to prevent concurrent processing
	#ifdef ACTOR_STATS
	unsigned int id;
	#endif
	#ifdef ACTOR_STATS_QUEUE_MISSED
	size_t missed;							// transfers skipped due to being_processed flag being up
	#endif
}; // work_queue
// Construct a mailbox with its own copy_queue of capacity buf_size. The index i is stored as the queue id only
// when ACTOR_STATS is defined. The `missed` counter is initialized under ACTOR_STATS_QUEUE_MISSED, the same macro
// that guards its declaration, so it is also initialized when only missed gulps are being tracked.
static inline void ?{}( work_queue & this, size_t buf_size, unsigned int i ) with(this) {
	owned_queue = alloc();					// allocated separately to avoid false sharing
	(*owned_queue){ buf_size };
	c_queue = owned_queue;
	being_processed = false;
	#ifdef ACTOR_STATS
	id = i;
	#else
	(void) i;								// silence unused-parameter warning
	#endif
	#ifdef ACTOR_STATS_QUEUE_MISSED
	missed = 0;
	#endif
}
152
// Destructor: clean up the copy_queue owned by this work_queue.
static inline void ^?{}( work_queue & this ) with(this) {
	delete( owned_queue );
}
155
// Add elem to the mailbox's current copy_queue while holding the mailbox spinlock.
static inline void insert( work_queue & this, request & elem ) with(this) {
	lock( mutex_lock __cfaabi_dbg_ctx2 );
	insert( *c_queue, elem );
	unlock( mutex_lock );
} // insert
161
// Gulp: exchange this mailbox's current copy_queue with the one *transfer_to points at, so the caller receives
// the queued requests and the mailbox carries on with the caller's queue. With work stealing compiled in, a
// mailbox that is already being processed is left untouched and the skipped transfer is counted in `missed`
// (counted under ACTOR_STATS_QUEUE_MISSED, the macro that guards the field's declaration).
static inline void transfer( work_queue & this, copy_queue ** transfer_to ) with(this) {
	lock( mutex_lock __cfaabi_dbg_ctx2 );
	#ifdef __STEAL

	// check if queue is being processed elsewhere
	if ( unlikely( being_processed ) ) {
		#ifdef ACTOR_STATS_QUEUE_MISSED
		missed++;
		#endif
		unlock( mutex_lock );
		return;
	}

	being_processed = c_queue->count != 0;	// only a non-empty gulp marks the mailbox as busy
	#endif // __STEAL

	c_queue->utilized = c_queue->count;		// record the gulp size for reclaim()

	// swap copy queue ptrs
	copy_queue * temp = *transfer_to;
	*transfer_to = c_queue;
	c_queue = temp;
	unlock( mutex_lock );
} // transfer
186
// Per-worker data kept outside the worker itself: needed since some info needs to persist past worker lifetimes.
struct worker_info {
	volatile unsigned long long stamp;		// timestamp written by the worker, compared in steal_work()
	#ifdef ACTOR_STATS
	size_t stolen_from, try_steal, stolen, empty_stolen, failed_swaps, msgs_stolen;
	unsigned long long processed;
	size_t gulps;
	#endif
};
// Zero the statistics counters (when compiled in) and stamp the info with the current rdtscl() value.
static inline void ?{}( worker_info & this ) {
	#ifdef ACTOR_STATS
	this.stolen_from = 0;
	this.try_steal = 0;						// attempts to steal
	this.stolen = 0;						// successful steals
	this.empty_stolen = 0;					// queues empty after steal
	this.failed_swaps = 0;					// steal swap failures
	this.msgs_stolen = 0;					// number of messages stolen
	this.processed = 0;						// requests processed
	this.gulps = 0;							// number of gulps
	#endif
	this.stamp = rdtscl();
}
209
210	// #ifdef ACTOR_STATS
211	// unsigned int * stolen_arr;
212	// unsigned int * replaced_queue;
213	// #endif
// Executor thread that services a contiguous sub-range of the executor's mailboxes.
thread worker {
	work_queue ** request_queues;			// array of all queues
	copy_queue * current_queue;				// currently gulped queue
	executor * executor_;					// owning executor
	unsigned int start, range;				// first index and length of this worker's sub-range of request_queues
	int id;									// worker's id and index in array of workers
};
221
#ifdef ACTOR_STATS
// aggregate counters for statistics
size_t __total_tries = 0,
	__total_stolen = 0,
	__total_workers,
	__all_gulps = 0,
	__total_empty_stolen = 0,
	__total_failed_swaps = 0,
	__all_processed = 0,
	__num_actors_stats = 0,
	__all_msgs_stolen = 0;
#endif
// Construct a worker thread on cluster clu and record the queues it will service.
static inline void ?{}( worker & this, cluster & clu, work_queue ** request_queues, copy_queue * current_queue, executor * executor_,
	unsigned int start, unsigned int range, int id ) {
	((thread &)this){ clu };					// construct the underlying thread first
	this.id = id;								// worker's id and index in array of workers
	this.executor_ = executor_;					// pointer to current executor
	this.request_queues = request_queues;		// array of all queues
	this.current_queue = current_queue;			// currently gulped queue (start with empty queue to use in swap later)
	this.start = start;							// start of worker's subrange of request_queues
	this.range = range;							// size of worker's subrange of request_queues
}
237
// Set by the executor constructor when there is exactly one mailbox per worker; checked in the worker main loop.
static bool no_steal = false;

// Owns the processors, worker threads and mailboxes that run actor requests.
struct executor {
	cluster * cluster;						// if workers execute on separate cluster
	processor ** processors;				// array of virtual processors adding parallelism for workers
	work_queue * request_queues;			// master array of work request queues
	copy_queue * local_queues;				// array of all worker local queues to avoid deletion race
	work_queue ** worker_req_queues;		// secondary array of work queues to allow for swapping
	worker ** workers;						// array of workers executing work requests
	worker_info * w_infos;					// array of info about each worker
	unsigned int nprocessors, nworkers, nrqueues; // number of processors/threads/request queues
	bool seperate_clus;						// use same or separate cluster for executor
	volatile bool is_shutdown;				// flag to communicate shutdown to worker threads
}; // executor
251
252	// #ifdef ACTOR_STATS
253	// __spinlock_t out_lock;
254	// #endif
// Worker destructor. With ACTOR_STATS defined, folds this worker's counters into the global aggregates;
// otherwise it does nothing.
static inline void ^?{}( worker & mutex this ) with(this) {
	#ifdef ACTOR_STATS
	worker_info & info = executor_->w_infos[id];
	__atomic_add_fetch(&__all_gulps, info.gulps,__ATOMIC_SEQ_CST);
	__atomic_add_fetch(&__all_processed, info.processed,__ATOMIC_SEQ_CST);
	__atomic_add_fetch(&__all_msgs_stolen, info.msgs_stolen,__ATOMIC_SEQ_CST);
	__atomic_add_fetch(&__total_tries, info.try_steal, __ATOMIC_SEQ_CST);
	__atomic_add_fetch(&__total_stolen, info.stolen, __ATOMIC_SEQ_CST);
	__atomic_add_fetch(&__total_failed_swaps, info.failed_swaps, __ATOMIC_SEQ_CST);
	__atomic_add_fetch(&__total_empty_stolen, info.empty_stolen, __ATOMIC_SEQ_CST);

	// per worker steal stats (uncomment alongside the lock above this routine to print)
	// lock( out_lock __cfaabi_dbg_ctx2 );
	// printf("Worker id: %d, processed: %llu messages, attempted %lu, stole: %lu, stolen from: %lu\n", id, processed, try_steal, stolen, __atomic_add_fetch(&executor_->w_infos[id].stolen_from, 0, __ATOMIC_SEQ_CST) );
	// int count = 0;
	// int count2 = 0;
	// for ( i; range ) {
	//     if ( replaced_queue[start + i] > 0 ){
	//         count++;
	//         // printf("%d: %u, ",i, replaced_queue[i]);
	//     }
	//     if (__atomic_add_fetch(&stolen_arr[start + i],0,__ATOMIC_SEQ_CST) > 0)
	//         count2++;
	// }
	// printf("swapped with: %d of %u indices\n", count, executor_->nrqueues / executor_->nworkers );
	// printf("%d of %u indices were stolen\n", count2, executor_->nrqueues / executor_->nworkers );
	// unlock( out_lock );
	#endif
}
283
// Build an executor.
//   nprocessors   - number of processors to create on the executor's cluster
//   nworkers      - number of worker threads; must be greater than 0
//   nrqueues      - number of mailboxes; must be at least nworkers
//   seperate_clus - when true a new cluster is allocated for the executor, otherwise the active cluster is used
//   buf_size      - initial capacity of every copy_queue
// The mailboxes are divided among the workers in contiguous ranges; the first (nrqueues % nworkers) workers
// receive one extra mailbox. Aborts on invalid worker/queue counts.
static inline void ?{}( executor & this, unsigned int nprocessors, unsigned int nworkers, unsigned int nrqueues, bool seperate_clus, size_t buf_size ) with(this) {
	if ( nworkers == 0 ) abort( "nworkers needs to be > 0\n" );	// guards the divisions by nworkers below
	if ( nrqueues < nworkers ) abort( "nrqueues needs to be >= nworkers\n" );
	this.nprocessors = nprocessors;
	this.nworkers = nworkers;
	this.nrqueues = nrqueues;
	this.seperate_clus = seperate_clus;
	this.is_shutdown = false;

	if ( nworkers == nrqueues )
		no_steal = true;

	#ifdef ACTOR_STATS
	// stolen_arr = aalloc( nrqueues );
	// replaced_queue = aalloc( nrqueues );
	__total_workers = nworkers;
	#endif

	if ( seperate_clus ) {
		this.cluster = alloc();
		(*cluster){};
	} else cluster = active_cluster();

	request_queues = aalloc( nrqueues );
	worker_req_queues = aalloc( nrqueues );
	for ( i; nrqueues ) {
		request_queues[i]{ buf_size, i };
		worker_req_queues[i] = &request_queues[i];
	}

	// allocate each processor, then construct it in place on the executor's cluster
	processors = aalloc( nprocessors );
	for ( i; nprocessors )
		(*(processors[i] = alloc())){ *cluster };

	local_queues = aalloc( nworkers );
	workers = aalloc( nworkers );
	w_infos = aalloc( nworkers );
	unsigned int reqPerWorker = nrqueues / nworkers, extras = nrqueues % nworkers;

	for ( i; nworkers ) {
		w_infos[i]{};
		local_queues[i]{ buf_size };
	}

	// allocate each worker, then construct it in place with its sub-range [start, start + range) of mailboxes
	for ( unsigned int i = 0, start = 0, range; i < nworkers; i += 1, start += range ) {
		range = reqPerWorker + ( i < extras ? 1 : 0 );
		(*(workers[i] = alloc())){ *cluster, worker_req_queues, &local_queues[i], &this, start, range, i };
	} // for
}
// Convenience constructors: each one supplies the default for the next missing argument and delegates to the
// constructor taking one more parameter.
static inline void ?{}( executor & this, unsigned int nprocessors, unsigned int nworkers, unsigned int nrqueues, bool seperate_clus ) {
	this{ nprocessors, nworkers, nrqueues, seperate_clus, __DEFAULT_EXECUTOR_BUFSIZE__ };
}
static inline void ?{}( executor & this, unsigned int nprocessors, unsigned int nworkers, unsigned int nrqueues ) {
	this{ nprocessors, nworkers, nrqueues, __DEFAULT_EXECUTOR_SEPCLUS__ };
}
static inline void ?{}( executor & this, unsigned int nprocessors, unsigned int nworkers ) {
	this{ nprocessors, nworkers, __DEFAULT_EXECUTOR_RQUEUES__ };
}
static inline void ?{}( executor & this, unsigned int nprocessors ) {
	this{ nprocessors, __DEFAULT_EXECUTOR_WORKERS__ };
}
static inline void ?{}( executor & this ) {
	this{ __DEFAULT_EXECUTOR_PROCESSORS__ };
}
337
// Executor destructor: raises the shutdown flag, deletes workers and processors, releases every array the
// constructor allocated (and the cluster, when the executor created its own), then prints statistics when
// they are compiled in.
static inline void ^?{}( executor & this ) with(this) {
	is_shutdown = true;						// workers poll this flag via CHECK_TERMINATION

	for ( i; nworkers ) {
		delete( workers[i] );
	}

	for ( i; nprocessors ) {
		delete( processors[i] );
	} // for

	#ifdef ACTOR_STATS_QUEUE_MISSED
	// total up the missed gulps before the mailboxes are released
	size_t misses = 0;
	for ( i; nrqueues ) {
		misses += worker_req_queues[i]->missed;
	}
	// adelete( stolen_arr );
	// adelete( replaced_queue );
	#endif

	adelete( workers );
	adelete( w_infos );
	adelete( local_queues );
	adelete( request_queues );
	adelete( worker_req_queues );
	adelete( processors );
	if ( seperate_clus ) delete( this.cluster );

	#ifdef ACTOR_STATS // print formatted stats
	printf(" Actor System Stats:\n");
	printf("\tActors Created:\t\t\t\t%lu\n\tMessages Sent:\t\t\t\t%lu\n", __num_actors_stats, __all_processed);
	size_t avg_gulps = __all_gulps == 0 ? 0 : __all_processed / __all_gulps;
	printf("\tGulps:\t\t\t\t\t%lu\n\tAverage Gulp Size:\t\t\t%lu\n\tMissed gulps:\t\t\t\t%lu\n", __all_gulps, avg_gulps, misses);
	printf("\tSteal attempts:\t\t\t\t%lu\n\tSteals:\t\t\t\t\t%lu\n\tSteal failures (no candidates):\t\t%lu\n\tSteal failures (failed swaps):\t\t%lu\t Empty steals:\t\t%lu\n",
		__total_tries, __total_stolen, __total_tries - __total_stolen - __total_failed_swaps, __total_failed_swaps, __total_empty_stolen);
	size_t avg_steal = __total_stolen == 0 ? 0 : __all_msgs_stolen / __total_stolen;
	printf("\tMessages stolen:\t\t\t%lu\n\tAverage steal size:\t\t\t%lu\n", __all_msgs_stolen, avg_steal);
	#endif

	#if ! defined( ACTOR_STATS ) && defined( ACTOR_STATS_QUEUE_MISSED )
	printf("\t%lu", misses);
	#endif

}
383
// this is a static field of executor but have to forward decl for get_next_ticket
static size_t __next_ticket = 0;

// Return the next mailbox index: a global counter is atomically incremented and its previous value is reduced
// modulo the executor's number of request queues. Debug builds use sequentially-consistent ordering and draw a
// second ticket if the first equals MAX; release builds use relaxed ordering.
static inline size_t __get_next_ticket( executor & this ) with(this) {
	#ifdef __CFA_DEBUG__
	size_t next = __atomic_fetch_add( &__next_ticket, 1, __ATOMIC_SEQ_CST) % nrqueues;

	// reserve MAX for dead actors
	if ( unlikely( next == MAX ) ) next = __atomic_fetch_add( &__next_ticket, 1, __ATOMIC_SEQ_CST) % nrqueues;
	return next;
	#else
	return __atomic_fetch_add( &__next_ticket, 1, __ATOMIC_RELAXED) % nrqueues;
	#endif
} // tickets
398
// TODO: update globals in this file to be static fields once the static fields project is done
static executor * __actor_executor_ = 0p;			// executor in use; 0p when no actor system is running
static bool __actor_executor_passed = false;		// was an executor passed to actor_start
static size_t __num_actors_ = 0;					// number of actor objects in system
static struct thread$ * __actor_executor_thd = 0p;	// used to wake executor after actors finish

// Base type for actors.
struct actor {
	size_t ticket;									// executor-queue handle
	allocation alloc;								// allocation action
	inline virtual_dtor;
};
409
// Construct an actor: assign it a mailbox ticket from the current executor and count it as live.
// Once an actor is allocated it must be sent a message or the actor system cannot stop. Hence, its receive
// member must be called to end it.
static inline void ?{}( actor & this ) with(this) {
	DEBUG_ABORT( __actor_executor_ == 0p, "Creating actor before calling actor_start() can cause undefined behaviour.\n" );
	alloc = Nodelete;
	ticket = __get_next_ticket( *__actor_executor_ );
	__atomic_fetch_add( &__num_actors_, 1, __ATOMIC_RELAXED );
	#ifdef ACTOR_STATS
	__atomic_fetch_add( &__num_actors_stats, 1, __ATOMIC_SEQ_CST );
	#endif
}
421
// Apply the actor's allocation status after a receive. Nodelete leaves the actor alone; any other status
// terminates it (deleting or destructing it as requested), decrements the live-actor count, and unparks the
// thread recorded in __actor_executor_thd when that count reaches zero.
static inline void check_actor( actor & this ) {
	if ( this.alloc == Nodelete ) return;	// actor stays alive

	switch( this.alloc ) {
	  case Delete:
		delete( &this );
		break;
	  case Destroy:
		CFA_DEBUG( this.ticket = MAX; );	// mark as terminated
		^?{}(this);
		break;
	  case Finished:
		CFA_DEBUG( this.ticket = MAX; );	// mark as terminated
		break;
	  default: ;							// stop warning
	}

	if ( unlikely( __atomic_add_fetch( &__num_actors_, -1, __ATOMIC_RELAXED ) == 0 ) ) { // all actors have terminated
		unpark( __actor_executor_thd );
	}
}
441
// Base type for messages.
struct message {
	allocation alloc;						// allocation action
	inline virtual_dtor;
};
446
// Default constructor: the message starts as Nodelete.
static inline void ?{}( message & this ) {
	this.alloc = Nodelete;
}

// Construct with an explicit allocation status. Debug builds store Nodelete when Finished is passed.
static inline void ?{}( message & this, allocation alloc ) {
	memcpy( &this.alloc, &alloc, sizeof(allocation) ); // optimization to elide ctor
	CFA_DEBUG( if ( this.alloc == Finished ) this.alloc = Nodelete; );
}

// Destructor. Debug builds print a warning when the status is still Nodelete.
static inline void ^?{}( message & this ) with(this) {
	CFA_DEBUG(
		if ( alloc == Nodelete ) {
			printf( "CFA warning (UNIX pid:%ld) : program terminating with message %p allocated but never sent.\n",
					(long int)getpid(), &this );
		}
	)
}
462
// Apply the message's allocation status after delivery: Delete frees it, Destroy runs its destructor, and
// Nodelete is switched to Finished in debug builds only. Finished needs no action.
static inline void check_message( message & this ) {
	switch ( this.alloc ) {					// analyze message status
	  case Nodelete:
		CFA_DEBUG( this.alloc = Finished );
		break;
	  case Delete:
		delete( &this );
		break;
	  case Destroy:
		^?{}( this );
		break;
	  case Finished:
		break;
	} // switch
}
// Replace the message's allocation status and return the previous one.
// Debug builds store Finished when Nodelete is requested.
static inline allocation set_allocation( message & this, allocation state ) {
	CFA_DEBUG( if ( state == Nodelete ) state = Finished; );
	allocation previous = this.alloc;
	this.alloc = state;
	return previous;
}

// Read the message's current allocation status.
static inline allocation get_allocation( message & this ) {
	return this.alloc;
}
480
// Run one request: call the stored receive routine on the receiver and message, store the allocation status it
// returns into the actor the routine reported back, then apply the message's and the actor's allocation status.
// Debug builds abort first if the receiver's ticket is MAX (the value check_actor stores on termination).
static inline void deliver_request( request & this ) {
	DEBUG_ABORT( this.receiver->ticket == (unsigned long int)MAX, "Attempted to send message to deleted/dead actor\n" );
	actor * base_actor;						// filled in by the receive routine
	message * base_msg;						// filled in by the receive routine
	allocation temp = this.fn( *this.receiver, *this.msg, &base_actor, &base_msg );
	memcpy( &base_actor->alloc, &temp, sizeof(allocation) ); // optimization to elide ctor
	check_message( *base_msg );
	check_actor( *base_actor );
}
490
// Tries to atomically swap the mailbox pointers in slots my_idx and victim_idx of request_queues.
// Returns 0p if the swap failed; returns a pointer to the newly owned queue if the swap succeeds.
static inline work_queue * try_swap_queues( worker & this, unsigned int victim_idx, unsigned int my_idx ) with(this) {
	work_queue * mine = request_queues[my_idx];
	work_queue * theirs = request_queues[victim_idx];

	// a 0p slot means that queue is in the process of being stolen
	if ( theirs == 0p ) return 0p;

	// try to set our queue ptr to be 0p. If it fails someone moved our queue so return false
	if ( !__atomic_compare_exchange_n( &request_queues[my_idx], &mine, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) )
		return 0p;

	// try to set other queue ptr to be our queue ptr. If it fails someone moved the other queue so fix up then return false
	if ( !__atomic_compare_exchange_n( &request_queues[victim_idx], &theirs, mine, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) {
		/* paranoid */ verify( request_queues[my_idx] == 0p );
		request_queues[my_idx] = mine;		// reset my queue ptr back to appropriate val
		return 0p;
	}

	// we have successfully swapped and since our queue is 0p no one will touch it so write back new queue ptr non atomically
	request_queues[my_idx] = theirs;		// last write does not need to be atomic
	return theirs;
}
515
// Once a worker to steal from has been chosen, choose the queue to steal from: starting at a random position in
// the victim's sub-range, take the first mailbox that is present, not being processed and not empty, and attempt
// one swap of it with our slot swap_idx. At most one swap is attempted per call.
static inline void choose_queue( worker & this, unsigned int victim_id, unsigned int swap_idx ) with(this) {
	// have to calculate victim start and range since victim may be deleted before us in shutdown
	const unsigned int queues_per_worker = executor_->nrqueues / executor_->nworkers;
	const unsigned int extras = executor_->nrqueues % executor_->nworkers;
	unsigned int vic_start, vic_range;
	if ( extras > victim_id ) {				// victim is one of the workers holding an extra mailbox
		vic_range = queues_per_worker + 1;
		vic_start = vic_range * victim_id;
	} else {
		vic_start = extras + victim_id * queues_per_worker;
		vic_range = queues_per_worker;
	}
	unsigned int start_idx = prng( vic_range );

	unsigned int tries = 0;
	work_queue * curr_steal_queue;

	for ( unsigned int i = start_idx; tries < vic_range; i = (i + 1) % vic_range ) {
		tries++;
		curr_steal_queue = request_queues[ i + vic_start ];
		// avoid empty queues and queues that are being operated on
		if ( curr_steal_queue == 0p || curr_steal_queue->being_processed || is_empty( *curr_steal_queue->c_queue ) )
			continue;

		curr_steal_queue = try_swap_queues( this, i + vic_start, swap_idx );
		#ifdef ACTOR_STATS
		if ( curr_steal_queue ) {
			executor_->w_infos[id].msgs_stolen += curr_steal_queue->c_queue->count;
			executor_->w_infos[id].stolen++;
			if ( is_empty( *curr_steal_queue->c_queue ) ) executor_->w_infos[id].empty_stolen++;
			// __atomic_add_fetch(&executor_->w_infos[victim_id].stolen_from, 1, __ATOMIC_RELAXED);
			// replaced_queue[swap_idx]++;
			// __atomic_add_fetch(&stolen_arr[ i + vic_start ], 1, __ATOMIC_RELAXED);
		} else {
			executor_->w_infos[id].failed_swaps++;
		}
		#endif // ACTOR_STATS

		return;								// one attempt only, whether or not the swap succeeded
	}

	return;
}
562
// Choose a worker to steal from, then let choose_queue() pick one of its mailboxes to swap with slot swap_idx.
// RAND picks a random worker other than ourselves; SEARCH picks the other worker with the smallest timestamp.
static inline void steal_work( worker & this, unsigned int swap_idx ) with(this) {
	#if RAND
	unsigned int victim = prng( executor_->nworkers );
	if ( victim == id ) victim = ( victim + 1 ) % executor_->nworkers;
	choose_queue( this, victim, swap_idx );
	#elif SEARCH
	unsigned long long min = MAX;			// smaller timestamp means longer since service
	int min_id = 0;							// use ints not uints to avoid integer underflow without hacky math
	int n_workers = executor_->nworkers;
	unsigned long long curr_stamp;
	int scount = 1;
	// visit every worker except ourselves, starting with our successor
	for ( int i = (id + 1) % n_workers; scount < n_workers; i = (i + 1) % n_workers, scount++ ) {
		curr_stamp = executor_->w_infos[i].stamp;
		if ( curr_stamp < min ) {
			min = curr_stamp;
			min_id = i;
		}
	}
	choose_queue( this, min_id, swap_idx );
	#endif
}
585
// Leave the worker loop (labelled Exit) once the executor has flagged shutdown.
#define CHECK_TERMINATION if ( unlikely( executor_->is_shutdown ) ) break Exit

// Worker thread body: cycle over this worker's sub-range of mailboxes, gulp each non-empty one into the local
// copy_queue and deliver every request in it. With stealing compiled in, repeatedly seeing empty queues
// eventually triggers a steal attempt. The loop ends when the executor's shutdown flag is observed.
void main( worker & this ) with(this) {
	// #ifdef ACTOR_STATS
	// for ( i; executor_->nrqueues ) {
	//     replaced_queue[i] = 0;
	//     __atomic_store_n( &stolen_arr[i], 0, __ATOMIC_SEQ_CST );
	// }
	// #endif

	// threshold of empty queues we see before we go stealing
	const unsigned int steal_threshold = 2 * range;

	// Store variable data here instead of worker struct to avoid any potential false sharing
	unsigned int empty_count = 0;
	request & req;
	work_queue * curr_work_queue;

	Exit:
	for ( unsigned int i = 0;; i = (i + 1) % range ) {	// cycle through set of request buffers
		curr_work_queue = request_queues[i + start];

		#ifndef __STEAL
		CHECK_TERMINATION;
		#endif

		// check if queue is empty before trying to gulp it
		if ( is_empty( *curr_work_queue->c_queue ) ) {
			#ifdef __STEAL
			empty_count++;
			if ( empty_count < steal_threshold ) continue;
			#else
			continue;
			#endif
		}
		transfer( *curr_work_queue, &current_queue );
		#ifdef ACTOR_STATS
		executor_->w_infos[id].gulps++;
		#endif // ACTOR_STATS
		#ifdef __STEAL
		if ( is_empty( *current_queue ) ) {
			if ( unlikely( no_steal ) ) { CHECK_TERMINATION; continue; }
			empty_count++;
			if ( empty_count < steal_threshold ) continue;
			empty_count = 0;

			CHECK_TERMINATION;				// check for termination

			__atomic_store_n( &executor_->w_infos[id].stamp, rdtscl(), __ATOMIC_RELAXED );

			#ifdef ACTOR_STATS
			executor_->w_infos[id].try_steal++;
			#endif // ACTOR_STATS

			steal_work( this, start + prng( range ) );
			continue;
		}
		#endif // __STEAL
		while ( ! is_empty( *current_queue ) ) {		// drain the gulped queue
			#ifdef ACTOR_STATS
			executor_->w_infos[id].processed++;
			#endif
			&req = &remove( *current_queue );
			if ( !&req ) continue;
			deliver_request( req );
		}
		#ifdef __STEAL
		curr_work_queue->being_processed = false;		// set done processing
		empty_count = 0;								// we found work so reset empty counter
		#endif

		CHECK_TERMINATION;

		// potentially reclaim some of the current queue's vector space if it is unused
		reclaim( *current_queue );
	} // for
}
662
// Queue req in the executor's mailbox selected by ticket.
static inline void send( executor & this, request & req, unsigned long int ticket ) with(this) {
	insert( request_queues[ticket], req);
}

// Queue req in the mailbox assigned to actor `this`, using the current global executor.
// Debug builds abort when the actor's ticket is MAX.
static inline void send( actor & this, request & req ) {
	DEBUG_ABORT( this.ticket == (unsigned long int)MAX, "Attempted to send message to deleted/dead actor\n" );
	send( *__actor_executor_, req, this.ticket );
}
671
// Zero the aggregate statistics counters; does nothing unless ACTOR_STATS is defined.
static inline void __reset_stats() {
	#ifdef ACTOR_STATS
	__num_actors_stats = 0;
	__all_processed = 0;
	__all_gulps = 0;
	__all_msgs_stolen = 0;
	__total_tries = 0;
	__total_stolen = 0;
	__total_failed_swaps = 0;
	__total_empty_stolen = 0;
	#endif
}
684
// Start the actor system with an internally allocated executor: no extra processors, num_thds workers, and
// 16 mailboxes per worker (a single worker gets a single mailbox). The calling thread is recorded so it can be
// unparked when the last actor terminates.
static inline void actor_start( size_t num_thds ) {
	__reset_stats();
	__actor_executor_thd = active_thread();
	__actor_executor_ = alloc();
	(*__actor_executor_){ 0, num_thds, num_thds == 1 ? 1 : num_thds * 16 };
}

// Start the actor system with one worker per processor of the active cluster.
static inline void actor_start() { actor_start( get_proc_count( *active_cluster() ) ); }

// Start the actor system on a caller-supplied executor; actor_stop() will not delete it.
static inline void actor_start( executor & this ) {
	__reset_stats();
	__actor_executor_thd = active_thread();
	__actor_executor_ = &this;
	__actor_executor_passed = true;
}
700
// Block until the actor system has finished, then tear down the global state. The executor is deleted only
// when it was allocated by actor_start rather than passed in by the caller.
static inline void actor_stop() {
	park();									// unparked when actor system is finished

	if ( !__actor_executor_passed ) {
		delete( __actor_executor_ );
	}
	__actor_executor_ = 0p;
	__actor_executor_thd = 0p;
	__next_ticket = 0;
	__actor_executor_passed = false;
}
710
// Default messages to send to any actor to change status.
// Assigned at creation to __base_msg_finished to avoid unused message warning.
message __base_msg_finished @= { .alloc = Finished };
struct delete_msg_t { inline message; } delete_msg = __base_msg_finished;
struct destroy_msg_t { inline message; } destroy_msg = __base_msg_finished;
struct finished_msg_t { inline message; } finished_msg = __base_msg_finished;

// Receive routines for the status messages: each returns the allocation status named by its message type.
allocation receive( actor &, delete_msg_t & ) {
	return Delete;
}
allocation receive( actor &, destroy_msg_t & ) {
	return Destroy;
}
allocation receive( actor &, finished_msg_t & ) {
	return Finished;
}

// Default messages used all the time.
struct start_msg_t { inline message; } start_msg = __base_msg_finished;	// start actor
struct stop_msg_t { inline message; } stop_msg = __base_msg_finished;		// terminate actor

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format