source: libcfa/src/concurrency/kernel/cluster.cfa @ ffac259

Last change on this file since ffac259 was a8667ab, checked in by Peter A. Buhr <pabuhr@…>, 21 months ago

expunge all rseq code from runtime

  • Property mode set to 100644
File size: 15.4 KB
RevLine 
[c42b8a1]1//
2// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
[708ae38]7// cluster.cfa -- file that includes helpers for subsystem that need cluster wide support
[c42b8a1]8//
9// Author           : Thierry Delisle
[708ae38]10// Created On       : Fri Mar 11 12:39:24 2022
[c42b8a1]11// Last Modified By :
12// Last Modified On :
13// Update Count     :
14//
15
16#define __cforall_thread__
17
18#include "bits/defs.hfa"
19#include "device/cpu.hfa"
[708ae38]20#include "kernel/cluster.hfa"
21#include "kernel/private.hfa"
[c42b8a1]22
23#include "stdlib.hfa"
24#include "limits.hfa"
25#include "math.hfa"
26
27#include "ready_subqueue.hfa"
[78a580d]28#include "io/types.hfa"
[c42b8a1]29
30#include <errno.h>
31#include <unistd.h>
32
33extern "C" {
34        #include <sys/syscall.h>  // __NR_xxx
35}
36
37// No overriding function, no environment variable, no define:
38// fall back to a magic number
39#ifndef __CFA_MAX_PROCESSORS__
40        #define __CFA_MAX_PROCESSORS__ 1024
41#endif
42
43#if !defined(__CFA_NO_STATISTICS__)
44        #define __STATS(...) __VA_ARGS__
45#else
46        #define __STATS(...)
47#endif
48
// Determine the maximum number of processors the cluster RWLock must support.
// The CFA_MAX_PROCESSORS environment variable wins when it holds a valid
// decimal in [1, 65535]; otherwise fall back to __CFA_MAX_PROCESSORS__.
// Declared weak so an application can override it entirely.
__attribute__((weak)) unsigned __max_processors() libcfa_public {
	const char * env_val = getenv("CFA_MAX_PROCESSORS");
	if(!env_val) {
		__cfadbg_print_nolock(ready_queue, "No CFA_MAX_PROCESSORS in ENV\n");
		return __CFA_MAX_PROCESSORS__;
	}

	char * parse_end = 0p;
	long int parsed = strtol(env_val, &parse_end, 10);
	if(parsed < 1 || parsed > 65535) {
		// Out of range also catches "no digits at all" (strtol yields 0).
		__cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS out of range : %ld\n", parsed);
		return __CFA_MAX_PROCESSORS__;
	}
	if('\0' != *parse_end) {
		// Trailing garbage after the digits: reject the whole value.
		__cfadbg_print_nolock(ready_queue, "CFA_MAX_PROCESSORS not a decimal number : %s\n", env_val);
		return __CFA_MAX_PROCESSORS__;
	}

	return parsed;
}
70
71//=======================================================================
72// Cluster wide reader-writer lock
73//=======================================================================
// Construct the cluster-wide scheduler reader-writer lock: size the array of
// per-processor lock handles to the maximum supported processor count and
// start with zero slots allocated ('alloc') and zero slots published ('ready').
74void  ?{}(__scheduler_RWLock_t & this) {
[741e22c]75	this.lock.max   = __max_processors();
76	this.lock.alloc = 0;
77	this.lock.ready = 0;
78	this.lock.data  = alloc(this.lock.max);
79	this.lock.write_lock  = false;
[c42b8a1]80
	// 'alloc' and 'ready' are manipulated with __atomic builtins (see
	// register_proc_id); verify the hardware supports them lock-free.
[741e22c]81	/*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.lock.alloc), &this.lock.alloc));
82	/*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.lock.ready), &this.lock.ready));
[c42b8a1]83
84}
// Destructor: release the per-processor handle array allocated in the ctor.
85void ^?{}(__scheduler_RWLock_t & this) {
[741e22c]86	free(this.lock.data);
[c42b8a1]87}
88
89
90//=======================================================================
91// Lock-Free registering/unregistering of threads
// Register the calling processor with the cluster-wide RWLock and return its
// slot index in the lock's handle array. Lock-free: first tries to reclaim a
// vacated slot among the already-published entries, otherwise fetch-and-adds
// a fresh slot and publishes it by advancing 'ready'.
unsigned register_proc_id( void ) with(__scheduler_lock.lock) {
	bool * handle = (bool *)&kernelTLS().sched_lock;

	// Step - 1 : check if there is already space in the data
	uint_fast32_t s = ready;

	// Check among all the ready
	for(uint_fast32_t i = 0; i < s; i++) {
		bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems
		/* paranoid */ verify( handle != *cell );

		bool * null = 0p; // Re-write every loop since compare thrashes it
		if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null
			&& __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
			/* paranoid */ verify(i < ready);
			/* paranoid */ verify( (kernelTLS().sched_id = i, true) );
			return i;
		}
	}

	// FIX: format specifier was "%ud", which prints the value then a literal 'd'; "%u" is intended.
	if(max <= alloc) abort("Trying to create more than %u processors", __scheduler_lock.lock.max);

	// Step - 2 : F&A to get a new spot in the array.
	uint_fast32_t n = __atomic_fetch_add(&alloc, 1, __ATOMIC_SEQ_CST);
	if(max <= n) abort("Trying to create more than %u processors", __scheduler_lock.lock.max);

	// Step - 3 : Mark space as used and then publish it.
	data[n] = handle;
	// 'ready' must advance in slot order, so spin until all lower slots are
	// published before publishing ours.
	while() {
		unsigned copy = n;
		if( __atomic_load_n(&ready, __ATOMIC_RELAXED) == n
			&& __atomic_compare_exchange_n(&ready, &copy, n + 1, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
			break;
		Pause();
	}

	// Return new spot.
	/* paranoid */ verify(n < ready);
	/* paranoid */ verify( (kernelTLS().sched_id = n, true) );
	return n;
}
133
// Remove the calling processor from the RWLock: null its slot with a RELEASE
// store so a later register_proc_id can reclaim it via the null-cell scan.
[cd3fc46]134void unregister_proc_id( unsigned id ) with(__scheduler_lock.lock) {
[c42b8a1]135	/* paranoid */ verify(id < ready);
136	/* paranoid */ verify(id == kernelTLS().sched_id);
137	/* paranoid */ verify(data[id] == &kernelTLS().sched_lock);
138
139	bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems
140
141	__atomic_store_n(cell, 0p, __ATOMIC_RELEASE);
142}
143
144//-----------------------------------------------------------------------
145// Writer side : acquire when changing the ready queue, e.g. adding more
146//  queues or removing them.
// Acquire the cluster RWLock for writing: take the global write lock, then
// acquire every published per-processor reader lock. Returns the number of
// per-processor locks taken; the caller must hand this value back to
// ready_mutate_unlock so exactly those locks are released.
[cd3fc46]147uint_fast32_t ready_mutate_lock( void ) with(__scheduler_lock.lock) {
[c42b8a1]148	/* paranoid */ verify( ! __preemption_enabled() );
149
150	// Step 1 : lock global lock
151	// It is needed to avoid processors that register mid Critical-Section
152	//   to simply lock their own lock and enter.
153	__atomic_acquire( &write_lock );
154
155	// Make sure we won't deadlock ourself
156	// Checking before acquiring the writer lock isn't safe
157	// because someone else could have locked us.
158	/* paranoid */ verify( ! kernelTLS().sched_lock );
159
160	// Step 2 : lock per-proc lock
161	// Processors that are currently being registered aren't counted
162	//   but can't be in read_lock or in the critical section.
163	// All other processors are counted
164	uint_fast32_t s = ready;
165	for(uint_fast32_t i = 0; i < s; i++) {
166		volatile bool * llock = data[i];
167		if(llock) __atomic_acquire( llock );   // skip vacated (null) slots
168	}
169
170	/* paranoid */ verify( ! __preemption_enabled() );
171	return s;
172}
173
// Release the cluster RWLock after a write: release the 'last_s'
// per-processor locks taken by ready_mutate_lock, then the global write lock.
[cd3fc46]174void ready_mutate_unlock( uint_fast32_t last_s ) with(__scheduler_lock.lock) {
[c42b8a1]175	/* paranoid */ verify( ! __preemption_enabled() );
176
177	// Step 1 : release local locks
178	// This must be done while the global lock is held to avoid
179	//   threads that were created mid critical section
180	//   to race to lock their local locks and have the writer
181	//   immediately unlock them
182	// Alternative solution : return s in write_lock and pass it to write_unlock
183	for(uint_fast32_t i = 0; i < last_s; i++) {
184		volatile bool * llock = data[i];
185		if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);
186	}
187
188	// Step 2 : release global lock
189	/*paranoid*/ assert(true == write_lock);
190	__atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);
191
192	/* paranoid */ verify( ! __preemption_enabled() );
193}
194
195//=======================================================================
196// Cluster growth
// Lower bound on the number of ready-queue shards a cluster keeps, even when
// it has no processors (see ready_queue_grow/ready_queue_shrink).
197static const unsigned __readyq_single_shard = 2;
198
// Timestamp ctor: zero the raw timestamp ('tv') and 'ma' (presumably a
// moving average -- TODO confirm; fix_times resets both the same way).
[31c967b]199void  ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; }
200void ^?{}(__timestamp_t &) {}
201
[c42b8a1]202//-----------------------------------------------------------------------
203// Check that all the intrusive queues in the data structure are still consistent
// Walk every ready-queue lane and assert the intrusive-list invariants:
// no lane locked, and the anchor/prev pointers consistent with the lane's
// empty/non-empty state. Compiles to nothing unless __CFA_WITH_VERIFY__.
[884f3f67]204static void check_readyQ( cluster * cltr ) with (cltr->sched) {
[c42b8a1]205	#if defined(__CFA_WITH_VERIFY__)
206		{
[884f3f67]207			const unsigned lanes_count = readyQ.count;
208			for( idx ; lanes_count ) {
209				__intrusive_lane_t & sl = readyQ.data[idx];
[2af1943]210				assert(!readyQ.data[idx].l.lock);
[c42b8a1]211
212					if(is_empty(sl)) {
[2af1943]213						assert( sl.l.anchor.next == 0p );
214						assert( sl.l.anchor.ts   == MAX );
215						assert( mock_head(sl)  == sl.l.prev );
[c42b8a1]216					} else {
[2af1943]217						assert( sl.l.anchor.next != 0p );
218						assert( sl.l.anchor.ts   != MAX );
219						assert( mock_head(sl)  != sl.l.prev );
[c42b8a1]220					}
221			}
222		}
223	#endif
224}
225
226// Call this function if the intrusive list was moved using memcpy;
227// it fixes the list so that the pointers back to the anchor aren't left dangling
228static inline void fix(__intrusive_lane_t & ll) {
[708ae38]229        if(is_empty(ll)) {
[2af1943]230                verify(ll.l.anchor.next == 0p);
231                ll.l.prev = mock_head(ll);
[708ae38]232        }
[c42b8a1]233}
234
// Hand out ready-queue (and, with io_uring, io completion-queue) shard ids to
// the first 'count' processors of 'list', advancing the id counters by the
// shard factors. Also resets each processor's rdq/io 'target' to the
// UINT_MAX sentinel.
[1756e08]235static void assign_list(unsigned & valrq, unsigned & valio, dlist(struct processor) & list, unsigned count) {
236	struct processor * it = &list`first;
[c42b8a1]237	for(unsigned i = 0; i < count; i++) {
238		/* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
[adb3ea1]239		it->rdq.id = valrq;
[b035046]240		it->rdq.target = UINT_MAX;
[adb3ea1]241		valrq += __shard_factor.readyq;
[1a567d0]242		#if defined(CFA_HAVE_LINUX_IO_URING_H)
243			it->io.ctx->cq.id = valio;
[b035046]244			it->io.target = UINT_MAX;
[1a567d0]245			valio += __shard_factor.io;
246		#endif
[c42b8a1]247		it = &(*it)`next;
248	}
249}
250
// Renumber shard ids across all of the cluster's processors, actives first
// then idles, so the ids form a single contiguous sequence from 0.
251static void reassign_cltr_id(struct cluster * cltr) {
[adb3ea1]252	unsigned prefrq = 0;
253	unsigned prefio = 0;
254	assign_list(prefrq, prefio, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
255	assign_list(prefrq, prefio, cltr->procs.idles  , cltr->procs.idle );
256}
257
#if defined(CFA_HAVE_LINUX_IO_URING_H)
	// Publish each processor's io_uring context into the cluster-wide io
	// context array, indexed by the context's completion-queue id.
	static void assign_io(io_context$ ** data, size_t count, dlist(struct processor) & list) {
		struct processor * it = &list`first;
		while(it) {
			/* paranoid */ verifyf( it, "Unexpected null iterator\n");
			// FIX: the diagnostic used to print it->rdq.id, which is not the value being checked.
			/* paranoid */ verifyf( it->io.ctx->cq.id < count, "Processor %p has id %u above count %zu\n", it, it->io.ctx->cq.id, count);
			data[it->io.ctx->cq.id] = it->io.ctx;
			it = &(*it)`next;
		}
	}

	// Rebuild the cluster-wide io context array from every processor,
	// active and idle alike.
	static void reassign_cltr_io(struct cluster * cltr) {
		assign_io(cltr->sched.io.data, cltr->sched.io.count, cltr->procs.actives);
		assign_io(cltr->sched.io.data, cltr->sched.io.count, cltr->procs.idles  );
	}
#else
	// Without io_uring there are no io contexts to distribute.
	static void reassign_cltr_io(struct cluster *) {}
#endif
[c42b8a1]276
// (Re)allocate the timestamp array to 'count' entries and reset every entry:
// 'tv' to the current cycle counter, 'ma' to zero.
[884f3f67]277static void fix_times( __timestamp_t * volatile & tscs, unsigned count ) {
278	tscs = alloc(count, tscs`realloc);
279	for(i; count) {
[2af1943]280		tscs[i].t.tv = rdtscl();
281		tscs[i].t.ma = 0;
[c42b8a1]282	}
283}
284
[c42b8a1]285// Grow the ready queue
// Resize the cluster's ready-queue shards and io arrays to match the current
// processor total. Must be called with the ready-queue write lock held
// (verified on entry and exit).
286void ready_queue_grow(struct cluster * cltr) {
287	int target = cltr->procs.total;
288
289	/* paranoid */ verify( ready_mutate_islocked() );
290	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
291
292	// Make sure that everything is consistent
[884f3f67]293	/* paranoid */ check_readyQ( cltr );
[c42b8a1]294
295
[884f3f67]296	// Find new count
297	// Make sure we always have atleast 1 list
298	size_t ocount = cltr->sched.readyQ.count;
299	size_t ncount = max(target * __shard_factor.readyq, __readyq_single_shard);
[c42b8a1]300
[884f3f67]301	// Do we have to do anything?
302	if( ocount != ncount ) {
303
304		// grow the ready queue
305		with( cltr->sched ) {
306
307			// Allocate new array (uses realloc and memcpies the data)
308			readyQ.data = alloc( ncount, readyQ.data`realloc );
[c42b8a1]309
[884f3f67]310			// Fix the moved data (empty lanes' self-pointers now dangle)
311			for( idx; ocount ) {
312				fix(readyQ.data[idx]);
313			}
314
315			// Construct new data
316			for( idx; ocount ~ ncount) {
317				(readyQ.data[idx]){};
318			}
319
320			// Update original count
321			readyQ.count = ncount;
[c42b8a1]322		}
323
324
[884f3f67]325		fix_times(cltr->sched.readyQ.tscs, cltr->sched.readyQ.count);
[c42b8a1]326	}
327
[708ae38]328	// Fix the io times (io shard count tracks the processor count directly,
	// with no minimum, unlike the ready queue above)
[adb3ea1]329	cltr->sched.io.count = target * __shard_factor.io;
[708ae38]330	fix_times(cltr->sched.io.tscs, cltr->sched.io.count);
331
[884f3f67]332	// realloc the caches
333	cltr->sched.caches = alloc( target, cltr->sched.caches`realloc );
[c42b8a1]334
[884f3f67]335	// reassign the clusters.
[c42b8a1]336	reassign_cltr_id(cltr);
337
[adb3ea1]338	cltr->sched.io.data = alloc( cltr->sched.io.count, cltr->sched.io.data`realloc );
339	reassign_cltr_io(cltr);
340
[c42b8a1]341	// Make sure that everything is consistent
[884f3f67]342	/* paranoid */ check_readyQ( cltr );
343	/* paranoid */ verify( (target == 0) == (cltr->sched.caches == 0p) );
[c42b8a1]344
345	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
346
347	/* paranoid */ verify( ready_mutate_islocked() );
348}
349
350// Shrink the ready queue
// Reduce the cluster's ready-queue shards to match the (smaller) processor
// total, re-pushing any threads stranded in the removed lanes. Must be called
// with the ready-queue write lock held.
351void ready_queue_shrink(struct cluster * cltr) {
352	/* paranoid */ verify( ready_mutate_islocked() );
353	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
354
355	// Make sure that everything is consistent
[884f3f67]356	/* paranoid */ check_readyQ( cltr );
[c42b8a1]357
358	int target = cltr->procs.total;
359
[884f3f67]360	with( cltr->sched ) {
[c42b8a1]361		// Remember old count
[884f3f67]362		size_t ocount = readyQ.count;
[c42b8a1]363
364		// Find new count
365		// Make sure we always have atleast 1 list
[884f3f67]366		size_t ncount = max(target * __shard_factor.readyq, __readyq_single_shard);
367		/* paranoid */ verifyf( ocount >= ncount, "Error in shrinking size calculation, %zu >= %zu", ocount, ncount );
368		/* paranoid */ verifyf( ncount == target * __shard_factor.readyq || ncount == __readyq_single_shard,
[bfb9bf5]369		/* paranoid */          "Error in shrinking size calculation, expected %u or %u, got %zu", target * __shard_factor.readyq, __readyq_single_shard, ncount );
[884f3f67]370
		// Publish the smaller count first so push() below targets only lanes
		// that survive the shrink.
371		readyQ.count = ncount;
[c42b8a1]372
373		// for printing count the number of displaced threads
374		#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
375			__attribute__((unused)) size_t displaced = 0;
376		#endif
377
378		// redistribute old data
[884f3f67]379		for( idx; ncount ~ ocount) {
[c42b8a1]380			// Lock is not strictly needed but makes checking invariants much easier
[2af1943]381			__attribute__((unused)) bool locked = __atomic_try_acquire(&readyQ.data[idx].l.lock);
[c42b8a1]382			verify(locked);
383
384			// As long as we can pop from this lane to push the threads somewhere else in the queue
[884f3f67]385			while(!is_empty(readyQ.data[idx])) {
[c42b8a1]386				struct thread$ * thrd;
387				unsigned long long _;
[884f3f67]388				[thrd, _] = pop(readyQ.data[idx]);
[c42b8a1]389
390				push(cltr, thrd, true);
391
392				// for printing count the number of displaced threads
393				#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
394					displaced++;
395				#endif
396			}
397
398			// Unlock the lane
[2af1943]399			__atomic_unlock(&readyQ.data[idx].l.lock);
[c42b8a1]400
401			// TODO print the queue statistics here
402
			// Destroy the drained lane before the array is shrunk.
[884f3f67]403			^(readyQ.data[idx]){};
[c42b8a1]404		}
405
406		__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
407
408		// Allocate new array (uses realloc and memcpies the data)
[884f3f67]409		readyQ.data = alloc( ncount, readyQ.data`realloc );
[c42b8a1]410
411		// Fix the moved data
[884f3f67]412		for( idx; ncount ) {
413			fix(readyQ.data[idx]);
[c42b8a1]414		}
415
[884f3f67]416		fix_times(readyQ.tscs, ncount);
[c42b8a1]417	}
[884f3f67]418	cltr->sched.caches = alloc( target, cltr->sched.caches`realloc );
[c42b8a1]419
[708ae38]420	// Fix the io times
[adb3ea1]421	cltr->sched.io.count = target * __shard_factor.io;
[708ae38]422	fix_times(cltr->sched.io.tscs, cltr->sched.io.count);
[c42b8a1]423
424	reassign_cltr_id(cltr);
425
[adb3ea1]426	cltr->sched.io.data = alloc( cltr->sched.io.count, cltr->sched.io.data`realloc );
427	reassign_cltr_io(cltr);
428
[c42b8a1]429	// Make sure that everything is consistent
[884f3f67]430	/* paranoid */ verify( (target == 0) == (cltr->sched.caches == 0p) );
431	/* paranoid */ check_readyQ( cltr );
[c42b8a1]432
433	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
434	/* paranoid */ verify( ready_mutate_islocked() );
435}
436
// Tear down the cluster's scheduling arrays when the cluster is destroyed.
[884f3f67]437void ready_queue_close(struct cluster * cltr) {
438	free( cltr->sched.readyQ.data );
439	free( cltr->sched.readyQ.tscs );
440	cltr->sched.readyQ.data = 0p;
441	cltr->sched.readyQ.tscs = 0p;
442	cltr->sched.readyQ.count = 0;
443
	// NOTE(review): cltr->sched.io.data is never freed here, and the io
	// pointers/count are not reset -- confirm io.data is released elsewhere
	// (e.g. by the io subsystem) or this leaks on cluster destruction.
444	free( cltr->sched.io.tscs );
445	free( cltr->sched.caches );
446}
447
// offsetof variant that accepts a nested member path (e.g. l.anchor), used
// only by the assertions in the lane ctor below and #undef'd after.
[2af1943]448#define nested_offsetof(type, field) ((off_t)(&(((type*)0)-> field)))
449
[c42b8a1]450// Ctor
// Initialise an empty lane: unlocked, anchor cleared, and 'prev' pointing at
// the lane's own pseudo head node (mock_head).
451void ?{}( __intrusive_lane_t & this ) {
[2af1943]452	this.l.lock = false;
453	this.l.prev = mock_head(this);
454	this.l.anchor.next = 0p;
455	this.l.anchor.ts   = MAX;
[c42b8a1]456	#if !defined(__CFA_NO_STATISTICS__)
[2af1943]457		this.l.cnt  = 0;
[c42b8a1]458	#endif
459
460	// We add a boat-load of assertions here because the anchor code is very fragile
	// The anchor must overlay a thread$'s rdy_link exactly, so that the
	// pseudo head computed by mock_head behaves like a real list node.
[15c93d8]461	/* paranoid */ _Static_assert( offsetof( thread$, rdy_link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
462	/* paranoid */ verify( offsetof( thread$, rdy_link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
463	/* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, rdy_link )) == (uintptr_t)(&this.l.anchor) );
464	/* paranoid */ verify( &mock_head(this)->rdy_link.next == &this.l.anchor.next );
465	/* paranoid */ verify( &mock_head(this)->rdy_link.ts   == &this.l.anchor.ts   );
466	/* paranoid */ verify( mock_head(this)->rdy_link.next == 0p );
467	/* paranoid */ verify( mock_head(this)->rdy_link.ts   == MAX );
[2af1943]468	/* paranoid */ verify( mock_head(this) == this.l.prev );
469	/* paranoid */ verify( __alignof__(__intrusive_lane_t) == 64 );
470	/* paranoid */ verify( __alignof__(this) == 64 );
471	/* paranoid */ verifyf( ((intptr_t)(&this) % 64) == 0, "Expected address to be aligned %p %% 64 == %zd", &this, ((intptr_t)(&this) % 64) );
[c42b8a1]472}
473
[2af1943]474#undef nested_offsetof
475
[c42b8a1]476// Dtor is trivial
// It only asserts the lane is empty and still in its pristine state; there
// is nothing to release.
477void ^?{}( __intrusive_lane_t & this ) {
478	// Make sure the list is empty
[2af1943]479	/* paranoid */ verify( this.l.anchor.next == 0p );
480	/* paranoid */ verify( this.l.anchor.ts   == MAX );
481	/* paranoid */ verify( mock_head(this)    == this.l.prev );
[c42b8a1]482}
Note: See TracBrowser for help on using the repository browser.