Context Navigation

source: benchmark/readyQ/locality.cpp @ 89da3a9

Last change on this file since 89da3a9 was aec2c022, checked in by Thierry Delisle <tdelisle@…>, 2 years ago
Clean-up the benchmarks a little
Property mode set to `100644`
File size: 7.6 KB

Rev	Line
[c4241b6]	1	#include "rq_bench.hpp"
	2	#pragma GCC diagnostic push
	3	#pragma GCC diagnostic ignored "-Wunused-parameter"
	4	#include <libfibre/fibre.h>
	5	#pragma GCC diagnostic pop
	6
	7	struct Result {
	8	uint64_t count = 0;
	9	uint64_t dmigs = 0;
	10	uint64_t gmigs = 0;
	11	};
	12
	13	// ==================================================
	14	struct __attribute__((aligned(128))) MyData {
	15	uint64_t _p1[16]; // padding
	16	uint64_t * data;
	17	size_t len;
	18	BaseProcessor * ttid;
	19	size_t id;
	20	uint64_t _p2[16]; // padding
	21
	22	MyData(size_t id, size_t size)
	23	: data( (uintptr_t )aligned_alloc(128, size sizeof(uint64_t)) )
	24	, len( size )
	25	, ttid( &Context::CurrProcessor() )
	26	, id( id )
	27	{
	28	for(size_t i = 0; i < this->len; i++) {
	29	this->data[i] = 0;
	30	}
	31	}
	32
	33	uint64_t moved(BaseProcessor * ttid) {
	34	if(this->ttid == ttid) {
	35	return 0;
	36	}
	37	this->ttid = ttid;
	38	return 1;
	39	}
	40
	41	__attribute__((noinline)) void access(size_t idx) {
	42	size_t l = this->len;
	43	this->data[idx % l] += 1;
	44	}
	45	};
	46
	47	// ==================================================
	48	struct __attribute__((aligned(128))) MyCtx {
	49	struct MyData * volatile data;
	50
	51	struct {
	52	struct MySpot ** ptr;
	53	size_t len;
	54	} spots;
	55
	56	bench_sem sem;
	57
	58	Result result;
	59
	60	bool share;
	61	size_t cnt;
	62	BaseProcessor * ttid;
	63	size_t id;
	64
	65	MyCtx(MyData * d, MySpot ** spots, size_t len, size_t cnt, bool share, size_t id)
	66	: data( d )
	67	, spots{ .ptr = spots, .len = len }
	68	, share( share )
	69	, cnt( cnt )
	70	, ttid( &Context::CurrProcessor() )
	71	, id( id )
	72	{}
	73
	74	uint64_t moved(BaseProcessor * ttid) {
	75	if(this->ttid == ttid) {
	76	return 0;
	77	}
	78	this->ttid = ttid;
	79	return 1;
	80	}
	81	};
	82
	83	// ==================================================
	84	// Atomic object where a single thread can wait
	85	// May exchange data
	86	struct __attribute__((aligned(128))) MySpot {
	87	MyCtx * volatile ptr;
	88	size_t id;
	89	uint64_t _p1[16]; // padding
	90
	91	MySpot(size_t id) : ptr( nullptr ), id( id ) {}
	92
	93
	94	static inline MyCtx * one() {
	95	return reinterpret_cast<MyCtx *>(1);
	96	}
	97
	98	// Main handshake of the code
	99	// Single seat, first thread arriving waits
	100	// Next threads unblocks current one and blocks in its place
	101	// if share == true, exchange data in the process
	102	bool put( MyCtx & ctx, MyData * data, bool share) {
	103	// Attempt to CAS our context into the seat
	104	for(;;) {
	105	MyCtx * expected = this->ptr;
	106	if (expected == one()) { // Seat is closed, return
	107	return true;
	108	}
	109
	110	if (__atomic_compare_exchange_n(&this->ptr, &expected, &ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
	111	if(expected) {
	112	if(share) {
	113	expected->data = data;
	114	}
	115	expected->sem.post();
	116	}
	117	break; // We got the seat
	118	}
	119	}
	120
	121	// Block once on the seat
	122	ctx.sem.wait();
	123
	124	// Someone woke us up, get the new data
	125	return false;
	126	}
	127
	128	// Shutdown the spot
	129	// Wake current thread and mark seat as closed
	130	void release() {
	131	struct MyCtx * val = __atomic_exchange_n(&this->ptr, one(), __ATOMIC_SEQ_CST);
	132	if (!val) {
	133	return;
	134	}
	135
	136	// Someone was there, release them
	137	val->sem.post();
	138	}
	139	};
	140
	141	// ==================================================
	142	// Random number generator, Go's native one is too slow and global
	143	uint64_t __xorshift64( uint64_t & state ) {
	144	uint64_t x = state;
	145	x ^= x << 13;
	146	x ^= x >> 7;
	147	x ^= x << 17;
	148	return state = x;
	149	}
	150
	151	// ==================================================
	152	// Do some work by accessing 'cnt' cells in the array
	153	__attribute__((noinline)) void work(MyData & data, size_t cnt, uint64_t & state) {
	154	for (size_t i = 0; i < cnt; i++) {
	155	data.access(__xorshift64(state));
	156	}
	157	}
	158
	159	void thread_main( MyCtx & ctx ) {
[f03209d3]	160	uint64_t state = ctx.id;
[c4241b6]	161
	162	// Wait for start
	163	ctx.sem.wait();
	164
	165	// Main loop
	166	for(;;) {
	167	// Touch our current data, write to invalidate remote cache lines
	168	work( *ctx.data, ctx.cnt, state );
	169
	170	// Wait on a random spot
	171	uint64_t idx = __xorshift64(state) % ctx.spots.len;
	172	bool closed = ctx.spots.ptr[idx]->put(ctx, ctx.data, ctx.share);
	173
	174	// Check if the experiment is over
	175	if (closed) break;
	176	if ( clock_mode && stop) break;
	177	if (!clock_mode && ctx.result.count >= stop_count) break;
	178
	179	// Check everything is consistent
	180	assert( ctx.data );
	181
	182	// write down progress and check migrations
	183	BaseProcessor * ttid = &Context::CurrProcessor();
	184	ctx.result.count += 1;
	185	ctx.result.gmigs += ctx.moved(ttid);
	186	ctx.result.dmigs += ctx.data->moved(ttid);
	187	}
	188
	189	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
	190	}
	191
	192	// ==================================================
	193	int main(int argc, char * argv[]) {
	194	unsigned wsize = 2;
	195	unsigned wcnt = 2;
[f03209d3]	196	unsigned nspots = 0;
[c4241b6]	197	bool share = false;
	198	option_t opt[] = {
	199	BENCH_OPT,
[f03209d3]	200	{ 'n', "nspots", "Number of spots where threads sleep (nthreads - nspots are active at the same time)", nspots},
[c4241b6]	201	{ 'w', "worksize", "Size of the array for each threads, in words (64bit)", wsize},
	202	{ 'c', "workcnt" , "Number of words to touch when working (random pick, cells can be picked more than once)", wcnt },
	203	{ 's', "share" , "Pass the work data to the next thread when blocking", share, parse_truefalse }
	204	};
	205	BENCH_OPT_PARSE("libfibre cycle benchmark");
	206
	207	std::cout.imbue(std::locale(""));
	208	setlocale(LC_ALL, "");
	209
	210	unsigned long long global_count = 0;
	211	unsigned long long global_gmigs = 0;
	212	unsigned long long global_dmigs = 0;
	213
[f03209d3]	214	if( nspots == 0 ) { nspots = nthreads - nprocs; }
	215
[c4241b6]	216	uint64_t start, end;
	217	{
	218	FibreInit(1, nprocs);
[8fe35be]	219	MyData ** data_arrays = new MyData *[nthreads]();
[c4241b6]	220	for(size_t i = 0; i < nthreads; i++) {
	221	data_arrays[i] = new MyData( i, wsize );
	222	}
	223
[f03209d3]	224	MySpot * spots[nspots];
	225	for(unsigned i = 0; i < nspots; i++) {
[c4241b6]	226	spots[i] = new MySpot{ i };
	227	}
	228
[f03209d3]	229	threads_left = nthreads - nspots;
[8fe35be]	230	Fibre ** threads = new Fibre *[nthreads]();
	231	MyCtx ** thddata = new MyCtx *[nthreads]();
[c4241b6]	232	{
	233	for(size_t i = 0; i < nthreads; i++) {
	234	thddata[i] = new MyCtx(
	235	data_arrays[i],
	236	spots,
[f03209d3]	237	nspots,
[c4241b6]	238	wcnt,
	239	share,
	240	i
	241	);
[1f950c3b]	242	threads[i] = new Fibre();
	243	threads[i]->run( reinterpret_cast<void ()(MyCtx)>(thread_main), thddata[i] );
[c4241b6]	244	}
	245
	246	bool is_tty = isatty(STDOUT_FILENO);
[e54d0c3]	247	start = timeHiRes();
[c4241b6]	248
	249	for(size_t i = 0; i < nthreads; i++) {
	250	thddata[i]->sem.post();
	251	}
	252	wait<Fibre>(start, is_tty);
	253
	254	stop = true;
[e54d0c3]	255	end = timeHiRes();
[c4241b6]	256	printf("\nDone\n");
	257
	258	for(size_t i = 0; i < nthreads; i++) {
	259	thddata[i]->sem.post();
	260	fibre_join( threads[i], nullptr );
	261	global_count += thddata[i]->result.count;
	262	global_gmigs += thddata[i]->result.gmigs;
	263	global_dmigs += thddata[i]->result.dmigs;
	264	}
	265	}
	266
	267	for(size_t i = 0; i < nthreads; i++) {
	268	delete( data_arrays[i] );
	269	}
[8fe35be]	270	delete[](data_arrays);
[c4241b6]	271
[f03209d3]	272	for(size_t i = 0; i < nspots; i++) {
[c4241b6]	273	delete( spots[i] );
	274	}
[8fe35be]	275
	276	delete[](threads);
	277	delete[](thddata);
[c4241b6]	278	}
	279
	280	printf("Duration (ms) : %'ld\n", to_miliseconds(end - start));
	281	printf("Number of processors : %'d\n", nprocs);
	282	printf("Number of threads : %'d\n", nthreads);
[f03209d3]	283	printf("Number of spots : %'d\n", nspots);
[c4241b6]	284	printf("Work size (64bit words): %'15u\n", wsize);
[aec2c022]	285	printf("Data sharing : %s\n", share ? "On" : "Off");
[c4241b6]	286	printf("Total Operations(ops) : %'15llu\n", global_count);
	287	printf("Total G Migrations : %'15llu\n", global_gmigs);
	288	printf("Total D Migrations : %'15llu\n", global_dmigs);
[f03209d3]	289	printf("Ops per second : %'18.2lf\n", ((double)global_count) / to_fseconds(end - start));
	290	printf("ns per ops : %'18.2lf\n", ((double)(end - start)) / global_count);
	291	printf("Ops per threads : %'15llu\n", global_count / nthreads);
	292	printf("Ops per procs : %'15llu\n", global_count / nprocs);
	293	printf("Ops/sec/procs : %'18.2lf\n", (((double)global_count) / nprocs) / to_fseconds(end - start));
	294	printf("ns per ops/procs : %'18.2lf\n", ((double)(end - start)) / (global_count / nprocs));
[c4241b6]	295	fflush(stdout);
[e54d0c3]	296	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: