Context Navigation

cluster.hfa

Last change on this file was ba068c0, checked in by Peter A. Buhr <pabuhr@…>, 7 months ago
rename clock routines processor and program to processor_cpu and program_cpu to deal with name clash with processor
Property mode set to `100644`
File size: 5.8 KB

Line
1	//
2	// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
3	//
4	// The contents of this file are covered under the licence agreement in the
5	// file "LICENCE" distributed with Cforall.
6	//
7	// cluster.hfa -- file that includes helpers for subsystem that need cluster wide support
8	//
9	// Author : Thierry Delisle
10	// Created On : Tue Mar 15 16:40:12 2022
11	// Last Modified By :
12	// Last Modified On :
13	// Update Count :
14	//
15
16	#pragma once
17
18	#include "device/cpu.hfa"
19	#include "kernel/private.hfa"
20	#include "math.hfa"
21
22	#include <limits.h>
23	#include <inttypes.h>
24
25	#include "clock.hfa"
26
27	#if defined(READYQ_USE_LINEAR_AVG)
28
// Linear scheme: no conversion is needed, the timestamp difference is
// returned unchanged as the averaged quantity.
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) {
	return intsc;
}
31
// Sanity warnings for the linear scheme, where every quantity is an integer.
// Both macros are expanded inside moving_average and refer to its parameters
// and locals by name (strict, old_avg, ret, currtsc, intsc, new_val).
// The check is skipped entirely when `strict` is false.
#define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program_cpu()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg )
35
// Scale an average by the 8X cutoff factor.
// In the linear scheme this is a plain multiplication by 8.
#define AVG_FACTOR( x ) ((x) * 8)
38
39	#elif defined(READYQ_USE_LOGDBL_AVG)
40
// Convert a timestamp difference to log2 scale, using double.
// A zero input yields 0.0 directly instead of being handed to log2.
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) {
	if(unlikely(0 == intsc)) return 0.0;
	return log2((__readyQ_avg_t)intsc);
}
43
// Sanity warnings for the log2/double scheme.
// Both macros are expanded inside moving_average and refer to its parameters
// and locals by name (strict, old_avg, ret, currtsc, intsc, new_val).
// The check is skipped entirely when `strict` is false.
// warn_large_after additionally verifies that the new average is not negative.
#define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program_cpu()`ms )
#define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program_cpu()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \
	verify(ret >= 0)
47
// Scale an average by the 8X cutoff factor.
// On a log2 scale: log2(8X) = log2(8) + log2(X) = 3 + log2(X), so the factor is an addition of 3.
#define AVG_FACTOR( x ) ((x) + 3.0)
50
// we need to overload the __atomic_load_n because they don't support double
//
// Atomically loads the 64-bit object at `ptr` through the integer builtin,
// using memory order `mem`, and returns those bits reinterpreted as a double.
static inline double __atomic_load_n(volatile double * ptr, int mem) {
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	// compare the pointee sizes: the reinterpretation is only sound if a
	// double and a uint64_t occupy the same number of bytes
	_Static_assert(sizeof(*uptr) == sizeof(*ptr));
	uint64_t bits = 0;
	bits = __atomic_load_n(uptr, mem);
	uint64_t * rp = &bits;
	// read the loaded bits back as a double (dereference, not a pointer-to-double conversion)
	double ret = *(volatile double *)rp;
	// the loaded value must be exactly zero or a positive number above 3e-100
	/* paranoid */ verify( ret == 0 || ret > 3e-100 );
	return ret;
}
62
// we need to overload the __atomic_store_n because they don't support double
//
// Atomically stores the bit pattern of `val` into the 64-bit object at `ptr`
// through the integer builtin, using memory order `mem`.
static inline void __atomic_store_n(volatile double * ptr, double val, int mem) {
	// the stored value must be exactly zero or a positive number above 3e-100
	/* paranoid */ verify( val == 0 || val > 3e-100 );
	volatile uint64_t * uptr = (volatile uint64_t *)ptr;
	// compare the pointee sizes: the reinterpretation is only sound if a
	// double and a uint64_t occupy the same number of bytes
	_Static_assert(sizeof(*uptr) == sizeof(*ptr));
	uint64_t * valp = (uint64_t *)&val;
	__atomic_store_n(uptr, *valp, mem);
}
71
// NOTE(review): this branch previously re-tested READYQ_USE_LOGDBL_AVG, which
// the preceding #elif already handles, so it could never be selected.
// READYQ_USE_LOGINT_AVG is the presumed intended selector -- confirm against
// the header that defines __readyQ_avg_t.
// NOTE(review): this branch defines neither warn_large_before nor
// warn_large_after, both of which moving_average expands.
#elif defined(READYQ_USE_LOGINT_AVG)

// convert to log2 scale but with fixed-point u32.32 values
static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(intsc); }

// 8X factor, +3 in logscale (log2(8) = 3), is + 0x3.00000000 in u32.32
#define AVG_FACTOR( x ) (0x3_00000000ull + (x))
79
80	#else
81	#error must pick a scheme for averaging
82	#endif
83
//-----------------------------------------------------------------------
// Calc moving average based on existing average, before and current time.
//
// currtsc : current timestamp
// intsc   : earlier timestamp; the new sample is currtsc - intsc, clamped to 0
//           when currtsc is not greater than intsc
// old_avg : previous average, already in the scale selected by the averaging scheme
// strict  : only consulted by the warn_large_* sanity checks
//
// Returns (12 * sample + 4 * old_avg) / 16, where the sample is first converted
// with __to_readyQ_avg.
static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) {
	(void)strict; // disable the warning around the fact this is unused in release.
	/* paranoid */ warn_large_before;

	// `new_val` and `ret` are referenced by name from warn_large_after: do not rename them.
	unsigned long long new_val = 0;
	if (currtsc > intsc) new_val = currtsc - intsc;

	const __readyQ_avg_t weight_all = 16;
	const __readyQ_avg_t weight_sample = 12;
	const __readyQ_avg_t weight_history = weight_all - weight_sample;
	const __readyQ_avg_t ret = ((weight_sample * __to_readyQ_avg(new_val)) + (weight_history * old_avg)) / weight_all;

	/* paranoid */ warn_large_after;
	return ret;
}
99
// Update slot `idx` of `tscs` with a new timestamp and a refreshed moving average.
//
// Does nothing when ts_next is ULLONG_MAX. Otherwise stores ts_next in the
// slot's `tv` field and stores in its `ma` field the moving average computed
// from the current rdtscl() reading, ts_prev and the previously stored average.
// All accesses to the slot use relaxed atomics.
static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) {
	if (ts_next != ULLONG_MAX) {
		// sample the clock and the old average before publishing anything
		unsigned long long current = rdtscl();
		__readyQ_avg_t previous_avg = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);

		__atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED);

		__readyQ_avg_t updated_avg = moving_average(current, ts_prev, previous_avg, strict);
		__atomic_store_n(&tscs[ idx ].t.ma, updated_avg, __ATOMIC_RELAXED);
	}
}
107
//-----------------------------------------------------------------------
// Calc age a timestamp should be before needing help.
//
// Examines the `shard_factor` consecutive entries starting at index `procid`.
// For every entry whose ts() is not ULLONG_MAX, computes the moving average of
// (ctsc, that timestamp, the entry's stored average) and keeps the largest one.
// Returns AVG_FACTOR applied to that largest value (to 0 if every entry was skipped).
//
// Data_t must provide: unsigned long long ts(Data_t & this).
forall(Data_t * | { unsigned long long ts(Data_t & this); })
static inline __readyQ_avg_t calc_cutoff(
	const unsigned long long ctsc,
	unsigned procid,
	size_t count,
	Data_t * data,
	__timestamp_t * tscs,
	const unsigned shard_factor,
	bool strict
) {
	const unsigned start = procid;
	__readyQ_avg_t largest = 0;
	for(i; shard_factor) {
		unsigned long long ptsc = ts(data[start + i]);

		// ULLONG_MAX entries are skipped
		if(ptsc == ULLONG_MAX) continue;

		// NOTE(review): this bound check only runs after data[start + i] has already been read
		/* paranoid */ verify( start + i < count );
		__readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict);
		if(avg > largest) largest = avg;
	}
	return AVG_FACTOR( largest );
}
132
// Look up the cache entry that cpu_info.llc_map associates with the current
// cpu, record it in caches[idx].id if it differs from what is stored there,
// and return it.
static inline unsigned cache_id(struct cluster * cltr, unsigned idx) with (cltr->sched) {
	// Figure out the current cpu and make sure it is valid
	const int cpu = __kernel_getcpu();
	/* paranoid */ verify(cpu >= 0);
	/* paranoid */ verify(cpu < cpu_info.hthrd_count);

	const unsigned llc_id = cpu_info.llc_map[cpu].cache;

	// Super important: don't write the same value over and over again
	// We want to maximise our chances that this particular value stays in cache
	const bool stale = caches[idx].id != llc_id;
	if(stale) {
		__atomic_store_n(&caches[idx].id, llc_id, __ATOMIC_RELAXED);
	}

	return llc_id;
}
147
// Constant shard factors, one per subsystem: 2 for the ready queue, 1 for io.
static const struct {
	unsigned readyq;	// shard factor used for the ready queue
	unsigned io;		// shard factor used for io
} __shard_factor = { 2, 1 };
152
153	// Local Variables: //
154	// mode: c //
155	// tab-width: 4 //
156	// End: //

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format