Context Navigation

array.hfa@ 80e83b6c

Visit:

Last change on this file since 80e83b6c was eb0d9b7, checked in by Michael Brooks <mlbrooks@…>, 4 weeks ago

Improve libcfa-array's bound-check removal and write that thesis section.

The libcfa change adds a more performant alternative for a subset of multidimensional indexing cases that were already functionally correct.
That the new alternative is more performant is not shown in the test suite.
There is an associated new high-performance option for passing an array-or-slice to a function.
The added test cases cover those options.

The added in-thesis demos rely on the new more-performant alternative for multidimensional indexing.

Property mode set to 100644

File size: 13.5 KB

Line
1	#pragma once
2
3
4	// `tag` is an empty struct whose only content is its type argument; ttag/ztag build a value of it.
5	forall( __CFA_tysys_id_only_X & ) struct tag {};
6	#define ttag(T) ((tag(T)){}) // compound-literal value of type tag(T)
7	#define ztag(n) ttag(n) // forwards to ttag unchanged
8	// Debug-only bounds checks: abort when `sub` is outside [0,len). subcheck takes a signed long subscript, subchecku an unsigned long one (no `< 0` test, printed with %lu). Every variant expands to one do/while(0) statement, so it is safe in an unbraced if/else.
9	#ifdef __CFA_DEBUG__
10	#define subcheck( arr, sub, len ) do { \
11	if ( (sub) < 0 \|\| (sub) >= (len) ) \
12	abort( "Subscript %ld exceeds dimension range [0,%zu) for array %p.\n", \
13	(sub), (len), (arr) ); } while (0)
14	#define subchecku( arr, sub, len ) do { \
15	if ( (sub) >= (len) ) \
16	abort( "Subscript %lu exceeds dimension range [0,%zu) for array %p.\n", \
17	(sub), (len), (arr) ); } while (0)
18	#else
19	#define subcheck( arr, sub, len ) do {} while (0)
20	#define subchecku( arr, sub, len ) do {} while (0)
21	#endif
22
23	//
24	// The `array` macro is the public interface.
25	// It computes the type of a dense (trivially strided) array.
26	// All user-declared objects are dense arrays.
27	//
28	// The `arpk` (ARray with PacKing info explicit) type is, generally, a slice with _any_ striding.
29	// This type is meant for internal use.
30	// CFA programmers should not instantiate it directly, nor access its field.
31	// CFA programmers should call ?[?] on it.
32	// Yet user-given `array(stuff)` expands to `arpk(stuff')`.
33	// The comments here explain the resulting internals.
34	//
35	// Just as a plain-C "multidimensional" array is really array-of-array-of-...,
36	// so does arpk generally show up as arpk-of-arpk-of...
37	//
38	// In the example of `array(float, 3, 4, 5) a;`,
39	// `typeof(a)` is an `arpk` instantiation.
40	// These comments explain _its_ arguments, i.e. those of the topmost `arpk` level.
41	//
42	// [N] : the number of elements in `a`; 3 in the example
43	// S : carries the stride size (distance in bytes between &a[0] and &a[1]), in sizeof(S);
44	// same as Timmed when striding is trivial, same as Timmed in the example
45	// Timmed : (T-immediate) the inner type; conceptually, `typeof(a)` is "arpk of Timmed";
46	// array(float, 4, 5) in the example
47	// Tbase : (T-base) the deepest element type that is not arpk; float in the example
48	//
49	forall( [N], S & \| sized(S), Timmed &, Tbase & ) {
50	//
51	// Single-dim array struct (with explicit packing and atom): storage is N slots, each sizeof(S) bytes wide.
52	//
53	struct arpk {
54	S strides[N]; // slot i is reinterpreted as Timmed by the ?[?] operators
55	};
56
57	// About the choice of integral types offered as subscript overloads:
58	// Intent is to cover these use cases:
59	// a[0] // i : zero_t
60	// a[1] // i : one_t
61	// a[2] // i : int
62	// float foo( ptrdiff_t i ) { return a[i]; } // i : ptrdiff_t
63	// float foo( size_t i ) { return a[i]; } // i : size_t
64	// forall( [N] ) ... for( i; N ) { total += a[i]; } // i : typeof( sizeof(42) )
65	// for( i; 5 ) { total += a[i]; } // i : int
66	//
67	// It gets complicated by:
68	// - CFA does overloading on concrete types, like int and unsigned int, not on typedefed
69	// types like size_t. So trying to overload on ptrdiff_t vs int works in 64-bit mode
70	// but not in 32-bit mode.
71	//
72	// cfa -m32 (and gcc) cfa -m64 (and gcc)
73	// ptrdiff_t int long int
74	// size_t unsigned int unsigned long int
75	// typeof( sizeof(42) ) unsigned int unsigned long int
76	// int int int
77	//
78	// So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
79	//
80	// The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
81	// because assertion satisfaction requires types to match exactly. Both higher-dimensional
82	// subscripting and operations on slices use asserted subscript operators. The test case
83	// array-collections/array-sbscr-types covers the combinations. Mike believes that commenting out
84	// any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
85	// Mike is open to being shown a smaller set of overloads that still passes the test.
86
87	// Subscript by the literal 0 (zero_t): yields the first element; bounds-checked only under __CFA_DEBUG__.
88	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, zero_t ) {
89	subcheck( a, 0L, N );
90	return (Timmed &)a.strides[0]; // view the stride slot as the immediate element type
91	}
92	// Subscript by the literal 1 (one_t): yields the second element; bounds-checked only under __CFA_DEBUG__.
93	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, one_t ) {
94	subcheck( a, 1L, N );
95	return (Timmed &)a.strides[1]; // view the stride slot as the immediate element type
96	}
97	// Subscript by int on a mutable array; the index is widened to long int only for the debug bounds check.
98	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, int i ) {
99	subcheck( a, (long int)i, N );
100	return (Timmed &)a.strides[i]; // view the stride slot as the immediate element type
101	}
102	// Subscript by int on a const array; same access as the mutable overload, returning a const reference.
103	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, int i ) {
104	subcheck( a, (long int)i, N );
105	return (Timmed &)a.strides[i]; // the cast names plain `Timmed &`; the return type supplies the const
106	}
107	// Subscript by unsigned int on a mutable array; uses the unsigned check, which has no `< 0` test.
108	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
109	subchecku( a, (unsigned long int)i, N );
110	return (Timmed &)a.strides[i]; // view the stride slot as the immediate element type
111	}
112	// Subscript by unsigned int on a const array; same access as the mutable overload, returning a const reference.
113	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
114	subchecku( a, (unsigned long int)i, N );
115	return (Timmed &)a.strides[i]; // the cast names plain `Timmed &`; the return type supplies the const
116	}
117	// Subscript by long int on a mutable array; the index already has the type the signed check prints.
118	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, long int i ) {
119	subcheck( a, i, N );
120	return (Timmed &)a.strides[i]; // view the stride slot as the immediate element type
121	}
122	// Subscript by long int on a const array; same access as the mutable overload, returning a const reference.
123	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, long int i ) {
124	subcheck( a, i, N );
125	return (Timmed &)a.strides[i]; // the cast names plain `Timmed &`; the return type supplies the const
126	}
127	// Subscript by unsigned long int on a mutable array; uses the unsigned check, which has no `< 0` test.
128	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
129	subchecku( a, i, N );
130	return (Timmed &)a.strides[i]; // view the stride slot as the immediate element type
131	}
132	// Subscript by unsigned long int on a const array; same access as the mutable overload, returning a const reference.
133	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
134	subchecku( a, i, N );
135	return (Timmed &)a.strides[i]; // the cast names plain `Timmed &`; the return type supplies the const
136	}
137	// Number of elements in this (outermost) dimension; the unnamed argument is used only for its type.
138	static inline size_t len( arpk( N, S, Timmed, Tbase ) & ) {
139	return N;
140	}
141	// Empty-bodied function that pairs an arpk type with its dimension N purely through tag types; the `ar` macro asserts it.
142	static inline void __taglen( tag(arpk( N, S, Timmed, Tbase )), tag(N) ) {}
143	}
144
145	// RAII pattern has workarounds for
146	// - Trac 226: Simplest handling would be, require immediate element to be otype, let autogen
147	// raii happen. Performance on even a couple dimensions is unacceptable because of exponential
148	// thunk creation: ?{}() needs all four otype funcs from next level, so does ^?{}(), so do the
149	// other two. This solution offers ?{}() that needs only ?{}(), and similar for ^?{}.
150
151	// Pass `delay_init` to an array constructor to skip initializing the elements, e.g.
152	// array(float, 5) x = { delay_init };
153	enum () delay_init_t { delay_init };
154	forall( [N], S & \| sized(S), Timmed &, Tbase & )
155	static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this, delay_init_t ) { // delay-init constructor: no element constructor is called
156	void ?{}( S (&)[N] ) {} // local no-op constructor for the raw S[N] storage
157	?{}(this.strides); // presumably resolves to the no-op declared just above — TODO confirm
158	}
159
160	// Default constructor: requires only `void ?{}( Timmed & )` and calls it on every element.
161	// array(float, 5) x;
162	forall( [N], S & \| sized(S), Timmed &, Tbase & \| { void ?{}( Timmed & ); } )
163	static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this ) {
164	?{}( this, delay_init ); // first run the delay-init constructor, which touches no elements
165	for (i; N) ?{}( (Timmed &)this.strides[i] ); // then default-construct each slot, viewed as Timmed
166	}
167	// Destructor: requires only `void ^?{}( Timmed & )` and calls it on every element, highest index first.
168	forall( [N], S & \| sized(S), Timmed &, Tbase & \| { void ^?{}( Timmed & ); } )
169	static inline void ^?{}( arpk( N, S, Timmed, Tbase ) & this ) {
170	void ^?{}( S (&)[N] ) {} // local no-op destructor for the raw S[N] storage
171	^?{}(this.strides); // presumably resolves to the no-op declared just above — TODO confirm
172
173	for (i; N ) {
174	^?{}( (Timmed &)this.strides[N-i-1] ); // N-i-1 visits the slots in reverse
175	}
176	}
177
178
179	//
180	// Sugar for declaring array structure instances
181	//
182	// Base case: given only the element-type tag, the result type is the element type. Empty body; the array/zarray macros call mkar_ inside typeof.
183	forall( Te * )
184	static inline Te mkar_( tag(Te) ) {}
185	// Recursive case: consume one dimension tag; the result is an arpk of N whose stride and immediate type are both Trslt, the result for the remaining tags.
186	forall( [N], ZTags ... , Trslt &, Tatom & \| { Trslt mkar_( tag(Tatom), ZTags ); } )
187	static inline arpk( N, Trslt, Trslt, Tatom) mkar_( tag(Tatom), tag(N), ZTags ) {}
188
189	// based on https://stackoverflow.com/questions/1872220/is-it-possible-to-iterate-over-arguments-in-variadic-macros
190
191	// FOR_EACH(action, x1, ..., xk) expands to action(x1)...action(xk), with no separator, for k from 1 to 5.
192	#define FE_0(WHAT)
193	#define FE_1(WHAT, X) WHAT(X)
194	#define FE_2(WHAT, X, ...) WHAT(X)FE_1(WHAT, __VA_ARGS__)
195	#define FE_3(WHAT, X, ...) WHAT(X)FE_2(WHAT, __VA_ARGS__)
196	#define FE_4(WHAT, X, ...) WHAT(X)FE_3(WHAT, __VA_ARGS__)
197	#define FE_5(WHAT, X, ...) WHAT(X)FE_4(WHAT, __VA_ARGS__)
198	//... repeat as needed (GET_MACRO and FOR_EACH must be extended to match)
199	// GET_MACRO yields its 7th argument; FOR_EACH lists the FE_* names so that argument is FE_k when k items are passed.
200	#define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME
201	#define FOR_EACH(action,...) \
202	GET_MACRO(_0,__VA_ARGS__,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)(action,__VA_ARGS__)
203	// array( TE, d1, ... ): the type mkar_ yields for the element-type tag followed by one ttag per dimension.
204	#define COMMA_ttag(X) , ttag(X)
205	#define array( TE, ...) typeof( mkar_( ttag(TE) FOR_EACH( COMMA_ttag, __VA_ARGS__ ) ) )
206	// zarray: same expansion as `array`, except each dimension goes through ztag (which forwards to ttag).
207	#define COMMA_ztag(X) , ztag(X)
208	#define zarray( TE, ...) typeof( mkar_( ttag(TE) FOR_EACH( COMMA_ztag, __VA_ARGS__ ) ) )
209
210	//
211	// Sugar for multidimensional indexing
212	//
213
214	// Core -[[-,-,-]] operator
215
216	#ifdef TRY_BROKEN_DESIRED_MD_SUBSCRIPT
217
218	// Desired form. One definition with recursion on IxBC (worked until Jan 2021, see trac #__TODO__)
219	// Asserts a one-subscript ?[?] from TA to TB and a ?[?] from TB to TC taking the remaining subscripts (the IxBC pack).
220	forall( TA &, TB &, TC &, IxAB, IxBC ... \| { TB & ?[?]( TA &, IxAB ); TC & ?[?]( TB &, IxBC ); } )
221	static inline TC & ?[?]( TA & this, IxAB ab, IxBC bc ) {
222	return this[ab][bc]; // peel the first subscript, then apply the rest
223	}
224
225	#else
226
227	// Workaround form. Listing all possibilities up to 4 dims.
228	// Two subscripts: apply the asserted one-subscript operator on TA, then the asserted one on TB.
229	forall( TA &, TB &, TC &, IxAB_0, IxBC \| { TB & ?[?]( TA &, IxAB_0 ); TC & ?[?]( TB &, IxBC ); } )
230	static inline TC & ?[?]( TA & this, IxAB_0 ab, IxBC bc ) {
231	return this[ab][bc];
232	}
233	// Three subscripts: apply the asserted two-subscript operator on TA, then subscript the TB result with the last index.
234	forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxBC \| { TB & ?[?]( TA &, IxAB_0, IxAB_1 ); TC & ?[?]( TB &, IxBC ); } )
235	static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxBC bc ) {
236	return this[[ab0,ab1]][bc];
237	}
238	// Four subscripts: apply the asserted three-subscript operator on TA, then subscript the TB result with the last index.
239	forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxAB_2, IxBC \| { TB & ?[?]( TA &, IxAB_0, IxAB_1, IxAB_2 ); TC & ?[?]( TB &, IxBC ); } )
240	static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxAB_2 ab2, IxBC bc ) {
241	return this[[ab0,ab1,ab2]][bc];
242	}
243
244	// Further form of -[-,-,-] that avoids using the trait system.
245	// Above overloads work for any type with (recursively valid) subscript operator,
246	// provided said subscript is passed as an assertion.
247	// Below works only on arpk variations but never passes its subscript through an assertion.
248	//
249	// When arpk implements the trait used above,
250	// the critical assertion is backed by a nontrivial thunk.
251	// There is no "thunk problem" (lifetime) issue, when used as shown in the test suite.
252	// But the optimizer has shown difficulty removing these thunks in cases where "it should,"
253	// i.e. when all user code is in one compilation unit.
254	// Not that every attempt at removing such a thunk fails; cases have been found going both ways.
255	// Cases have been found with unnecessary bound-checks removed successfully,
256	// on user code written against the overloads below,
257	// but where these bound checks (which occur within `call`ed thunks) are not removed,
258	// on user code written against the overloads above.
259	//
260	// The overloads below provide specializations of the above
261	// that are a little harder to use than the ones above,
262	// but where array API erasure has been seen to be more effective.
263	// Note that the style below does not appeal to a case where thunk inlining is more effective;
264	// rather, it simply does not rely on thunks in the first place.
265	//
266	// Both usage styles are shown in test array-md-sbscr-cases#numSubscrTypeCompatibility,
267	// with the more general one above being "high abstraction,"
268	// and the more performant one below being "mid abstraction" and "low abstraction."
269	//
270	// A breadth of index types is not given here (providing -[size_t,size_t,...] only)
271	// because these declarations are not feeding a trait, so safe implicit arithmetic conversion kicks in.
272	// Even so, there may still be an un-met need for accepting
273	// either ptrdiff_t or size_t (signed or unsigned)
274	// because Mike has seen the optimizer resist removing bound checks when sign-conversion is in play.
275	// "Only size_t" is meeting today's need
276	// and no solution is known that avoids 2^D overloads for D dimensions
277	// while offering multiple subscript types and staying assertion-free.
278	//
279	// This approach, of avoiding traits entirely, is likely incompatible with the original desire
280	// to have one recursive multidimensional subscript operator (TRY_BROKEN_DESIRED_MD_SUBSCRIPT).
281	// To make a single declaration work,
282	// we would probably have to get better at coaxing the optimizer into inlining thunks.
283	// 2-D subscript on directly nested arpk types, by size_t indices. Stride parameters are declared `S*` (only a size is needed, matching arpk's own `S & | sized(S)`), consistent with the 3-D overload's `S1*`.
284	forall( [N2], S2*, [N1], S1*, Timmed1, Tbase )
285	static inline Timmed1 & ?[?]( arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ) & this, size_t ix2, size_t ix1 ) {
286	return this[ix2][ix1]; // outer dimension first, then inner
287	}
288	// 3-D subscript on directly nested arpk types, by size_t indices. All three stride parameters are now declared `S*` (previously only S1 was), since each is used only as an arpk stride, which needs just a size.
289	forall( [N3], S3*, [N2], S2*, [N1], S1*, Timmed1, Tbase )
290	static inline Timmed1 & ?[?]( arpk( N3, S3, arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ), Tbase ) & this, size_t ix3, size_t ix2, size_t ix1 ) {
291	return this[ix3][ix2][ix1]; // outermost dimension first
292	}
293	// 4-D subscript on directly nested arpk types, by size_t indices. Stride parameters are declared `S*` (only a size is needed, matching arpk's own `S & | sized(S)`), consistent with the 3-D overload's `S1*`.
294	forall( [N4], S4*, [N3], S3*, [N2], S2*, [N1], S1*, Timmed1, Tbase )
295	static inline Timmed1 & ?[?]( arpk( N4, S4, arpk( N3, S3, arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ), Tbase ), Tbase ) & this, size_t ix4, size_t ix3, size_t ix2, size_t ix1 ) {
296	return this[ix4][ix3][ix2][ix1]; // outermost dimension first
297	}
298
299
300
301	#endif
302
303	// Available for users to work around Trac #265
304	// If `a[...0...]` isn't working, try `a[...ix0...]` instead.
305	// ix0 is a zero of type ptrdiff_t, so it is an ordinary integer subscript rather than the zero_t literal.
306	#define ix0 ((ptrdiff_t)0)
307
308
309
310	//
311	// Rotation
312	//
313
314	// Base case: the type being rewritten is the base element type itself, so the result is a new level arpk( Nq, Sq, Tbase, Tbase ).
315	forall( [Nq], Sq & \| sized(Sq), Tbase & )
316	static inline tag(arpk( Nq, Sq, Tbase, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(Tbase ) ) {
317	tag(arpk( Nq, Sq, Tbase, Tbase )) ret; // tag is an empty struct; only the return type matters
318	return ret;
319	}
320
321	// Recursive case: keep this level's N and S, and replace its immediate type recq by recr, the asserted enq_ result for recq.
322	forall( [Nq], Sq & \| sized(Sq), [N], S & \| sized(S), recq &, recr &, Tbase & \| { tag(recr) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(recq) ); } )
323	static inline tag(arpk( N, S, recr, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(arpk( N, S, recq, Tbase )) ) {
324	tag(arpk( N, S, recr, Tbase )) ret; // tag is an empty struct; only the return type matters
325	return ret;
326	}
327
328	// Wrapper: subscripting with `all` returns the same object viewed as `result`, the type enq_ computes from this level's N, S and Te.
329	extern struct all_t {} all;
330	forall( [N], S & \| sized(S), Te &, result &, Tbase & \| { tag(result) enq_( tag(Tbase), tag(N), tag(S), tag(Te) ); } )
331	static inline result & ?[?]( arpk( N, S, Te, Tbase ) & this, all_t ) {
332	return (result&) this; // reference cast only; the storage is not copied
333	}
334
335	//
336	// Trait of array or slice
337	//
338
339	// desired:
340	// forall( A &, Tv &, [N] )
341	// trait ar {
342	// Tv& ?[?]( A &, zero_t );
343	// Tv& ?[?]( A &, one_t );
344	// Tv& ?[?]( A &, int );
345	// ...
346	// size_t len( A & );
347	// void __taglen( tag(A), tag(N) );
348	// };
349
350	// working around N's not being accepted as arguments to traits:
351	// ar( A, Tv, N ) expands to a braced list of declarations: six ?[?] overloads returning Tv&, len, and __taglen pairing A with N.
352	#define ar( A, Tv, N ) { \
353	Tv& ?[?]( A &, zero_t ); \
354	Tv& ?[?]( A &, one_t ); \
355	Tv& ?[?]( A &, int ); \
356	Tv& ?[?]( A &, unsigned int ); \
357	Tv& ?[?]( A &, long int ); \
358	Tv& ?[?]( A &, unsigned long int ); \
359	size_t len( A & ); \
360	void __taglen( tag(A), tag(N) ); \
361	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format