source: libcfa/src/collections/array.hfa@ 80e83b6c

Last change on this file since 80e83b6c was eb0d9b7, checked in by Michael Brooks <mlbrooks@…>, 4 weeks ago

Improve libcfa-array's bound-check removal and write that thesis section.

The libcfa change adds a more performant alternative for a subset of multidimensional indexing cases that were already functionally correct.
That the new alternative is more performant is not shown in the test suite.
There is an associated new high-performance option for passing an array-or-slice to a function.
The added test cases cover those options.

The added in-thesis demos rely on the new more-performant alternative for multidimensional indexing.

  • Property mode set to 100644
File size: 13.5 KB
Line 
1#pragma once
2
3
4
// `tag(T)` is an empty struct whose only job is to carry its type argument.
// Values of it are passed where a type (or dimension) must travel through an
// ordinary function-call argument list, e.g. to `mkar_` and `__taglen`.
forall( __CFA_tysys_id_only_X & )
struct tag {};

// ttag(T): an expression of type tag(T), spelled as an empty compound literal.
#define ttag( T ) ( (tag(T)){} )

// ztag(n): same expansion as ttag, offered under a separate name.
#define ztag( n ) ttag( n )
8
// Subscript bound checks, active only when __CFA_DEBUG__ is defined.
//
//   arr : the array being subscripted; only reported in the diagnostic
//   sub : the subscript; `subcheck` is for signed long values (printed with %ld),
//         `subchecku` is for unsigned long values (printed with %lu)
//   len : the dimension length (printed with %zu)
//
// On a failed check, abort is called with a formatted message.
//
// In both build modes each macro expands to exactly one `do ... while (0)` statement,
// so `if ( c ) subcheck( a, i, n ); else ...` parses the same way with checks on or off.
//
// The arguments are evaluated more than once in debug mode; pass side-effect-free expressions.
// NOTE(review): `arr` is handed to %p as written; confirm call sites pass something pointer-compatible.
#ifdef __CFA_DEBUG__
#define subcheck( arr, sub, len ) do { \
		if ( (sub) < 0 || (sub) >= (len) ) \
			abort( "Subscript %ld exceeds dimension range [0,%zu) for array %p.\n", \
				(sub), (len), (arr) ); \
	} while (0)
#define subchecku( arr, sub, len ) do { \
		if ( (sub) >= (len) ) \
			abort( "Subscript %lu exceeds dimension range [0,%zu) for array %p.\n", \
				(sub), (len), (arr) ); \
	} while (0)
#else
#define subcheck( arr, sub, len ) do {} while (0)
#define subchecku( arr, sub, len ) do {} while (0)
#endif
22
23//
24// The `array` macro is the public interface.
25// It computes the type of a dense (trivially strided) array.
26// All user-declared objects are dense arrays.
27//
28// The `arpk` (ARray with PacKing info explicit) type is, generally, a slice with _any_ striding.
29// This type is meant for internal use.
30// CFA programmers should not instantiate it directly, nor access its field.
31// CFA programmers should call ?[?] on it.
32// Yet user-given `array(stuff)` expands to `arpk(stuff')`.
33// The comments here explain the resulting internals.
34//
// Just as a plain-C "multidimensional" array is really array-of-array-of-...,
// so does arpk generally show up as arpk-of-arpk-of...
37//
38// In the example of `array(float, 3, 4, 5) a;`,
39// `typeof(a)` is an `arpk` instantiation.
40// These comments explain _its_ arguments, i.e. those of the topmost `arpk` level.
41//
//   [N]    : the number of elements in `a`; 3 in the example
//   S      : carries the stride size (distance in bytes between &a[0] and &a[1]), in sizeof(S);
//            same as Timmed when striding is trivial, same as Timmed in the example
//   Timmed : (T-immediate) the inner type; conceptually, `typeof(a)` is "arpk of Timmed";
//            array(float, 4, 5) in the example
//   Tbase  : (T-base) the deepest element type that is not arpk; float in the example
48//
forall( [N], S & | sized(S), Timmed &, Tbase & ) {
	//
	// Single-dim array struct (with explicit packing and atom)
	//
	// The only field is N slots of type S.  Subscripting slot i yields that slot
	// viewed as a Timmed, so sizeof(S) is the distance between consecutive elements.
	//
	struct arpk {
		S strides[N];
	};

	// About the choice of integral types offered as subscript overloads:
	// Intent is to cover these use cases:
	//    a[0]                                                // i : zero_t
	//    a[1]                                                // i : one_t
	//    a[2]                                                // i : int
	//    float foo( ptrdiff_t i ) { return a[i]; }           // i : ptrdiff_t
	//    float foo( size_t i ) { return a[i]; }              // i : size_t
	//    forall( [N] ) ... for( i; N ) { total += a[i]; }    // i : typeof( sizeof(42) )
	//    for( i; 5 ) { total += a[i]; }                      // i : int
	//
	// It gets complicated by:
	// -  CFA does overloading on concrete types, like int and unsigned int, not on typedefed
	//    types like size_t.  So trying to overload on ptrdiff_t vs int works in 64-bit mode
	//    but not in 32-bit mode.
	//
	//                            cfa -m32 (and gcc)    cfa -m64 (and gcc)
	//    ptrdiff_t               int                   long int
	//    size_t                  unsigned int          unsigned long int
	//    typeof( sizeof(42) )    unsigned int          unsigned long int
	//    int                     int                   int
	//
	//    So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
	//
	// The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
	// because assertion satisfaction requires types to match exactly.  Both higher-dimensional
	// subscripting and operations on slices use asserted subscript operators.  The test case
	// array-collections/array-sbscr-types covers the combinations.  Mike believes that commenting out
	// any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
	// Mike is open to being shown a smaller set of overloads that still passes the test.

	// Every overload below has the same shape: bound-check the subscript against N
	// (a no-op unless __CFA_DEBUG__ is defined), then return slot i viewed as Timmed.
	// Signed subscripts go through subcheck, unsigned ones through subchecku.
	// The const overloads return const Timmed & and keep the qualifier in the cast,
	// rather than casting const away and relying on the return conversion to restore it.

	// a[0]
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, zero_t ) {
		subcheck( a, 0L, N );
		return (Timmed &)a.strides[0];
	}

	// a[1]
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, one_t ) {
		subcheck( a, 1L, N );
		return (Timmed &)a.strides[1];
	}

	// a[i], i : int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, int i ) {
		subcheck( a, (long int)i, N );
		return (Timmed &)a.strides[i];
	}

	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, int i ) {
		subcheck( a, (long int)i, N );
		return (const Timmed &)a.strides[i];
	}

	// a[i], i : unsigned int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
		subchecku( a, (unsigned long int)i, N );
		return (Timmed &)a.strides[i];
	}

	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
		subchecku( a, (unsigned long int)i, N );
		return (const Timmed &)a.strides[i];
	}

	// a[i], i : long int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, long int i ) {
		subcheck( a, i, N );
		return (Timmed &)a.strides[i];
	}

	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, long int i ) {
		subcheck( a, i, N );
		return (const Timmed &)a.strides[i];
	}

	// a[i], i : unsigned long int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
		subchecku( a, i, N );
		return (Timmed &)a.strides[i];
	}

	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
		subchecku( a, i, N );
		return (const Timmed &)a.strides[i];
	}

	// Number of elements in this dimension; the argument is used only for its type.
	static inline size_t len( arpk( N, S, Timmed, Tbase ) & ) {
		return N;
	}

	// Empty function that ties an arpk type to its dimension N at the type level.
	// It exists to satisfy the `__taglen` entry of the `ar` assertion list.
	static inline void __taglen( tag(arpk( N, S, Timmed, Tbase )), tag(N) ) {}
}
144
145// RAII pattern has workarounds for
146// - Trac 226: Simplest handling would be, require immediate element to be otype, let autogen
147// raii happen. Performance on even a couple dimensions is unacceptable because of exponential
148// thunk creation: ?{}() needs all four otype funcs from next level, so does ^?{}(), so do the
149// other two. This solution offers ?{}() that needs only ?{}(), and similar for ^?{}.
150
// Skip initializing elements:
//     array(float, 5) x = { delay_init };
enum () delay_init_t { delay_init };

// Constructor selected by passing `delay_init`: runs no element constructors.
forall( [N], S & | sized(S), Timmed &, Tbase & )
static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this, delay_init_t ) {
	// Local do-nothing constructor for the storage member, then an explicit call to it.
	// Presumably the explicit call stands in for the member construction that would
	// otherwise be generated -- confirm against CFA constructor rules.
	void ?{}( S (&)[N] ) {}
	?{}( this.strides );
}
159
// Call the default constructor on every element:
//     array(float, 5) x;
// Needs only the element's default constructor as an assertion (see the Trac 226 note above).
forall( [N], S & | sized(S), Timmed &, Tbase & | { void ?{}( Timmed & ); } )
static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this ) {
	// Set up the storage without touching elements ...
	?{}( this, delay_init );

	// ... then construct the elements one by one, in increasing index order.
	for ( i; N ) {
		?{}( (Timmed &)this.strides[i] );
	}
}
167
// Destructor: calls the element destructor on every element, last element first.
// Needs only the element's destructor as an assertion.
forall( [N], S & | sized(S), Timmed &, Tbase & | { void ^?{}( Timmed & ); } )
static inline void ^?{}( arpk( N, S, Timmed, Tbase ) & this ) {
	// Local do-nothing destructor for the storage member, called explicitly.
	void ^?{}( S (&)[N] ) {}
	^?{}( this.strides );

	for ( i; N ) {
		// i counts up from 0; rev walks the slots from N-1 down to 0.
		size_t rev = N - i - 1;
		^?{}( (Timmed &)this.strides[rev] );
	}
}
177
178
179//
180// Sugar for declaring array structure instances
181//
182
// mkar_, base case: given only the element-type tag, the result type is the element type.
// The body is empty; within this file the function is named only inside typeof(...)
// (see the `array` and `zarray` macros), where just its return type matters.
forall( Te * )
static inline Te mkar_( tag(Te) ) {}
185
// mkar_, recursive case: peel off the leading dimension tag(N).
// The asserted mkar_ over the remaining tags gives the inner type Trslt.
// The result is arpk( N, Trslt, Trslt, Tatom ), whose stride type equals its inner type.
// The body is empty for the same reason as the base case.
forall( [N], ZTags ... , Trslt &, Tatom & | { Trslt mkar_( tag(Tatom), ZTags ); } )
static inline arpk( N, Trslt, Trslt, Tatom ) mkar_( tag(Tatom), tag(N), ZTags ) {}
188
189// based on https://stackoverflow.com/questions/1872220/is-it-possible-to-iterate-over-arguments-in-variadic-macros
190
// FOR_EACH( action, x1, ..., xk ) expands to action(x1) action(x2) ... action(xk), for 1 <= k <= 5.
//
// FE_k applies ACTION to the first argument and hands the rest to FE_(k-1).
// To support more arguments, add FE_6, FE_7, ... and extend GET_MACRO and FOR_EACH to match.
#define FE_0(ACTION)
#define FE_1(ACTION, HEAD) ACTION(HEAD)
#define FE_2(ACTION, HEAD, ...) ACTION(HEAD) FE_1(ACTION, __VA_ARGS__)
#define FE_3(ACTION, HEAD, ...) ACTION(HEAD) FE_2(ACTION, __VA_ARGS__)
#define FE_4(ACTION, HEAD, ...) ACTION(HEAD) FE_3(ACTION, __VA_ARGS__)
#define FE_5(ACTION, HEAD, ...) ACTION(HEAD) FE_4(ACTION, __VA_ARGS__)

// GET_MACRO yields its seventh argument.  FOR_EACH puts the caller's arguments in front of the
// FE_5 ... FE_0 list, so the more arguments given, the higher-numbered FE_k lands in seventh place.
#define GET_MACRO(_0, _1, _2, _3, _4, _5, NAME, ...) NAME
#define FOR_EACH(action, ...) \
	GET_MACRO(_0, __VA_ARGS__, FE_5, FE_4, FE_3, FE_2, FE_1, FE_0)(action, __VA_ARGS__)
203
// array( ELEM, d1, ..., dk ): the type obtained from mkar_ applied to the element tag
// followed by one ttag per dimension argument.  Nothing is evaluated; only typeof is taken.
#define COMMA_ttag( DIM ) , ttag( DIM )
#define array( ELEM, ... ) typeof( mkar_( ttag( ELEM ) FOR_EACH( COMMA_ttag, __VA_ARGS__ ) ) )

// zarray( ELEM, d1, ..., dk ): same construction, with the dimension tags spelled through ztag.
#define COMMA_ztag( DIM ) , ztag( DIM )
#define zarray( ELEM, ... ) typeof( mkar_( ttag( ELEM ) FOR_EACH( COMMA_ztag, __VA_ARGS__ ) ) )
209
210//
211// Sugar for multidimensional indexing
212//
213
214// Core -[[-,-,-]] operator
215
216#ifdef TRY_BROKEN_DESIRED_MD_SUBSCRIPT
217
218// Desired form. One definition with recursion on IxBC (worked until Jan 2021, see trac #__TODO__)
219
// Single recursive definition: subscript by `ab`, then subscript the result by the pack `bc`.
// Compiled only when TRY_BROKEN_DESIRED_MD_SUBSCRIPT is defined.
forall( TA &, TB &, TC &, IxAB, IxBC ...
		| { TB & ?[?]( TA &, IxAB ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB ab, IxBC bc ) {
	return this[ab][bc];
}
224
225#else
226
227// Workaround form. Listing all possibilities up to 4 dims.
228
// Two subscripts: one index takes TA to TB, the last index takes TB to TC.
// Both single-step subscripts are supplied as assertions.
forall( TA &, TB &, TC &, IxAB_0, IxBC
		| { TB & ?[?]( TA &, IxAB_0 ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB_0 ab, IxBC bc ) {
	TB & outer = this[ab];
	return outer[bc];
}
233
// Three subscripts: the first two together take TA to TB (via the asserted two-index
// subscript), and the last takes TB to TC.
forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxBC
		| { TB & ?[?]( TA &, IxAB_0, IxAB_1 ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxBC bc ) {
	TB & outer = this[[ab0,ab1]];
	return outer[bc];
}
238
// Four subscripts: the first three together take TA to TB (via the asserted three-index
// subscript), and the last takes TB to TC.
forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxAB_2, IxBC
		| { TB & ?[?]( TA &, IxAB_0, IxAB_1, IxAB_2 ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxAB_2 ab2, IxBC bc ) {
	TB & outer = this[[ab0,ab1,ab2]];
	return outer[bc];
}
243
// Further form of -[-,-,-] that avoids using the trait system.
// Above overloads work for any type with (recursively valid) subscript operator,
// provided said subscript is passed as an assertion.
// Below works only on arpk variations but never passes its subscript through an assertion.
248//
249// When arpk implements the trait used above,
250// the critical assertion is backed by a nontrivial thunk.
251// There is no "thunk problem" (lifetime) issue, when used as shown in the test suite.
252// But the optimizer has shown difficulty removing these thunks in cases where "it should,"
253// i.e. when all user code is in one compilation unit.
254// Not that every attempt at removing such a thunk fails; cases have been found going both ways.
255// Cases have been found with unnecessary bound-checks removed successfully,
256// on user code written against the overloads below,
257// but where these bound checks (which occur within `call`ed thunks) are not removed,
258// on user code written against the overloads above.
259//
260// The overloads below provide specializations of the above
261// that are a little harder to use than the ones above,
262// but where array API erasure has been seen to be more effective.
263// Note that the style below does not appeal to a case where thunk inlining is more effective;
264// rather, it simply does not rely on thunks in the first place.
265//
266// Both usage styles are shown in test array-md-sbscr-cases#numSubscrTypeCompatibility,
267// with the more general one above being "high abstraction,"
268// and the more performant one below being "mid abstraction" and "low abstraction."
269//
// A breadth of index types is not given here (providing -[size_t,size_t,...] only)
// because these declarations are not feeding a trait, so safe implicit arithmetic conversion kicks in.
// Even so, there may still be an un-met need for accepting
// either ptrdiff_t or size_t (signed or unsigned)
// because Mike has seen the optimizer resist removing bound checks when sign-conversion is in play.
// "Only size_t" is meeting today's need
// and no solution is known that avoids 2^D overloads for D dimensions
// while offering multiple subscript types and staying assertion-free.
278//
279// This approach, of avoiding traits entirely, is likely incompatible with the original desire
280// to have one recursive multidimensional subscript operator (TRY_BROKEN_DESIRED_MD_SUBSCRIPT).
281// To make a single declaration work,
282// we would probably have to get better at coaxing the optimizer into inlining thunks.
283
// Assertion-free two-index subscript on an arpk-of-arpk: a[ix2, ix1] is a[ix2][ix1].
// The type is spelled out as nested arpk, so no subscript operator is taken as an assertion.
// NOTE(review): Timmed1 and Tbase are declared without `&` here, unlike the other
// forall clauses in this file -- confirm that is intended.
forall( [N2], S2*, [N1], S1*, Timmed1, Tbase )
static inline Timmed1 & ?[?](
		arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ) & this,
		size_t ix2, size_t ix1 ) {
	return this[ix2][ix1];
}
288
// Assertion-free three-index subscript on three nested arpk levels:
// a[ix3, ix2, ix1] is a[ix3][ix2][ix1].
forall( [N3], S3*, [N2], S2*, [N1], S1*, Timmed1, Tbase )
static inline Timmed1 & ?[?](
		arpk( N3, S3, arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ), Tbase ) & this,
		size_t ix3, size_t ix2, size_t ix1 ) {
	return this[ix3][ix2][ix1];
}
293
// Assertion-free four-index subscript on four nested arpk levels:
// a[ix4, ix3, ix2, ix1] is a[ix4][ix3][ix2][ix1].
forall( [N4], S4*, [N3], S3*, [N2], S2*, [N1], S1*, Timmed1, Tbase )
static inline Timmed1 & ?[?](
		arpk( N4, S4, arpk( N3, S3, arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ), Tbase ), Tbase ) & this,
		size_t ix4, size_t ix3, size_t ix2, size_t ix1 ) {
	return this[ix4][ix3][ix2][ix1];
}
298
299
300
301#endif
302
// Available for users to work around Trac #265.
// If `a[...0...]` isn't working, try `a[...ix0...]` instead.
// ix0 is the value zero with type ptrdiff_t.
#define ix0 ( (ptrdiff_t) 0 )
307
308
309
310//
311// Rotation
312//
313
// Base: the type being extended is the base element type itself.
// Adding dimension (Nq, Sq) gives arpk( Nq, Sq, Tbase, Tbase ).
// Only the return type carries information; the returned tag value is empty.
forall( [Nq], Sq & | sized(Sq), Tbase & )
static inline tag(arpk( Nq, Sq, Tbase, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(Tbase ) ) {
	tag(arpk( Nq, Sq, Tbase, Tbase )) witness;
	return witness;
}
320
// Rec: the type being extended is arpk( N, S, recq, Tbase ).
// This level's N and S are kept, and its inner type recq is replaced by recr,
// where recr is whatever the asserted enq_ yields for (Nq, Sq) applied to recq.
forall( [Nq], Sq & | sized(Sq), [N], S & | sized(S), recq &, recr &, Tbase &
		| { tag(recr) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(recq) ); } )
static inline tag(arpk( N, S, recr, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(arpk( N, S, recq, Tbase )) ) {
	tag(arpk( N, S, recr, Tbase )) witness;
	return witness;
}
327
// Wrapper: `a[all]` returns the same object viewed through the type `result`.
// `result` is fixed by the asserted enq_, applied to this level's (N, S) and inner type Te.
// No data is moved; the body is a reference cast.
extern struct all_t {} all;

forall( [N], S & | sized(S), Te &, result &, Tbase &
		| { tag(result) enq_( tag(Tbase), tag(N), tag(S), tag(Te) ); } )
static inline result & ?[?]( arpk( N, S, Te, Tbase ) & this, all_t ) {
	result & rotated = (result &) this;
	return rotated;
}
334
335//
336// Trait of array or slice
337//
338
// desired:
// forall( A &, Tv &, [N] )
// trait ar {
//     Tv& ?[?]( A &, zero_t );
//     Tv& ?[?]( A &, one_t );
//     Tv& ?[?]( A &, int );
//     ...
//     size_t len( A & );
//     void __taglen( tag(A), tag(N) );
// };
349
350// working around N's not being accepted as arguments to traits
351
// ar( A, Tv, N ): a braced assertion list that stands in for a trait.
// It requires that A is subscriptable to Tv & by each of zero_t, one_t, int, unsigned int,
// long int and unsigned long int, that len( A & ) exists, and that __taglen relates A to N.
#define ar( A, Tv, N ) {                            \
	Tv & ?[?]( A &, zero_t );                       \
	Tv & ?[?]( A &, one_t );                        \
	Tv & ?[?]( A &, int );                          \
	Tv & ?[?]( A &, unsigned int );                 \
	Tv & ?[?]( A &, long int );                     \
	Tv & ?[?]( A &, unsigned long int );            \
	size_t len( A & );                              \
	void __taglen( tag(A), tag(N) );                \
}
Note: See TracBrowser for help on using the repository browser.