source: libcfa/src/collections/array.hfa@ 80e83b6c

Last change on this file since 80e83b6c was eb0d9b7, checked in by Michael Brooks <mlbrooks@…>, 5 weeks ago

Improve libcfa-array's bound-check removal and write that thesis section.

The libcfa change adds a more performant alternative for a subset of multidimensional indexing cases that were already functionally correct.
That the new alternative is more performant is not shown in the test suite.
There is an associated new high-performance option for passing an array-or-slice to a function.
The added test cases cover those options.

The added in-thesis demos rely on the new more-performant alternative for multidimensional indexing.

  • Property mode set to 100644
File size: 13.5 KB
RevLine 
[a5e26821]1#pragma once
2
[c7625e0]3
4
// `tag(T)` is an empty struct parameterized by a type: it lets a type (or a dimension) occupy
// an argument position without carrying any data.
// `ttag(T)` builds a value of type `tag(T)`; `ztag(n)` currently expands to exactly `ttag(n)`.
[6e50a6b]5forall( __CFA_tysys_id_only_X & ) struct tag {};
[c7625e0]6#define ttag(T) ((tag(T)){})
[6e50a6b]7#define ztag(n) ttag(n)
[c7625e0]8
// Subscript bound checks, active only when __CFA_DEBUG__ is defined.
//   subcheck  : for signed subscripts (callers pass a long int); rejects sub < 0 and sub >= len.
//   subchecku : for unsigned subscripts (callers pass an unsigned long int); rejects sub >= len.
// On failure, abort is called with a message naming the subscript, the valid range and the array.
// Both forms are wrapped in do { ... } while (0), so that in debug and release builds alike
// the macro is a single statement that is safe next to an unbraced if/else.
// Note that `sub` and `len` are evaluated more than once in debug builds.
// NOTE(review): callers pass the array by its reference name as `arr`; confirm that this
// reaches the %p conversion as an address.
#ifdef __CFA_DEBUG__
#define subcheck( arr, sub, len ) do { \
	if ( (sub) < 0 || (sub) >= (len) ) \
		abort( "Subscript %ld exceeds dimension range [0,%zu) for array %p.\n", \
			(sub), (len), (arr) ); \
} while (0)
#define subchecku( arr, sub, len ) do { \
	if ( (sub) >= (len) ) \
		abort( "Subscript %lu exceeds dimension range [0,%zu) for array %p.\n", \
			(sub), (len), (arr) ); \
} while (0)
#else
#define subcheck( arr, sub, len ) do {} while (0)
#define subchecku( arr, sub, len ) do {} while (0)
#endif
[c7625e0]22
[ad24245]23//
24// The `array` macro is the public interface.
25// It computes the type of a dense (trivially strided) array.
26// All user-declared objects are dense arrays.
[c7625e0]27//
[ad24245]28// The `arpk` (ARray with PacKing info explicit) type is, generally, a slice with _any_ striding.
29// This type is meant for internal use.
30// CFA programmers should not instantiate it directly, nor access its field.
31// CFA programmers should call ?[?] on it.
32// Yet user-given `array(stuff)` expands to `arpk(stuff')`.
33// The comments here explain the resulting internals.
34//
35// Just as a plain-C "multidimensional" array is really array-of-array-of-...,
36// so does arpk generally show up as arpk-of-arpk-of...
37//
38// In the example of `array(float, 3, 4, 5) a;`,
39// `typeof(a)` is an `arpk` instantiation.
40// These comments explain _its_ arguments, i.e. those of the topmost `arpk` level.
41//
42// [N] : the number of elements in `a`; 3 in the example
43// S : carries the stride size (distance in bytes between &myA[0] and &myA[1]), in sizeof(S);
44// same as Timmed when striding is trivial, same as Timmed in the example
45// Timmed : (T-immediate) the inner type; conceptually, `typeof(a)` is "arpk of Timmed";
46// array(float, 4, 5) in the example
47// Tbase : (T-base) the deepest element type that is not arpk; float in the example
[c7625e0]48//
[63f42a8]49forall( [N], S & | sized(S), Timmed &, Tbase & ) {
[1bb0170]50 //
[b8e047a]51 // Single-dim array struct (with explicit packing and atom)
[1bb0170]52 //
	// The only storage is `strides`: N objects of type S.
	// The subscript operators below return element i of `strides`, viewed as a Timmed.
53 struct arpk {
54 S strides[N];
55 };
56
57 // About the choice of integral types offered as subscript overloads:
58 // Intent is to cover these use cases:
59 // a[0] // i : zero_t
60 // a[1] // i : one_t
61 // a[2] // i : int
62 // float foo( ptrdiff_t i ) { return a[i]; } // i : ptrdiff_t
63 // float foo( size_t i ) { return a[i]; } // i : size_t
64 // forall( [N] ) ... for( i; N ) { total += a[i]; } // i : typeof( sizeof(42) )
65 // for( i; 5 ) { total += a[i]; } // i : int
66 //
67 // It gets complicated by:
68 // - CFA does overloading on concrete types, like int and unsigned int, not on typedefed
69 // types like size_t. So trying to overload on ptrdiff_t vs int works in 64-bit mode
70 // but not in 32-bit mode.
71 //
[0210a543]72 // cfa -m32 (and gcc) cfa -m64 (and gcc)
73 // ptrdiff_t int long int
74 // size_t unsigned int unsigned long int
75 // typeof( sizeof(42) ) unsigned int unsigned long int
76 // int int int
[1bb0170]77 //
78 // So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
79 //
80 // The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
81 // because assertion satisfaction requires types to match exactly. Both higher-dimensional
82 // subscripting and operations on slices use asserted subscript operators. The test case
[0210a543]83 // array-collections/array-sbscr-types covers the combinations. Mike believes that commenting out
[1bb0170]84 // any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
85 // Mike is open to being shown a smaller set of overloads that still passes the test.
86
[0210a543]87
	// Every overload below follows the same pattern: bound-check the subscript against N
	// (subcheck for signed subscripts, subchecku for unsigned; both are no-ops unless
	// __CFA_DEBUG__ is defined), then return strides[i] cast to a Timmed reference.
	// The int, unsigned int, long int and unsigned long int overloads each have a const
	// variant; the zero_t and one_t overloads do not.
[b8e047a]88 static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, zero_t ) {
[8ee211d]89 subcheck( a, 0L, N );
[b8e047a]90 return (Timmed &)a.strides[0];
[1bb0170]91 }
92
[b8e047a]93 static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, one_t ) {
[8ee211d]94 subcheck( a, 1L, N );
[b8e047a]95 return (Timmed &)a.strides[1];
[1bb0170]96 }
97
[b8e047a]98 static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, int i ) {
[8ee211d]99 subcheck( a, (long int)i, N );
[b8e047a]100 return (Timmed &)a.strides[i];
[1bb0170]101 }
102
[b8e047a]103 static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, int i ) {
[8ee211d]104 subcheck( a, (long int)i, N );
[b8e047a]105 return (Timmed &)a.strides[i];
[1bb0170]106 }
107
[b8e047a]108 static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
[8ee211d]109 subchecku( a, (unsigned long int)i, N );
[b8e047a]110 return (Timmed &)a.strides[i];
[1bb0170]111 }
112
[b8e047a]113 static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
[8ee211d]114 subchecku( a, (unsigned long int)i, N );
[b8e047a]115 return (Timmed &)a.strides[i];
[1bb0170]116 }
117
[b8e047a]118 static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, long int i ) {
[8ee211d]119 subcheck( a, i, N );
[b8e047a]120 return (Timmed &)a.strides[i];
[1bb0170]121 }
122
[b8e047a]123 static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, long int i ) {
[8ee211d]124 subcheck( a, i, N );
[b8e047a]125 return (Timmed &)a.strides[i];
[1bb0170]126 }
127
[b8e047a]128 static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
[8ee211d]129 subchecku( a, i, N );
[b8e047a]130 return (Timmed &)a.strides[i];
[1bb0170]131 }
132
[b8e047a]133 static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
[8ee211d]134 subchecku( a, i, N );
[b8e047a]135 return (Timmed &)a.strides[i];
[1bb0170]136 }
137
	// Number of elements at this (outermost) level, i.e. N; the argument itself is not read.
[ee70ff5]138 static inline size_t len( arpk( N, S, Timmed, Tbase ) & ) {
[1bb0170]139 return N;
140 }
141
	// Empty function that pairs an arpk type with its length N purely through its parameter
	// types; it has the shape of the __taglen entry in the `ar` assertion list.
[b8e047a]142 static inline void __taglen( tag(arpk( N, S, Timmed, Tbase )), tag(N) ) {}
[cfbc56ec]143}
[a5e26821]144
[cfbc56ec]145// RAII pattern has workarounds for
146// - Trac 226: Simplest handling would be, require immediate element to be otype, let autogen
[1bb0170]147// raii happen. Performance on even a couple dimensions is unacceptable because of exponential
148// thunk creation: ?{}() needs all four otype funcs from next level, so does ^?{}(), so do the
149// other two. This solution offers ?{}() that needs only ?{}(), and similar for ^?{}.
[cfbc56ec]150
[1665ee5]151// skip initializing elements
152// array(float, 5) x = { delay_init };
// `delay_init_t` has the single value `delay_init`; it exists to select the constructor below.
// That constructor declares a local, empty-bodied ?{} for S[N] and calls ?{} on `strides`.
// NOTE(review): presumably the local declaration is what that call resolves to, so that no
// element constructor runs — confirm against CFA's resolution rules.
[cdf7d43]153enum () delay_init_t { delay_init };
[1665ee5]154forall( [N], S & | sized(S), Timmed &, Tbase & )
155static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this, delay_init_t ) {
[1bb0170]156 void ?{}( S (&)[N] ) {}
157 ?{}(this.strides);
[1665ee5]158}
[cfbc56ec]159
[1665ee5]160// call default ctor on elements
161// array(float, 5) x;
// Default constructor. Requires only a default constructor for Timmed (the sole assertion).
// First runs the delay_init constructor on `this`, then calls Timmed's default constructor
// on each of the N elements of `strides`, each viewed as a Timmed.
162forall( [N], S & | sized(S), Timmed &, Tbase & | { void ?{}( Timmed & ); } )
163static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this ) {
164 ?{}( this, delay_init );
[b8e047a]165 for (i; N) ?{}( (Timmed &)this.strides[i] );
[cfbc56ec]166}
167
// Destructor. Requires only a destructor for Timmed (the sole assertion).
// Declares a local, empty-bodied ^?{} for S[N] and calls ^?{} on `strides`,
// then calls Timmed's destructor on each element, using index N-i-1 for loop counter i
// (assuming `for (i; N)` counts i upward from 0, this runs from the last element to the first).
168forall( [N], S & | sized(S), Timmed &, Tbase & | { void ^?{}( Timmed & ); } )
[b8e047a]169static inline void ^?{}( arpk( N, S, Timmed, Tbase ) & this ) {
[1bb0170]170 void ^?{}( S (&)[N] ) {}
171 ^?{}(this.strides);
[cfbc56ec]172
[1bb0170]173 for (i; N ) {
[b8e047a]174 ^?{}( (Timmed &)this.strides[N-i-1] );
[1bb0170]175 }
[c7625e0]176}
177
[1665ee5]178
[c7625e0]179//
180// Sugar for declaring array structure instances
181//
182
// Base case of mkar_: given only the element-type tag, the result type is the element type Te.
// The body is empty; the `array`/`zarray` macros use this function only inside typeof.
[cfbc56ec]183forall( Te * )
[9fa538c]184static inline Te mkar_( tag(Te) ) {}
[c7625e0]185
// Recursive case of mkar_: peel the leading dimension tag N.
// Trslt is the type that mkar_ yields for the atom tag plus the remaining tags (the assertion);
// the result is an arpk of N elements that uses Trslt as both its stride type and its
// immediate element type, with Tatom as the base type.
// The body is empty; only the return type is of interest.
[b9dae14c]186forall( [N], ZTags ... , Trslt &, Tatom & | { Trslt mkar_( tag(Tatom), ZTags ); } )
[b8e047a]187static inline arpk( N, Trslt, Trslt, Tatom) mkar_( tag(Tatom), tag(N), ZTags ) {}
[c7625e0]188
189// based on https://stackoverflow.com/questions/1872220/is-it-possible-to-iterate-over-arguments-in-variadic-macros
190
[1bb0170]191 // Make a FOREACH macro
// FE_<k>( ACT, a1, ..., ak ) expands to ACT(a1)ACT(a2)...ACT(ak):
// each level applies ACT to its first argument and hands the rest to the level below.
#define FE_0( ACT )
#define FE_1( ACT, HEAD ) ACT(HEAD)
#define FE_2( ACT, HEAD, ... ) ACT(HEAD)FE_1( ACT, __VA_ARGS__ )
#define FE_3( ACT, HEAD, ... ) ACT(HEAD)FE_2( ACT, __VA_ARGS__ )
#define FE_4( ACT, HEAD, ... ) ACT(HEAD)FE_3( ACT, __VA_ARGS__ )
#define FE_5( ACT, HEAD, ... ) ACT(HEAD)FE_4( ACT, __VA_ARGS__ )
// ... add FE_6 and up (and widen GET_MACRO and FOR_EACH to match) as needed

// GET_MACRO selects its seventh argument. FOR_EACH lists the FE_<k> names in descending order
// after the user's arguments, so the argument count shifts the matching FE_<k> into that slot.
#define GET_MACRO( a0, a1, a2, a3, a4, a5, PICK, ... ) PICK
#define FOR_EACH( action, ... ) \
	GET_MACRO( _0, __VA_ARGS__, FE_5, FE_4, FE_3, FE_2, FE_1, FE_0 )( action, __VA_ARGS__ )
[c7625e0]203
// array( TE, d1, ..., dk ) names the return type of mkar_( ttag(TE), ttag(d1), ..., ttag(dk) ).
// zarray is the same construction with ztag in place of ttag for the dimension arguments.
// FOR_EACH stops at FE_5, so at most five dimension arguments are supported.
204#define COMMA_ttag(X) , ttag(X)
205#define array( TE, ...) typeof( mkar_( ttag(TE) FOR_EACH( COMMA_ttag, __VA_ARGS__ ) ) )
206
207#define COMMA_ztag(X) , ztag(X)
208#define zarray( TE, ...) typeof( mkar_( ttag(TE) FOR_EACH( COMMA_ztag, __VA_ARGS__ ) ) )
209
210//
211// Sugar for multidimensional indexing
212//
213
214// Core -[[-,-,-]] operator
215
[63a4b92]216#ifdef TRY_BROKEN_DESIRED_MD_SUBSCRIPT
217
[c7625e0]218// Desired form. One definition with recursion on IxBC (worked until Jan 2021, see trac #__TODO__)
219
// Compiled only when TRY_BROKEN_DESIRED_MD_SUBSCRIPT is defined.
// Applies the asserted subscript of TA to the first index `ab`, yielding a TB,
// then applies the asserted subscript of TB to the remaining index pack `bc`.
[63a4b92]220forall( TA &, TB &, TC &, IxAB, IxBC ... | { TB & ?[?]( TA &, IxAB ); TC & ?[?]( TB &, IxBC ); } )
[9fa538c]221static inline TC & ?[?]( TA & this, IxAB ab, IxBC bc ) {
[1bb0170]222 return this[ab][bc];
[c7625e0]223}
224
[d1abc63c]225#else
[c7625e0]226
[63a4b92]227// Workaround form. Listing all possibilities up to 4 dims.
[c7625e0]228
// Two indices: the asserted one-index subscript of TA consumes `ab`, yielding a TB;
// the asserted one-index subscript of TB consumes `bc`.
[63a4b92]229forall( TA &, TB &, TC &, IxAB_0, IxBC | { TB & ?[?]( TA &, IxAB_0 ); TC & ?[?]( TB &, IxBC ); } )
[9fa538c]230static inline TC & ?[?]( TA & this, IxAB_0 ab, IxBC bc ) {
[1bb0170]231 return this[ab][bc];
[c7625e0]232}
233
// Three indices: the asserted two-index subscript of TA consumes `ab0, ab1`, yielding a TB;
// the asserted one-index subscript of TB consumes `bc`.
[63a4b92]234forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxBC | { TB & ?[?]( TA &, IxAB_0, IxAB_1 ); TC & ?[?]( TB &, IxBC ); } )
[9fa538c]235static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxBC bc ) {
[1bb0170]236 return this[[ab0,ab1]][bc];
[63a4b92]237}
238
// Four indices: the asserted three-index subscript of TA consumes `ab0, ab1, ab2`, yielding a TB;
// the asserted one-index subscript of TB consumes `bc`.
239forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxAB_2, IxBC | { TB & ?[?]( TA &, IxAB_0, IxAB_1, IxAB_2 ); TC & ?[?]( TB &, IxBC ); } )
[9fa538c]240static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxAB_2 ab2, IxBC bc ) {
[1bb0170]241 return this[[ab0,ab1,ab2]][bc];
[63a4b92]242}
243
[eb0d9b7]244// Further form of -[-,-,-] that avoids using the trait system.
245// Above overloads work for any type with (recursively valid) subscript operator,
246// provided said subscript is passed as an assertion.
247// Below works only on arpk variations but never passes its subscript through an assertion.
248//
249// When arpk implements the trait used above,
250// the critical assertion is backed by a nontrivial thunk.
251// There is no "thunk problem" (lifetime) issue, when used as shown in the test suite.
252// But the optimizer has shown difficulty removing these thunks in cases where "it should,"
253// i.e. when all user code is in one compilation unit.
254// Not that every attempt at removing such a thunk fails; cases have been found going both ways.
255// Cases have been found with unnecessary bound-checks removed successfully,
256// on user code written against the overloads below,
257// but where these bound checks (which occur within `call`ed thunks) are not removed,
258// on user code written against the overloads above.
259//
260// The overloads below provide specializations of the above
261// that are a little harder to use than the ones above,
262// but where array API erasure has been seen to be more effective.
263// Note that the style below does not appeal to a case where thunk inlining is more effective;
264// rather, it simply does not rely on thunks in the first place.
265//
266// Both usage styles are shown in test array-md-sbscr-cases#numSubscrTypeCompatibility,
267// with the more general one above being "high abstraction,"
268// and the more performant one below being "mid abstraction" and "low abstraction."
269//
270// A breadth of index types is not given here (providing -[size_t,size_t,...] only)
271// because these declarations are not feeding a trait, so safe implicit arithmetic conversion kicks in.
272// Even so, there may still be an un-met need for accepting
273// either ptrdiff_t or size_t (signed or unsigned)
274// because Mike has seen the optimizer resist removing bound checks when sign-conversion is in play.
275// "Only size_t" is meeting today's need
276// and no solution is known that avoids 2^D overloads for D dimensions
277// while offering multiple subscript types and staying assertion-free.
278//
279// This approach, of avoiding traits entirely, is likely incompatible with the original desire
280// to have one recursive multidimensional subscript operator (TRY_BROKEN_DESIRED_MD_SUBSCRIPT).
281// To make a single declaration work,
282// we would probably have to get better at coaxing the optimizer into inlining thunks.
283
// Two-index subscript for an arpk whose immediate element type is itself an arpk.
// Indices are given outermost first and applied one at a time; the forall clause has no
// explicit assertion list.
// NOTE(review): Timmed1 and Tbase are declared here without `&`, unlike in the arpk
// definition — confirm that this is intended.
284forall( [N2], S2*, [N1], S1*, Timmed1, Tbase )
285static inline Timmed1 & ?[?]( arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ) & this, size_t ix2, size_t ix1 ) {
286 return this[ix2][ix1];
287}
288
// Three-index subscript for three nested arpk levels.
// Indices are given outermost first and applied one at a time; the forall clause has no
// explicit assertion list.
289forall( [N3], S3*, [N2], S2*, [N1], S1*, Timmed1, Tbase )
290static inline Timmed1 & ?[?]( arpk( N3, S3, arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ), Tbase ) & this, size_t ix3, size_t ix2, size_t ix1 ) {
291 return this[ix3][ix2][ix1];
292}
293
// Four-index subscript for four nested arpk levels.
// Indices are given outermost first and applied one at a time; the forall clause has no
// explicit assertion list.
294forall( [N4], S4*, [N3], S3*, [N2], S2*, [N1], S1*, Timmed1, Tbase )
295static inline Timmed1 & ?[?]( arpk( N4, S4, arpk( N3, S3, arpk( N2, S2, arpk( N1, S1, Timmed1, Tbase ), Tbase ), Tbase ), Tbase ) & this, size_t ix4, size_t ix3, size_t ix2, size_t ix1 ) {
296 return this[ix4][ix3][ix2][ix1];
297}
298
299
300
[63a4b92]301#endif
302
[997324c]303// Available for users to work around Trac #265
304// If `a[...0...]` isn't working, try `a[...ix0...]` instead.
[a5e26821]305
// A zero subscript whose type is ptrdiff_t, for use where the literal 0 does not work.
[997324c]306#define ix0 ((ptrdiff_t)0)
[a5e26821]307
308
309
[c7625e0]310//
311// Rotation
312//
313
314// Base
// Base case of the enq_ type computation: the remaining type is the atom Tbase itself,
// so the result is a single arpk level of Nq elements with stride type Sq over Tbase.
// The returned tag carries no data; only its type is of interest.
[63f42a8]315forall( [Nq], Sq & | sized(Sq), Tbase & )
[b8e047a]316static inline tag(arpk( Nq, Sq, Tbase, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(Tbase ) ) {
317 tag(arpk( Nq, Sq, Tbase, Tbase )) ret;
[1bb0170]318 return ret;
[6448f7d]319}
[c7625e0]320
321// Rec
// Recursive case of the enq_ type computation: the remaining type is an arpk level (N, S)
// over recq. That level is kept, and its element type recq is replaced by recr, the result
// of applying enq_ with the same (Nq, Sq) to recq (the assertion).
// The returned tag carries no data; only its type is of interest.
[b8e047a]322forall( [Nq], Sq & | sized(Sq), [N], S & | sized(S), recq &, recr &, Tbase & | { tag(recr) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(recq) ); } )
323static inline tag(arpk( N, S, recr, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(arpk( N, S, recq, Tbase )) ) {
324 tag(arpk( N, S, recr, Tbase )) ret;
[1bb0170]325 return ret;
[6448f7d]326}
[c7625e0]327
328// Wrapper
// `all` is the value written in a subscript position, as in a[all]; it is only declared
// (extern) here.
// a[all] returns the same object, cast to `result`: the type that enq_ computes from this
// level's (N, S) and the element type Te, in which this level has been moved to the
// innermost position. No data is moved or copied.
[058ece2]329extern struct all_t {} all;
[b8e047a]330forall( [N], S & | sized(S), Te &, result &, Tbase & | { tag(result) enq_( tag(Tbase), tag(N), tag(S), tag(Te) ); } )
331static inline result & ?[?]( arpk( N, S, Te, Tbase ) & this, all_t ) {
[1bb0170]332 return (result&) this;
[c7625e0]333}
334
335//
336// Trait of array or slice
337//
338
[a5e26821]339// desired:
[ee70ff5]340// forall( A &, Tv &, [N] )
[7882c58]341// trait ar {
[ee70ff5]342// Tv& ?[?]( A &, zero_t );
343// Tv& ?[?]( A &, one_t );
344// Tv& ?[?]( A &, int );
[1bb0170]345// ...
[ee70ff5]346// size_t len( A & );
[1bb0170]347// void __taglen( tag(A), tag(N) );
[a5e26821]348// };
349
350// working around N's not being accepted as arguments to traits
351
// ar( A, Tv, N ) expands to a brace-enclosed list of declarations: the six subscript
// operators on A (zero_t, one_t, int, unsigned int, long int, unsigned long int) returning
// Tv &, plus len and __taglen. It has the same shape as the assertion lists written after
// `|` in the forall clauses of this file, and is presumably meant to be used there.
[1bb0170]352#define ar( A, Tv, N ) { \
[ee70ff5]353 Tv& ?[?]( A &, zero_t ); \
354 Tv& ?[?]( A &, one_t ); \
355 Tv& ?[?]( A &, int ); \
356 Tv& ?[?]( A &, unsigned int ); \
357 Tv& ?[?]( A &, long int ); \
358 Tv& ?[?]( A &, unsigned long int ); \
359 size_t len( A & ); \
[1bb0170]360 void __taglen( tag(A), tag(N) ); \
[a5e26821]361}
Note: See TracBrowser for help on using the repository browser.