#pragma once

//#include <assert.h>


// Type-level tag: an empty struct that is polymorphic in one parameter.  A value of type
// tag(T) carries no data; it lets T be passed along as a function argument (see mkar_, enq_
// and __taglen in this file).
forall( __CFA_tysys_id_only_X & ) struct tag {};
// ttag(T): an expression of type tag(T), written as an empty compound literal.
#define ttag(T) ((tag(T)){})
// ztag(n): currently expands to exactly ttag(n).
#define ztag(n) ttag(n)

// Bounds checks used by the subscript operators.
//   subcheck ( arr, sub, len ) : signed subscript; passes when 0 <= sub < len
//   subchecku( arr, sub, len ) : unsigned subscript; passes when sub < len
// On failure, abort is called with a message giving the subscript, the dimension and the array.
// The checks exist only when __CFA_DEBUG__ is defined; otherwise both macros are empty statements.
//
// Every form is wrapped in do { } while (0), so a use followed by `;` is exactly one statement,
// even as the unbraced body of an if/else (a bare `if` would capture a following `else`).
// `sub` and `len` are expanded more than once: arguments must be free of side effects.
// Conversions: %ld / %lu expect the subscript as long int / unsigned long int, which is how the
// callers in this file pass it; %zu expects `len` to be a size_t.
#ifdef __CFA_DEBUG__
#define subcheck( arr, sub, len ) \
	do { \
		if ( (sub) < 0 || (sub) >= (len) ) \
			abort( "subscript %ld exceeds dimension range [0,%zu) for array %p.\n", \
				   (sub), (len), (arr) ); \
	} while (0)
#define subchecku( arr, sub, len ) \
	do { \
		if ( (sub) >= (len) ) \
			abort( "subscript %lu exceeds dimension range [0,%zu) for array %p.\n", \
				   (sub), (len), (arr) ); \
	} while (0)
#else
#define subcheck( arr, sub, len ) do {} while (0)
#define subchecku( arr, sub, len ) do {} while (0)
#endif

// 
// The `array` macro is the public interface.
// It computes the type of a dense (trivially strided) array.
// All user-declared objects are dense arrays.
//
// The `arpk` (ARray with PacKing info explicit) type is, generally, a slice with _any_ striding.
// This type is meant for internal use.
// CFA programmers should not instantiate it directly, nor access its field.
// CFA programmers should call ?[?] on it.
// Yet user-given `array(stuff)` expands to `arpk(stuff')`.
// The comments here explain the resulting internals.
//
// Just as a plain-C "multidimensional" array is really array-of-array-of-...,
// so does arpk generally show up as arpk-of-arpk-of...
//
// In the example of `array(float, 3, 4, 5) a;`,
// `typeof(a)` is an `arpk` instantiation.
// These comments explain _its_ arguments, i.e. those of the topmost `arpk` level.
//
// [N]    : the number of elements in `a`; 3 in the example
// S      : carries the stride size (distance in bytes between &a[0] and &a[1]) as sizeof(S);
//          same as Timmed when striding is trivial, which is the case in the example
// Timmed : (T-immediate) the inner type; conceptually, `typeof(a)` is "arpk of Timmed";
//          array(float, 4, 5) in the example
// Tbase  : (T-base) the deepest element type that is not arpk; float in the example
//
forall( [N], S & | sized(S), Timmed &, Tbase & ) {
	//
	// Single-dim array struct (with explicit packing and atom)
	//
	// The only field is `strides`: N slots, each of type S.  The subscript operators below
	// select a slot of `strides` and return it reinterpreted as a Timmed.
	//
	struct arpk {
		S strides[N];
	};

	// About the choice of integral types offered as subscript overloads:
	// Intent is to cover these use cases:
	//    a[0]                                                // i : zero_t
	//    a[1]                                                // i : one_t
	//    a[2]                                                // i : int
	//    float foo( ptrdiff_t i ) { return a[i]; }           // i : ptrdiff_t
	//    float foo( size_t i ) { return a[i]; }              // i : size_t
	//    forall( [N] ) ... for( i; N ) { total += a[i]; }    // i : typeof( sizeof(42) )
	//    for( i; 5 ) { total += a[i]; }                      // i : int
	//
	// It gets complicated by:
	// -  CFA does overloading on concrete types, like int and unsigned int, not on typedefed
	//    types like size_t.  So trying to overload on ptrdiff_t vs int works in 64-bit mode
	//    but not in 32-bit mode.
	// -  Given bug of Trac #247, CFA gives sizeof expressions type unsigned long int, when it
	//    should give them type size_t.
	//
	//                          gcc -m32         cfa -m32 given bug         gcc -m64 (and cfa)
	// ptrdiff_t                int              int                        long int
	// size_t                   unsigned int     unsigned int               unsigned long int
	// typeof( sizeof(42) )     unsigned int     unsigned long int          unsigned long int
	// int                      int              int                        int
	//
	// So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
	//
	// The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
	// because assertion satisfaction requires types to match exactly.  Both higher-dimensional
	// subscripting and operations on slices use asserted subscript operators.  The test case
	// array-container/array-sbscr-cases covers the combinations.  Mike believes that commenting out
	// any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
	// Mike is open to being shown a smaller set of overloads that still passes the test.

	// All subscript operators below share one shape:
	//  - bounds check through subcheck (signed subscript) or subchecku (unsigned subscript);
	//    the check does something only when __CFA_DEBUG__ is defined
	//  - return the selected slot of `strides`, cast to a reference to Timmed
	// Signed subscripts are handed to subcheck as long int, unsigned ones to subchecku as
	// unsigned long int.

	// a[0] with the literal zero (zero_t); always selects slot 0.
	// NOTE(review): zero_t and one_t have no const overloads, unlike the integer types below.
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, zero_t ) {
		subcheck( a, 0L, N );
		return (Timmed &)a.strides[0];
	}

	// a[1] with the literal one (one_t); always selects slot 1.
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, one_t ) {
		subcheck( a, 1L, N );
		return (Timmed &)a.strides[1];
	}

	// a[i], i : int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, int i ) {
		subcheck( a, (long int)i, N );
		return (Timmed &)a.strides[i];
	}

	// a[i], i : int, on a const array.  The cast names a non-const Timmed &; const is
	// reapplied by the declared return type.  Same pattern in every const overload below.
	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, int i ) {
		subcheck( a, (long int)i, N );
		return (Timmed &)a.strides[i];
	}

	// a[i], i : unsigned int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
		subchecku( a, (unsigned long int)i, N );
		return (Timmed &)a.strides[i];
	}

	// a[i], i : unsigned int, on a const array
	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned int i ) {
		subchecku( a, (unsigned long int)i, N );
		return (Timmed &)a.strides[i];
	}

	// a[i], i : long int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, long int i ) {
		subcheck( a, i, N );
		return (Timmed &)a.strides[i];
	}

	// a[i], i : long int, on a const array
	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, long int i ) {
		subcheck( a, i, N );
		return (Timmed &)a.strides[i];
	}

	// a[i], i : unsigned long int
	static inline Timmed & ?[?]( arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
		subchecku( a, i, N );
		return (Timmed &)a.strides[i];
	}

	// a[i], i : unsigned long int, on a const array
	static inline const Timmed & ?[?]( const arpk( N, S, Timmed, Tbase ) & a, unsigned long int i ) {
		subchecku( a, i, N );
		return (Timmed &)a.strides[i];
	}

	// a`len : the number of elements N of this (outermost) dimension; the argument is not inspected.
	static inline size_t ?`len( arpk( N, S, Timmed, Tbase ) & ) {
		return N;
	}

	// Does nothing when called.  Its signature pairs the arpk type with its dimension N via
	// tags, matching the __taglen assertion listed in the `ar` macro of this file.
	static inline void __taglen( tag(arpk( N, S, Timmed, Tbase )), tag(N) ) {}
}

// RAII pattern has workarounds for
//  - Trac 226:  Simplest handling would be, require immediate element to be otype, let autogen
//	raii happen.  Performance on even a couple dimensions is unacceptable because of exponential
//	thunk creation: ?{}() needs all four otype funcs from next level, so does ^?{}(), so do the
//	other two.  This solution offers ?{}() that needs only ?{}(), and similar for ^?{}.

// skip initializing elements
//   array(float, 5) x = { delay_init };
//
// delay_init_t is the type of the marker value delay_init; passing that value selects the
// constructor below, which places no requirement on Timmed (no constructor assertion).
enum () delay_init_t { delay_init };
forall( [N], S & | sized(S), Timmed &, Tbase & )
static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this, delay_init_t ) {
	// Declare a local, empty constructor for the `strides` array and call it explicitly:
	// the call does nothing, so no element gets initialized here.
	// NOTE(review): presumably the explicit call also stands in for any implicitly
	// generated member construction -- confirm against the CFA constructor rules.
	void ?{}( S (&)[N] ) {}
	?{}(this.strides);
}

// call default ctor on elements
//   array(float, 5) x;
//
// Requires only the default constructor of the immediate element type Timmed.
forall( [N], S & | sized(S), Timmed &, Tbase & | { void ?{}( Timmed & ); } )
static inline void ?{}( arpk( N, S, Timmed, Tbase ) & this ) {	
	// First run the delay_init constructor (which initializes no element) ...
	?{}( this, delay_init );
	// ... then default-construct the elements one by one, in increasing index order,
	// viewing each slot of `strides` as a Timmed.
	for (i; N) ?{}( (Timmed &)this.strides[i] );
}

// call dtor on elements, last element first
//
// Requires only the destructor of the immediate element type Timmed.
forall( [N], S & | sized(S), Timmed &, Tbase & | { void ^?{}( Timmed & ); } )
static inline void ^?{}( arpk( N, S, Timmed, Tbase ) & this ) {
	// Local, empty destructor for the `strides` array, called explicitly; the call itself
	// does nothing (same device as in the delay_init constructor).
	void ^?{}( S (&)[N] ) {}
	^?{}(this.strides);

	// Destroy the elements in decreasing index order: i = 0 handles slot N-1, the last
	// iteration handles slot 0.
	for (i; N ) {
		^?{}( (Timmed &)this.strides[N-i-1] );
	}
}


//
// Sugar for declaring array structure instances
//

// mkar_ computes an array type from tags; the `array` and `zarray` macros use it only as
// the operand of typeof.  Both bodies are empty and return no value.
// NOTE(review): so these are presumably never meant to be called at run time -- confirm.

// Base case: given only the element tag, the result type is the element type Te itself.
forall( Te * )
static inline Te mkar_( tag(Te) ) {}

// Recursive case: given the atom tag, a dimension tag(N) and the remaining tags ZTags, where
// mkar_( tag(Tatom), ZTags ) has result type Trslt, the result type is an arpk of N elements
// whose stride type and immediate element type are both Trslt.
forall( [N], ZTags ... , Trslt &, Tatom & | { Trslt mkar_( tag(Tatom), ZTags ); } )
static inline arpk( N, Trslt, Trslt, Tatom) mkar_( tag(Tatom), tag(N), ZTags ) {}

// based on https://stackoverflow.com/questions/1872220/is-it-possible-to-iterate-over-arguments-in-variadic-macros

	// Make a FOREACH macro
	//
	// FOR_EACH( action, x1, ..., xk ) expands to action(x1)action(x2)...action(xk), 1 <= k <= 8.
	// FE_k applies WHAT to its first argument and hands the remaining ones to FE_(k-1).
	#define FE_0(WHAT)
	#define FE_1(WHAT, X) WHAT(X)
	#define FE_2(WHAT, X, ...) WHAT(X)FE_1(WHAT, __VA_ARGS__)
	#define FE_3(WHAT, X, ...) WHAT(X)FE_2(WHAT, __VA_ARGS__)
	#define FE_4(WHAT, X, ...) WHAT(X)FE_3(WHAT, __VA_ARGS__)
	#define FE_5(WHAT, X, ...) WHAT(X)FE_4(WHAT, __VA_ARGS__)
	#define FE_6(WHAT, X, ...) WHAT(X)FE_5(WHAT, __VA_ARGS__)
	#define FE_7(WHAT, X, ...) WHAT(X)FE_6(WHAT, __VA_ARGS__)
	#define FE_8(WHAT, X, ...) WHAT(X)FE_7(WHAT, __VA_ARGS__)
	// To go beyond 8: add FE_9..., one placeholder per step to FE_SELECT_, and the new FE_k
	// names at the front of the list in FOR_EACH.

	// GET_MACRO is the original 5-argument selector.  FOR_EACH no longer uses it; it is kept
	// unchanged so that any other user of this name is unaffected.
	#define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME
	// FE_SELECT_ yields its 10th argument.  FOR_EACH places the k user arguments ahead of the
	// descending FE_8..FE_0 list, which shifts exactly FE_k into that 10th position.
	#define FE_SELECT_(_0,_1,_2,_3,_4,_5,_6,_7,_8,NAME,...) NAME
	#define FOR_EACH(action,...) \
	FE_SELECT_(_0,__VA_ARGS__,FE_8,FE_7,FE_6,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)(action,__VA_ARGS__)

// array( TE, d1, ..., dk ): the type of mkar_ applied to a tag for the element type TE followed
// by one ttag per dimension argument, in the order given.
#define COMMA_ttag(X) , ttag(X)
#define array( TE, ...) typeof( mkar_( ttag(TE)  FOR_EACH( COMMA_ttag, __VA_ARGS__ ) ) )

// zarray: same construction as array, but each dimension argument goes through ztag.
#define COMMA_ztag(X) , ztag(X)
#define zarray( TE, ...) typeof( mkar_( ttag(TE)  FOR_EACH( COMMA_ztag, __VA_ARGS__ ) ) )

//
// Sugar for multidimensional indexing
//

// Core -[[-,-,-]] operator
//
// Each definition below peels the last subscript off a multi-subscript expression:
// subscripting TA with the leading subscript(s) yields a TB, subscripting that TB with the
// final subscript yields a TC, and the TC is the result.  Both steps are supplied through
// assertions, so the definitions chain for deeper nestings.

#ifdef TRY_BROKEN_DESIRED_MD_SUBSCRIPT

// Desired form.  One definition with recursion on IxBC (worked until Jan 2021, see trac #__TODO__)

forall( TA &, TB &, TC &, IxAB, IxBC ... | { TB & ?[?]( TA &, IxAB ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB ab, IxBC bc ) {
	return this[ab][bc];
}

#else

// Workaround form.  Listing all possibilities up to 4 dims.

// 2 subscripts: one leading subscript, then the final one.
forall( TA &, TB &, TC &, IxAB_0, IxBC | { TB & ?[?]( TA &, IxAB_0 ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB_0 ab, IxBC bc ) {
	return this[ab][bc];
}

// 3 subscripts: the two leading subscripts are applied together (asserted 2-subscript
// operator on TA), then the final one.
forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxBC | { TB & ?[?]( TA &, IxAB_0, IxAB_1 ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxBC bc ) {
	return this[[ab0,ab1]][bc];
}

// 4 subscripts: the three leading subscripts are applied together (asserted 3-subscript
// operator on TA), then the final one.
forall( TA &, TB &, TC &, IxAB_0, IxAB_1, IxAB_2, IxBC | { TB & ?[?]( TA &, IxAB_0, IxAB_1, IxAB_2 ); TC & ?[?]( TB &, IxBC ); } )
static inline TC & ?[?]( TA & this, IxAB_0 ab0, IxAB_1 ab1, IxAB_2 ab2, IxBC bc ) {
	return this[[ab0,ab1,ab2]][bc];
}

#endif

// Available for users to work around Trac #265
// If `a[...0...]` isn't working, try `a[...ix0...]` instead.
// ix0 is the value zero with type ptrdiff_t, i.e. an ordinary integer rather than the literal 0.

#define ix0 ((ptrdiff_t)0)


//
// Rotation
//

// Base
// enq_ works on tags only: its result *type* is what matters (it is used as an assertion by
// the `all` subscript in this file); the returned tag carries no data.
// Base case: the fourth tag is the base type itself.  The result tags an arpk with dimension
// Nq and stride type Sq whose immediate element type is Tbase.
forall( [Nq], Sq & | sized(Sq), Tbase & )
static inline tag(arpk( Nq, Sq, Tbase, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(Tbase ) ) {
	tag(arpk( Nq, Sq, Tbase, Tbase )) ret;
	return ret;
}

// Rec
// Recursive case: the fourth tag is an arpk layer (N, S) over recq.  That layer is kept as
// is, and its immediate element type recq is replaced by recr, the type that the asserted
// enq_ produces for recq with the same (Nq, Sq).
forall( [Nq], Sq & | sized(Sq), [N], S & | sized(S), recq &, recr &, Tbase & | { tag(recr) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(recq) ); } )
static inline tag(arpk( N, S, recr, Tbase )) enq_( tag(Tbase ), tag(Nq), tag(Sq), tag(arpk( N, S, recq, Tbase )) ) {
	tag(arpk( N, S, recr, Tbase )) ret;
	return ret;
}

// Wrapper
// `all` is a marker object of the empty type all_t (declared extern here); it is used as a
// subscript: a[all].
// a[all] returns the same storage as `a`, viewed as type `result`, where `result` is the type
// that enq_ computes from a's outermost dimension (N, S) and a's immediate element type Te.
// No data is moved; only the reference is cast.
extern struct all_t {} all;
forall( [N], S & | sized(S), Te &, result &, Tbase & | { tag(result) enq_( tag(Tbase), tag(N), tag(S), tag(Te) ); } )
static inline result & ?[?]( arpk( N, S, Te, Tbase ) & this, all_t ) {
	return (result&) this;
}

//
// Trait of array or slice
//

// desired:
// forall(A &, Tv &, [N])
// trait ar {
//	 Tv& ?[?]( A&, zero_t );
//	 Tv& ?[?]( A&, one_t  );
//	 Tv& ?[?]( A&, int	);
//				   ...
//	 size_t ?`len( A& );
//	 void __taglen( tag(A), tag(N) );
// };

// working around N's not being accepted as arguments to traits
//
// ar( A, Tv, N ) expands to a brace-enclosed list of function declarations:
//  - subscripting an A by zero_t, one_t, int, unsigned int, long int and unsigned long int,
//    each yielding a Tv &
//  - ?`len on an A, yielding size_t
//  - __taglen, pairing tag(A) with tag(N)
// NOTE(review): presumably meant to be written where a trait would go in a forall assertion
// list -- confirm with a caller.

#define ar( A, Tv, N ) {				\
	Tv& ?[?]( A&, zero_t );				\
	Tv& ?[?]( A&, one_t );				\
	Tv& ?[?]( A&, int );				\
	Tv& ?[?]( A&, unsigned int );		\
	Tv& ?[?]( A&, long int );			\
	Tv& ?[?]( A&, unsigned long int );	\
	size_t ?`len( A& );					\
	void __taglen( tag(A), tag(N) );	\
}