Changeset 0554c1a for doc/theses/mike_brooks_MMath
- Timestamp:
- Apr 9, 2024, 10:11:29 PM (8 months ago)
- Branches:
- master
- Children:
- c4024b46
- Parents:
- 0bbe172
- Location:
- doc/theses/mike_brooks_MMath
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/mike_brooks_MMath/background.tex
r0bbe172 r0554c1a 1 1 \chapter{Background} 2 2 3 Since this work builds on C, it is necessary to explain the C mechanisms and their shortcomings for array, linked list, and string ,3 Since this work builds on C, it is necessary to explain the C mechanisms and their shortcomings for array, linked list, and string. 4 4 5 5 … … 71 71 \begin{cquote} 72 72 \begin{tabular}{@{}ll@{}} 73 \multicolumn{1}{@{}c}{\textbf{Array}} & \multicolumn{1}{c@{}}{\textbf{Function }} \\73 \multicolumn{1}{@{}c}{\textbf{Array}} & \multicolumn{1}{c@{}}{\textbf{Function Pointer}} \\ 74 74 \begin{cfa} 75 75 int @(*@ar@)[@5@]@; // definition … … 90 90 After all, reading a C array type is easy: just read it from the inside out, and know when to look left and when to look right! 91 91 92 \CFA provides its own type, variable and routine declarations, using a differentsyntax.92 \CFA provides its own type, variable and routine declarations, using a simpler syntax. 93 93 The new declarations place qualifiers to the left of the base type, while C declarations place qualifiers to the right of the base type. 94 94 The qualifiers have the same meaning in \CFA as in C. 95 Hence, a \CFA declaration is read left to right, where a function return type is enclosed in brackets @[]@.95 Then, a \CFA declaration is read left to right, where a function return type is enclosed in brackets @[@\,@]@. 96 96 \begin{cquote} 97 97 \begin{tabular}{@{}l@{\hspace{3em}}ll@{}} 98 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{C}} & \multicolumn{1}{c}{\textbf{\CFA}} & \\98 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{C}} & \multicolumn{1}{c}{\textbf{\CFA}} & \multicolumn{1}{c}{\textbf{read left to right}} \\ 99 99 \begin{cfa} 100 100 int @*@ x1 @[5]@; … … 127 127 Each row of the table shows alternate syntactic forms. 128 128 The simplest occurrences of types distinguished in the preceding discussion are marked with $\triangleright$. 129 Removing the declared variable @x@, gives the type used for variable, structure field, cast or error message \PAB{(though note Section TODO points out that some types cannot be casted to)}.129 Removing the declared variable @x@, gives the type used for variable, structure field, cast or error messages \PAB{(though note Section TODO points out that some types cannot be casted to)}. 130 130 Unfortunately, parameter declarations \PAB{(section TODO)} have more syntactic forms and rules. 131 131 132 132 \begin{table} 133 133 \centering 134 \caption{Syntactic Reference for Array vs Pointer. Includes interaction with constness.}134 \caption{Syntactic Reference for Array vs Pointer. Includes interaction with \lstinline{const}ness.} 135 135 \label{bkgd:ar:usr:avp} 136 136 \begin{tabular}{ll|l|l|l} 137 137 & Description & \multicolumn{1}{c|}{C} & \multicolumn{1}{c|}{\CFA} & \multicolumn{1}{c}{\CFA-thesis} \\ 138 138 \hline 139 139 $\triangleright$ & value & @T x;@ & @T x;@ & \\ 140 140 \hline 141 141 & immutable value & @const T x;@ & @const T x;@ & \\ 142 142 & & @T const x;@ & @T const x;@ & \\ 143 143 \hline \hline 144 144 $\triangleright$ & pointer to value & @T * x;@ & @* T x;@ & \\ 145 145 \hline 146 146 & immutable ptr. to val. & @T * const x;@ & @const * T x;@ & \\ … … 149 149 & & @T const * x;@ & @* T const x;@ & \\ 150 150 \hline \hline 151 151 $\triangleright$ & array of value & @T x[10];@ & @[10] T x@ & @array(T, 10) x@ \\ 152 152 \hline 153 153 & ar.\ of immutable val. & @const T x[10];@ & @[10] const T x@ & @const array(T, 10) x@ \\ … … 163 163 & & @T const * x[10];@ & @[10] * T const x@ & @array(* const T, 10) x@ \\ 164 164 \hline \hline 165 165 $\triangleright$ & ptr.\ to ar.\ of value & @T (*x)[10];@ & @* [10] T x@ & @* array(T, 10) x@ \\ 166 166 \hline 167 167 & imm. ptr.\ to ar.\ of val. & @T (* const x)[10];@ & @const * [10] T x@ & @const * array(T, 10) x@ \\ … … 180 180 \item static 181 181 \item star as dimension 182 \item under pointer decay: int p1[const 3] being int const *p1182 \item under pointer decay: @int p1[const 3]@ being @int const *p1@ 183 183 \end{itemize} 184 184 … … 197 197 \lstinput{10-10}{bkgd-carray-decay.c} 198 198 199 So, C provides an implicit conversion from @float[10]@ to @float *@, as described in ARM-6.3.2.1.3:199 So, C provides an implicit conversion from @float[10]@ to @float *@. 200 200 \begin{quote} 201 Except when it is the operand of the @sizeof@ operator, or the unary @&@ operator, or is a 202 string literal used to initialize an array 203 an expression that has type ``array of type'' is 204 converted to an expression with type ``pointer to type'' that points to the initial element of 205 the array object 201 Except when it is the operand of the @sizeof@ operator, or the unary @&@ operator, or is a string literal used to 202 initialize an array an expression that has type ``array of \emph{type}'' is converted to an expression with type 203 ``pointer to \emph{type}'' that points to the initial element of the array object~\cite[\S~6.3.2.1.3]{C11} 206 204 \end{quote} 207 208 205 This phenomenon is the famous ``pointer decay,'' which is a decay of an array-typed expression into a pointer-typed one. 209 210 206 It is worthy to note that the list of exception cases does not feature the occurrence of @ar@ in @ar[i]@. 211 Thus, subscripting happens on pointers, not arrays. 212 213 Subscripting proceeds first with pointer decay, if needed. Next, ARM-6.5.2.1.2 explains that @ar[i]@ is treated as if it were @(*((a)+(i)))@. 214 ARM-6.5.6.8 explains that the addition, of a pointer with an integer type, is defined only when the pointer refers to an element that is in an array, with a meaning of ``@i@ elements away from,'' which is valid if @ar@ is big enough and @i@ is small enough. 215 Finally, ARM-6.5.3.2.4 explains that the @*@ operator's result is the referenced element. 216 217 Taken together, these rules also happen to illustrate that @ar[i]@ and @i[a]@ mean the same thing. 207 Thus, subscripting happens on pointers not arrays. 208 209 Subscripting proceeds first with pointer decay, if needed. Next, \cite[\S~6.5.2.1.2]{C11} explains that @ar[i]@ is treated as if it were @(*((a)+(i)))@. 210 \cite[\S~6.5.6.8]{C11} explains that the addition, of a pointer with an integer type, is defined only when the pointer refers to an element that is in an array, with a meaning of ``@i@ elements away from,'' which is valid if @ar@ is big enough and @i@ is small enough. 211 Finally, \cite[\S~6.5.3.2.4]{C11} explains that the @*@ operator's result is the referenced element. 212 Taken together, these rules illustrate that @ar[i]@ and @i[a]@ mean the same thing! 218 213 219 214 Subscripting a pointer when the target is standard-inappropriate is still practically well-defined. … … 227 222 fs[5] = 3.14; 228 223 \end{cfa} 229 The @malloc@ behaviour is specified as returning a pointer to ``space for an object whose size is'' as requested (ARM-7.22.3.4.2). 230 But program says \emph{nothing} more about this pointer value, that might cause its referent to \emph{be} an array, before doing the subscript. 231 232 Under this assumption, a pointer being subscripted (or added to, then dereferenced) 233 by any value (positive, zero, or negative), gives a view of the program's entire address space, 234 centred around the @p@ address, divided into adjacent @sizeof(*p)@ chunks, 235 each potentially (re)interpreted as @typeof(*p)@. 236 237 I call this phenomenon ``array diffraction,'' which is a diffraction of a single-element pointer 238 into the assumption that its target is in the middle of an array whose size is unlimited in both directions. 239 224 The @malloc@ behaviour is specified as returning a pointer to ``space for an object whose size is'' as requested (\cite[\S~7.22.3.4.2]{C11}). 225 But \emph{nothing} more is said about this pointer value, specifically that its referent might \emph{be} an array allowing subscripting. 226 227 Under this assumption, a pointer being subscripted (or added to, then dereferenced) by any value (positive, zero, or negative), gives a view of the program's entire address space, centred around the @p@ address, divided into adjacent @sizeof(*p)@ chunks, each potentially (re)interpreted as @typeof(*p)@. 228 I call this phenomenon ``array diffraction,'' which is a diffraction of a single-element pointer into the assumption that its target is in the middle of an array whose size is unlimited in both directions. 240 229 No pointer is exempt from array diffraction. 241 242 230 No array shows its elements without pointer decay. 243 231 244 232 A further pointer--array confusion, closely related to decay, occurs in parameter declarations. 245 ARM-6.7.6.3.7explains that when an array type is written for a parameter,246 the parameter's type becomes a type that I summarize as beingthe array-decayed type.233 \cite[\S~6.7.6.3.7]{C11} explains that when an array type is written for a parameter, 234 the parameter's type becomes a type that can be summarized as the array-decayed type. 247 235 The respective handling of the following two parameter spellings shows that the array-spelled one is really, like the other, a pointer. 248 236 \lstinput{12-16}{bkgd-carray-decay.c} 249 237 As the @sizeof(x)@ meaning changed, compared with when run on a similarly-spelled local variable declaration, 250 GCC also gives this code the warning: ```sizeof' on array function parameter `x' will return size of `float *'.'' 251 252 The caller of such a function is left with the reality that a pointer parameter is a pointer, no matter how it's spelled: 238 GCC also gives this code the warning for the first assertion: 239 \begin{cfa} 240 warning: 'sizeof' on array function parameter 'x' will return size of 'float *' 241 \end{cfa} 242 The caller of such a function is left with the reality that a pointer parameter is a pointer, no matter how it is spelled: 253 243 \lstinput{18-21}{bkgd-carray-decay.c} 254 244 This fragment gives no warnings. … … 266 256 depending on whether the object is a local variable or a parameter. 267 257 268 269 258 In summary, when a function is written with an array-typed parameter, 270 259 \begin{itemize} … … 277 266 As a result, a function with a pointer-to-array parameter sees the parameter exactly as the caller does: 278 267 \lstinput{32-42}{bkgd-carray-decay.c} 279 280 \VRef[Figure]{bkgd:ar:usr:decay-parm} gives the reference for the decay phenomenon seen in parameter declarations. 281 282 \begin{figure} 268 \VRef[Table]{bkgd:ar:usr:decay-parm} gives the reference for the decay phenomenon seen in parameter declarations. 269 270 \begin{table} 271 \caption{Syntactic Reference for Decay during Parameter-Passing. 272 Includes interaction with \lstinline{const}ness, where ``immutable'' refers to a restriction on the callee's ability.} 273 \label{bkgd:ar:usr:decay-parm} 283 274 \centering 284 275 \begin{tabular}{llllll} 285 & Description & Type & Param. Decl & \CFA-C \\ \hline 286 $\triangleright$ & ptr.\ to val. 287 & @T *@ 288 & \pbox{20cm}{ \vspace{2pt} \lstinline{T * x,} \\ \lstinline{T x[10],} \\ \lstinline{T x[],} }\vspace{2pt} 289 & \pbox{20cm}{ \vspace{2pt} \lstinline{[ * T ]} \\ \lstinline{[ [10] T ]} \\ \lstinline{[ [] T ]} } 290 \\ \hline 291 & \pbox{20cm}{ \vspace{2pt} ptr.\ to val.\\ \footnotesize{no writing the ptr.\ in \lstinline{x}} }\vspace{2pt} 292 & @T * const@ 293 & \pbox{20cm}{ \vspace{2pt} \lstinline{T * const x,} \\ \lstinline{T x[const 10],} \\ \lstinline{T x[const],} }\vspace{2pt} 294 & \pbox{20cm}{ \vspace{2pt} \lstinline{[ const * T ]} \\ \lstinline{[ [const 10] T ]} \\ \lstinline{[ [const] T ]} } 295 \\ \hline 296 & \pbox{20cm}{ \vspace{2pt} ptr.\ to val.\\ \footnotesize{no writing the val.\ in \lstinline{*x}} }\vspace{2pt} 297 & \pbox{20cm}{ \vspace{2pt} \lstinline{const T *} \\ \lstinline{T const *} } 298 & \pbox{20cm}{ \vspace{2pt} \lstinline{const T * x,} \\ \lstinline{T const * x,} \\ \lstinline{const T x[10],} \\ \lstinline{T const x[10],} \\ \lstinline{const T x[],} \\ \lstinline{T const x[],} }\vspace{2pt} 299 & \pbox{20cm}{ \vspace{2pt} \lstinline{[* const T]} \\ \lstinline{[ [10] const T ]} \\ \lstinline{[ [] const T ]} } 300 \\ \hline \hline 301 $\triangleright$ & ptr.\ to ar.\ of val. 302 & @T(*)[10]@ 303 & \pbox{20cm}{ \vspace{2pt} \lstinline{T (*x)[10],} \\ \lstinline{T x[3][10],} \\ \lstinline{T x[][10],} }\vspace{2pt} 304 & \pbox{20cm}{ \vspace{2pt} \lstinline{[* [10] T]} \\ \lstinline{[ [3] [10] T ]} \\ \lstinline{[ [] [10] T ]} } 305 \\ \hline 306 & ptr.\ to ptr.\ to val. 307 & @T **@ 308 & \pbox{20cm}{ \vspace{2pt} \lstinline{T ** x,} \\ \lstinline{T *x[10],} \\ \lstinline{T *x[],} }\vspace{2pt} 309 & \pbox{20cm}{ \vspace{2pt} \lstinline{[ * * T ]} \\ \lstinline{[ [10] * T ]} \\ \lstinline{[ [] * T ]} } 310 \\ \hline 311 & \pbox{20cm}{ \vspace{2pt} ptr.\ to ptr.\ to val.\\ \footnotesize{no writing the val.\ in \lstinline{**argv}} }\vspace{2pt} 312 & @const char **@ 313 & \pbox{20cm}{ \vspace{2pt} \lstinline{const char *argv[],} \\ \footnotesize{(others elided)} }\vspace{2pt} 314 & \pbox{20cm}{ \vspace{2pt} \lstinline{[ [] * const char ]} \\ \footnotesize{(others elided)} } 315 \\ \hline 276 & Description & Type & Parameter Declaration & \CFA \\ 277 \hline 278 & & & @T * x,@ & @* T x,@ \\ 279 $\triangleright$ & pointer to value & @T *@ & @T x[10],@ & @[10] T x,@ \\ 280 & & & @T x[],@ & @[] T x,@ \\ 281 \hline 282 & & & @T * const x,@ & @const * T x@ \\ 283 & immutable ptr.\ to val. & @T * const@ & @T x[const 10],@ & @[const 10] T x,@ \\ 284 & & & @T x[const],@ & @[const] T x,@\\ 285 \hline 286 & & & @const T * x,@ & @ * const T x,@ \\ 287 & & & @T const * x,@ & @ * T const x,@ \\ 288 & ptr.\ to immutable val. & @const T *@ & @const T x[10],@ & @[10] const T x,@ \\ 289 & & @T const *@ & @T const x[10],@ & @[10] T const x,@ \\ 290 & & & @const T x[],@ & @[] const T x,@ \\ 291 & & & @T const x[],@ & @[] T const x,@ \\ 292 \hline \hline 293 & & & @T (*x)[10],@ & @* [10] T x,@ \\ 294 $\triangleright$ & ptr.\ to ar.\ of val. & @T(*)[10]@ & @T x[3][10],@ & @[3][10] T x,@ \\ 295 & & & @T x[][10],@ & @[][10] T x,@ \\ 296 \hline 297 & & & @T ** x,@ & @** T x,@ \\ 298 & ptr.\ to ptr.\ to val. & @T **@ & @T * x[10],@ & @[10] * T x,@ \\ 299 & & & @T * x[],@ & @[] * T x,@ \\ 300 \hline 301 & ptr.\ to ptr.\ to imm.\ val. & @const char **@ & @const char * argv[],@ & @[] * const char argv,@ \\ 302 & & & \emph{others elided} & \emph{others elided} \\ 303 \hline 316 304 \end{tabular} 317 \caption{Unfortunate Syntactic Reference for Decay during Parameter-Passing. Includes interaction with constness, where ``no writing'' refers to a restriction on the callee's ability.} 318 \label{bkgd:ar:usr:decay-parm} 319 \end{figure} 305 \end{table} 320 306 321 307 322 308 \subsection{Lengths may vary, checking does not} 323 309 324 When the desired number of elements is unknown at compile time, 325 a variable-length array is a solution: 326 \begin{cfa} 327 int main( int argc, const char *argv[] ) { 310 When the desired number of elements is unknown at compile time, a variable-length array is a solution: 311 \begin{cfa} 312 int main( int argc, const char * argv[] ) { 328 313 assert( argc == 2 ); 329 314 size_t n = atol( argv[1] ); 330 assert( 0 < n && n < 1000 ); 331 315 assert( 0 < n ); 332 316 float ar[n]; 333 317 float b[10]; 334 335 318 // ... discussion continues here 336 319 } 337 320 \end{cfa} 338 This arrangement allocates @n@ elements on the @main@ stack frame for @ar@, just as it puts 10 elements on the @main@ stack frame for @b@. 339 The variable-sized allocation of @ar@ is provided by @alloca@. 340 341 In a situation where the array sizes are not known to be small enough for stack allocation to be sensible, corresponding heap allocations are achievable as: 342 \begin{cfa} 343 float *ax1 = malloc( sizeof( float[n] ) ); 344 float *ax2 = malloc( n * sizeof( float ) ); 345 float *bx1 = malloc( sizeof( float[1000000] ) ); 346 float *bx2 = malloc( 1000000 * sizeof( float ) ); 347 \end{cfa} 348 349 350 VLA 321 This arrangement allocates @n@ elements on the @main@ stack frame for @ar@, called a \newterm{variable length array} (VLA), as well as 10 elements in the same stack frame for @b@. 322 The variable-sized allocation of @ar@ is provided by the @alloca@ routine, which bumps the stack pointer. 323 Note, the C standard supports VLAs, but the \CC standard does not; 324 both @gcc@ and @g++@ support VLAs. 325 As well, there is misinformation about VLAs, \eg VLAs cause stack failures or are inefficient. 326 VLAs exist as far back as Algol W~\cite[\S~5.2]{AlgolW} and are a sound and efficient data type. 327 328 In situations where the stack size has a small bound (coroutines or user-level threads), unbounded VLAs can overflow the stack so a heap allocation is used. 329 \begin{cfa} 330 float * ax1 = malloc( sizeof( float[n] ) ); 331 float * ax2 = malloc( n * sizeof( float ) ); 332 float * bx1 = malloc( sizeof( float[1000000] ) ); 333 float * bx2 = malloc( 1000000 * sizeof( float ) ); 334 \end{cfa} 351 335 352 336 Parameter dependency … … 357 341 358 342 359 360 \subsection{C has full-service, dynamically sized, multidimensional arrays (and \CC does not)} 343 \subsection{Dynamically sized, multidimensional arrays} 361 344 362 345 In C and \CC, ``multidimensional array'' means ``array of arrays.'' Other meanings are discussed in TODO. -
doc/theses/mike_brooks_MMath/programs/bkgd-carray-decay.c
r0bbe172 r0554c1a 4 4 float (*pa)[10] = &a; $\C{// pointer to array}$ 5 5 float a0 = a[0]; $\C{// element}$ 6 float * pa0 = &(a[0]); $\C{// pointer to element}$6 float * pa0 = &(a[0]); $\C{// pointer to element}$ 7 7 8 float * pa0x = a; $\C{// (ok)}$8 float * pa0x = a; $\C{// (ok)}$ 9 9 assert( pa0 == pa0x ); 10 10 assert( sizeof(pa0x) != sizeof(a) ); 11 11 12 void f( float x[10], float * y ) {13 static_assert( sizeof(x) == sizeof(void *) );14 static_assert( sizeof(y) == sizeof(void *) );12 void f( float x[10], float * y ) { 13 static_assert( sizeof(x) == sizeof(void *) ); 14 static_assert( sizeof(y) == sizeof(void *) ); 15 15 } 16 16 f(0,0); … … 22 22 23 23 char ca[] = "hello"; $\C{// array on stack, initialized from read-only data}$ 24 char * cp = "hello";$\C{// pointer to read-only data [decay here]}$25 void edit( char c[] ) { $\C{// param is pointer}$24 char * cp = "hello"; $\C{// pointer to read-only data [decay here]}$ 25 void edit( char c[] ) { $\C{// parameter is pointer}$ 26 26 c[3] = 'p'; 27 27 } 28 28 edit( ca ); $\C{// ok [decay here]}$ 29 edit( c p );$\C{// Segmentation fault}$29 edit( cp ); $\C{// Segmentation fault}$ 30 30 edit( "hello" ); $\C{// Segmentation fault [decay here]}$ 31 31 32 32 void decay( float x[10] ) { 33 static_assert( sizeof(x) == sizeof(void *) );33 static_assert( sizeof(x) == sizeof(void *) ); 34 34 } 35 35 static_assert( sizeof(a) == 10 * sizeof(float) );
Note: See TracChangeset
for help on using the changeset viewer.