Changeset 10a9479d
- Timestamp:
- Nov 23, 2024, 8:28:37 PM (11 months ago)
- Branches:
- master
- Children:
- 956b389
- Parents:
- b006c51e (diff), de7b7a5 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - Files:
-
- 6 added
- 60 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
doc/LaTeXmacros/common.sty
rb006c51e r10a9479d 11 11 %% Created On : Sat Apr 9 10:06:17 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Sun Aug 25 11:52:19202414 %% Update Count : 66 113 %% Last Modified On : Sun Nov 3 21:10:34 2024 14 %% Update Count : 662 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 27 27 \usepackage[ignoredisplayed]{enumitem} % do not affect trivlist 28 28 \setlist{labelsep=1ex}% global 29 \setlist[itemize]{topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent,leftmargin=\parindent}% global 29 \setlist{topsep=0pt}% global 30 \setlist[itemize]{parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global 30 31 \setlist[itemize,1]{label=\textbullet}% local 31 32 %\renewcommand{\labelitemi}{{\raisebox{0.25ex}{\footnotesize$\bullet$}}} 32 \setlist[enumerate]{ topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global33 \setlist[enumerate]{parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global 33 34 \setlist[enumerate,2]{leftmargin=\parindent,labelsep=*,align=parleft,label=\alph*.}% local 34 \setlist[description]{ topsep=0.5ex,itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}35 \setlist[description]{itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex} 35 36 36 37 % Names used in the document. -
doc/LaTeXmacros/common.tex
rb006c51e r10a9479d 11 11 %% Created On : Sat Apr 9 10:06:17 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Sun Aug 25 11:52:20 202414 %% Update Count : 6 7313 %% Last Modified On : Sun Nov 3 09:11:30 2024 14 %% Update Count : 684 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 27 27 \usepackage[ignoredisplayed]{enumitem} % do not affect trivlist 28 28 \setlist{labelsep=1ex}% global 29 \setlist[itemize]{topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent,leftmargin=\parindent}% global 29 \setlist{topsep=0pt}% global 30 \setlist[itemize]{parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global 30 31 \setlist[itemize,1]{label=\textbullet}% local 31 32 %\renewcommand{\labelitemi}{{\raisebox{0.25ex}{\footnotesize$\bullet$}}} 32 \setlist[enumerate]{ topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global33 \setlist[enumerate]{parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global 33 34 \setlist[enumerate,2]{leftmargin=\parindent,labelsep=*,align=parleft,label=\alph*.}% local 34 \setlist[description]{ topsep=0.5ex,itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}35 \setlist[description]{itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex} 35 36 36 37 % Names used in the document. -
doc/bibliography/pl.bib
rb006c51e r10a9479d 3685 3685 address = {Waterloo, Ontario, Canada, N2L 3G1}, 3686 3686 note = {\url{http://uwspace.uwaterloo.ca/bitstream/10012/3501/1/Thesis.pdf}}, 3687 } 3688 3689 @article{Hesselink24, 3690 author = {Wim A. Hesselink and Peter A. Buhr and Colby A. Parsons}, 3691 title = {First-Come-First-Served as a Separate Principle}, 3692 journal = {ACM Trans. Parallel Comput.}, 3693 publisher = {ACM}, 3694 address = {New York, NY, USA}, 3695 volume = 11, 3696 number = 4, 3697 month = nov, 3698 year = 2024, 3687 3699 } 3688 3700 -
doc/theses/fangren_yu_MMath/content1.tex
rb006c51e r10a9479d 1 \chapter{ Recent Features Introduced to \CFA}1 \chapter{\CFA Features and Type System Interactions} 2 2 \label{c:content1} 3 3 4 This chapter discusses some recent additions to the \CFA language and their interactions with the type system.4 This chapter discusses \CFA feature introduced over time by multiple people and their interactions with the type system. 5 5 6 6 … … 17 17 Succinctly, if the address changes often, use a pointer; 18 18 if the value changes often, use a reference. 19 Note, \CC made its reference address immutable starting a \emph{belief} that immutability is a fundamental aspect of a reference's pointer. 20 The results is asymmetry semantics between the pointer and reference. 19 Java has mutable references but no pointers. 20 \CC has mutable pointers but immutable references; 21 hence, references match with functional programming. 22 However, the consequence is asymmetry semantics between the pointer and reference. 21 23 \CFA adopts a uniform policy between pointers and references where mutability is a separate property made at the declaration. 22 24 … … 36 38 Like pointers, reference can be cascaded, \ie a reference to a reference, \eg @&& r2@.\footnote{ 37 39 \CC uses \lstinline{&&} for rvalue reference, a feature for move semantics and handling the \lstinline{const} Hell problem.} 38 Usage of a reference variable automatically performs the same number of dereferences as the number of references in its declaration, \eg @r 3@ becomes @***r3@.40 Usage of a reference variable automatically performs the same number of dereferences as the number of references in its declaration, \eg @r2@ becomes @**r2@. 39 41 Finally, to reassign a reference's address needs a mechanism to stop the auto-referencing, which is accomplished by using a single reference to cancel all the auto-dereferencing, \eg @&r3 = &y@ resets @r3@'s address to point to @y@. 40 42 \CFA's reference type (including multi-de/references) is powerful enough to describe the lvalue rules in C by types only. … … 66 68 int x = 3; $\C{// mutable}$ 67 69 const int cx = 5; $\C{// immutable}$ 68 int * const cp = &x, $\C{// immutable pointer }$70 int * const cp = &x, $\C{// immutable pointer pointer/reference}$ 69 71 & const cr = cx; 70 const int * const ccp = &cx, $\C{// immutable value and pointer }$72 const int * const ccp = &cx, $\C{// immutable value and pointer/reference}$ 71 73 & const ccr = cx; 72 // pointer 74 \end{cfa} 75 \begin{cquote} 76 \setlength{\tabcolsep}{26pt} 77 \begin{tabular}{@{}lll@{}} 78 pointer & reference & \\ 79 \begin{cfa} 73 80 *cp = 7; 74 cp = &x; $\C{// error, assignment of read-only variable}$ 75 *ccp = 7; $\C{// error, assignment of read-only location}$ 76 ccp = &cx; $\C{// error, assignment of read-only variable}$ 77 // reference 81 cp = &x; 82 *ccp = 7; 83 ccp = &cx; 84 \end{cfa} 85 & 86 \begin{cfa} 78 87 cr = 7; 79 cr = &x; $\C{// error, assignment of read-only variable}$ 80 *ccr = 7; $\C{// error, assignment of read-only location}$ 81 ccr = &cx; $\C{// error, assignment of read-only variable}$ 82 \end{cfa} 88 cr = &x; 89 *ccr = 7; 90 ccr = &cx; 91 \end{cfa} 92 & 93 \begin{cfa} 94 // allowed 95 // error, assignment of read-only variable 96 // error, assignment of read-only location 97 // error, assignment of read-only variable 98 \end{cfa} 99 \end{tabular} 100 \end{cquote} 83 101 Interestingly, C does not give a warning/error if a @const@ pointer is not initialized, while \CC does. 84 Hence, type @& const@ is similar to \CC reference, but \CFA does not preclude initialization with a non-variable address.102 Hence, type @& const@ is similar to a \CC reference, but \CFA does not preclude initialization with a non-variable address. 85 103 For example, in system's programming, there are cases where an immutable address is initialized to a specific memory location. 86 104 \begin{cfa} … … 96 114 However, there is an inherent ambiguity for auto-dereferencing: every argument expression involving a reference variable can potentially mean passing the reference's value or address. 97 115 Without any restrictions, this ambiguity limits the behaviour of reference types in \CFA polymorphic functions, where a type @T@ can bind to a reference or non-reference type. 98 This ambiguity prevents the type system treating reference types the same way as other types in many caseseven if type variables could be bound to reference types.116 This ambiguity prevents the type system treating reference types the same way as other types, even if type variables could be bound to reference types. 99 117 The reason is that \CFA uses a common \emph{object trait}\label{p:objecttrait} (constructor, destructor and assignment operators) to handle passing dynamic concrete type arguments into polymorphic functions, and the reference types are handled differently in these contexts so they do not satisfy this common interface. 100 118 101 119 Moreover, there is also some discrepancy in how the reference types are treated in initialization and assignment expressions. 102 For example, in line 3 of the previous example code \see{\VPageref{p:refexamples}}:120 For example, in line 3 of the example code on \VPageref{p:refexamples}: 103 121 \begin{cfa} 104 122 int @&@ r1 = x, @&&@ r2 = r1, @&&&@ r3 = r2; $\C{// references to x}$ … … 129 147 vector( int @&@ ) vec; $\C{// vector of references to ints}$ 130 148 \end{cfa} 131 While it is possible to write a reference type as the argument to a generic type, it is disallowed in assertion checking, if the generic type requires the object trait \see{\VPageref{p:objecttrait}} for the type argument (a fairly common use case).149 While it is possible to write a reference type as the argument to a generic type, it is disallowed in assertion checking, if the generic type requires the object trait \see{\VPageref{p:objecttrait}} for the type argument, a fairly common use case. 132 150 Even if the object trait can be made optional, the current type system often misbehaves by adding undesirable auto-dereference on the referenced-to value rather than the reference variable itself, as intended. 133 151 Some tweaks are necessary to accommodate reference types in polymorphic contexts and it is unclear what can or cannot be achieved. 134 Currently, there are contexts where \CFA programmer must use pointer types, giving up the benefits of auto-dereference operations and better syntax with reference types.152 Currently, there are contexts where \CFA programmer is forced to use a pointer type, giving up the benefits of auto-dereference operations and better syntax with reference types. 135 153 136 154 … … 165 183 Along with making returning multiple values a first-class feature, tuples were extended to simplify a number of other common context that normally require multiple statements and/or additional declarations, all of which reduces coding time and errors. 166 184 \begin{cfa} 167 [x, y, z] = 3; $\C[2in]{// x = 3; y = 3; z = 3, where types are different}$185 [x, y, z] = 3; $\C[2in]{// x = 3; y = 3; z = 3, where types may be different}$ 168 186 [x, y] = [y, x]; $\C{// int tmp = x; x = y; y = tmp;}$ 169 187 void bar( int, int, int ); … … 212 230 bar( t2 ); $\C{// bar defined above}$ 213 231 \end{cfa} 214 \VRef[Figure]{f:Nesting} shows The difference is nesting of structures and tuples.232 \VRef[Figure]{f:Nesting} shows the difference is nesting of structures and tuples. 215 233 The left \CC nested-structure is named so it is not flattened. 216 234 The middle C/\CC nested-structure is unnamed and flattened, causing an error because @i@ and @j@ are duplication names. … … 220 238 221 239 \begin{figure} 222 \setlength{\tabcolsep}{ 15pt}240 \setlength{\tabcolsep}{20pt} 223 241 \begin{tabular}{@{}ll@{\hspace{90pt}}l@{}} 224 242 \multicolumn{1}{c}{\CC} & \multicolumn{1}{c}{C/\CC} & \multicolumn{1}{c}{tuple} \\ … … 273 291 As noted, tradition languages manipulate multiple values by in/out parameters and/or structures. 274 292 K-W C adopted the structure for tuple values or variables, and as needed, the fields are extracted by field access operations. 275 As well, For the tuple-assignment implementation, the left-hand tuple expression is expanded into assignments of each component, creating temporary variables to avoid unexpected side effects.276 For example, the tuple value returned from @foo@ is a structure, and its fields are individually assigned to a left-hand tuple, @x@, @y@, @z@, orcopied directly into a corresponding tuple variable.293 As well, for the tuple-assignment implementation, the left-hand tuple expression is expanded into assignments of each component, creating temporary variables to avoid unexpected side effects. 294 For example, the tuple value returned from @foo@ is a structure, and its fields are individually assigned to a left-hand tuple, @x@, @y@, @z@, \emph{or} copied directly into a corresponding tuple variable. 277 295 278 296 In the second implementation of \CFA tuples by Rodolfo Gabriel Esteves~\cite{Esteves04}, a different strategy is taken to handle MVR functions. … … 286 304 [x, y] = gives_two(); 287 305 \end{cfa} 288 The Till K-W C implementation translates the program to: 306 \VRef[Figure]{f:AlternateTupleImplementation} shows the two implementation approaches. 307 In the left approach, the return statement is rewritten to pack the return values into a structure, which is returned by value, and the structure fields are indiviually assigned to the left-hand side of the assignment. 308 In the right approach, the return statement is rewritten as direct assignments into the passed-in argument addresses. 309 The right imlementation looks more concise and saves unnecessary copying. 310 The downside is indirection within @gives_two@ to access values, unless values get hoisted into registers for some period of time, which is common. 311 312 \begin{figure} 313 \begin{cquote} 314 \setlength{\tabcolsep}{20pt} 315 \begin{tabular}{@{}ll@{}} 316 Till K-W C implementation & Rodolfo \CFA implementation \\ 289 317 \begin{cfa} 290 318 struct _tuple2 { int _0; int _1; } 291 struct _tuple2 gives_two() { ... struct _tuple2 ret = { r1, r2 }, return ret; } 319 struct _tuple2 gives_two() { 320 ... struct _tuple2 ret = { r1, r2 }; 321 return ret; 322 } 292 323 int x, y; 293 324 struct _tuple2 _tmp = gives_two(); 294 325 x = _tmp._0; y = _tmp._1; 295 326 \end{cfa} 296 while the Rodolfo implementation translates it to: 297 \begin{cfa} 298 void gives_two( int * r1, int * r2 ) { ... *r1 = ...; *r2 = ...; return; } 327 & 328 \begin{cfa} 329 330 void gives_two( int * r1, int * r2 ) { 331 ... *r1 = ...; *r2 = ...; 332 return; 333 } 299 334 int x, y; 335 300 336 gives_two( &x, &y ); 301 337 \end{cfa} 302 and inside the body of the function @gives_two@, the return statement is rewritten as assignments into the passed-in argument addresses. 303 This implementation looks more concise, and in the case of returning values having nontrivial types, \eg aggregates, this implementation saves unnecessary copying. 304 For example, 305 \begin{cfa} 306 [ x, y ] gives_two(); 307 int x, y; 308 [ x, y ] = gives_two(); 309 \end{cfa} 310 becomes 311 \begin{cfa} 312 void gives_two( int &, int & ); 313 int x, y; 314 gives_two( x, y ); 315 \end{cfa} 316 eliminiating any copying in or out of the call. 317 The downside is indirection within @gives_two@ to access values, unless values get hoisted into registers for some period of time, which is common. 338 \end{tabular} 339 \end{cquote} 340 \caption{Alternate Tuple Implementation} 341 \label{f:AlternateTupleImplementation} 342 \end{figure} 318 343 319 344 Interestingly, in the third implementation of \CFA tuples by Robert Schluntz~\cite[\S~3]{Schluntz17}, the MVR functions revert back to structure based, where it remains in the current version of \CFA. 320 345 The reason for the reversion was to have a uniform approach for tuple values/variables making tuples first-class types in \CFA, \ie allow tuples with corresponding tuple variables. 321 This extension was possible, because in parallel with Schluntz's work, generic types were beingadded independently by Moss~\cite{Moss19}, and the tuple variables leveraged the same implementation techniques as the generic variables.346 This extension was possible, because in parallel with Schluntz's work, generic types were added independently by Moss~\cite{Moss19}, and the tuple variables leveraged the same implementation techniques as the generic variables. 322 347 \PAB{I'm not sure about the connection here. Do you have an example of what you mean?} 323 348 … … 339 364 \begin{cfa} 340 365 void f( int, int ); 341 void f( [int, int]);366 void f( @[@ int, int @]@ ); 342 367 f( 3, 4 ); // ambiguous call 343 368 \end{cfa} … … 358 383 the call to @f@ can be interpreted as @T = [1]@ and @U = [2, 3, 4, 5]@, or @T = [1, 2]@ and @U = [3, 4, 5]@, and so on. 359 384 The restriction ensures type checking remains tractable and does not take too long to compute. 360 Therefore, tuple types are never present in any fixed-argument function calls .361 362 Finally, a type-safe variadic argument signature was added by Robert Schluntz~\cite[\S~4.1.2]{Schluntz17} using @forall@ and a new tuple parameter-type, denoted by the keyword @ttype 385 Therefore, tuple types are never present in any fixed-argument function calls, because of the flattening. 386 387 Finally, a type-safe variadic argument signature was added by Robert Schluntz~\cite[\S~4.1.2]{Schluntz17} using @forall@ and a new tuple parameter-type, denoted by the keyword @ttype@ in Schluntz's implementation, but changed to the ellipsis syntax similar to \CC's template parameter pack. 363 388 For C variadics, \eg @va_list@, the number and types of the arguments must be conveyed in some way, \eg @printf@ uses a format string indicating the number and types of the arguments. 364 389 \VRef[Figure]{f:CVariadicMaxFunction} shows an $N$ argument @maxd@ function using the C untyped @va_list@ interface. … … 370 395 \begin{figure} 371 396 \begin{cfa} 372 double maxd( int @count@, ... ) {397 double maxd( int @count@, @...@ ) { // ellipse parameter 373 398 double max = 0; 374 399 va_list args; … … 566 591 struct U u; u.k; u.l; 567 592 \end{cfa} 568 and the hoisted type names can clash with global type snames.593 and the hoisted type names can clash with global type names. 569 594 For good reasons, \CC chose to change this semantics: 570 595 \begin{cquote} … … 584 609 \end{cfa} 585 610 \CFA chose to adopt the \CC non-compatible change for nested types, since \CC's change has already forced certain coding changes in C libraries that must be parsed by \CC. 611 \CFA also added the ability to access from a variable through a type to a field. 612 \begin{cfa} 613 struct S s; @s.T@.i; @s.U@.k; 614 \end{cfa} 586 615 587 616 % https://gcc.gnu.org/onlinedocs/gcc/Unnamed-Fields.html … … 604 633 \end{cfa} 605 634 Note, the position of the substructure is normally unimportant, unless there is some form of memory or @union@ overlay. 606 Like the anonymous nested types, the aggregate field names are hoisted into @struct S@, so there is direct access, \eg @s.x@ and @s.i@. 607 However, like the implicit C hoisting of nested structures, the field names must be unique and the type names are now at a different scope level, unlike type nesting in \CC. 608 In addition, a pointer to a structure is automatically converted to a pointer to an anonymous field for assignments and function calls, providing containment inheritance with implicit subtyping, \ie @U@ $\subset$ @S@ and @W@ $\subset$ @S@. 609 For example: 635 Like an anonymous nested type, a named nested Plan-9 type has its field names hoisted into @struct S@, so there is direct access, \eg @s.x@ and @s.i@. 636 Hence, the field names must be unique, unlike \CC nested types, but the type names are at a nested scope level, unlike type nesting in C. 637 In addition, a pointer to a structure is automatically converted to a pointer to an anonymous field for assignments and function calls, providing containment inheritance with implicit subtyping, \ie @U@ $\subset$ @S@ and @W@ $\subset$ @S@, \eg: 610 638 \begin{cfa} 611 639 void f( union U * u ); 612 640 void g( struct W * ); 613 union U * up; 614 struct W * wp; 615 struct S * sp; 616 up = sp; $\C{// assign pointer to U in S}$ 617 wp = sp; $\C{// assign pointer to W in S}$ 641 union U * up; struct W * wp; struct S * sp; 642 up = &s; $\C{// assign pointer to U in S}$ 643 wp = &s; $\C{// assign pointer to W in S}$ 618 644 f( &s ); $\C{// pass pointer to U in S}$ 619 645 g( &s ); $\C{// pass pointer to W in S}$ 620 646 \end{cfa} 621 622 \CFA extends the Plan-9 substructure by allowing polymorphism for values and pointers. 623 The extended substructure is denoted using @inline@, allowing backwards compatibility to existing Plan-9 features. 647 Note, there is no value assignment, such as, @w = s@, to copy the @W@ field from @S@. 648 649 Unfortunately, the Plan-9 designers did not lookahead to other useful features, specifically nested types. 650 This nested type compiles in \CC and \CFA. 651 \begin{cfa} 652 struct R { 653 @struct T;@ $\C[2in]{// forward declaration, conflicts with Plan-9 syntax}$ 654 struct S { $\C{// nested types, mutually recursive reference}\CRT$ 655 S * sp; T * tp; ... 656 }; 657 struct T { 658 S * sp; T * tp; ... 659 }; 660 }; 661 \end{cfa} 662 Note, the syntax for the forward declaration conflicts with the Plan-9 declaration syntax. 663 664 \CFA extends the Plan-9 substructure by allowing polymorphism for values and pointers, where the extended substructure is denoted using @inline@. 624 665 \begin{cfa} 625 666 struct S { 626 @inline@ W; $\C{// extended Plan-9 substructure}$667 @inline@ struct W; $\C{// extended Plan-9 substructure}$ 627 668 unsigned int tag; 628 669 @inline@ U; $\C{// extended Plan-9 substructure}$ 629 670 } s; 630 671 \end{cfa} 631 Note, like \CC, \CFA allows optional prefixing of type names with their kind, \eg @struct@, @union@, and @enum@, unless there is ambiguity with variable names in the same scope. 632 The following shows both value and pointer polymorphism. 672 Note, the declaration of @U@ is not prefixed with @union@. 673 Like \CC, \CFA allows optional prefixing of type names with their kind, \eg @struct@, @union@, and @enum@, unless there is ambiguity with variable names in the same scope. 674 In addition, a semi-non-compatible change is made so that Plan-9 syntax means a forward declaration in a nested type. 675 Since the Plan-9 extension is not part of C and rarely used, this change has minimal impact. 676 Hence, all Plan-9 semantics are denoted by the @inline@ qualifier, which good ``eye-candy'' when reading a structure definition to spot Plan-9 definitions. 677 Finally, the following code shows the value and pointer polymorphism. 633 678 \begin{cfa} 634 679 void f( U, U * ); $\C{// value, pointer}$ 635 680 void g( W, W * ); $\C{// value, pointer}$ 636 U u, * up; 637 S s, * sp; 638 W w, * wp; 639 u = s; up = sp; $\C{// value, pointer}$ 640 w = s; wp = sp; $\C{// value, pointer}$ 681 U u, * up; S s, * sp; W w, * wp; 682 u = s; up = sp; $\C{// value, pointer}$ 683 w = s; wp = sp; $\C{// value, pointer}$ 641 684 f( s, &s ); $\C{// value, pointer}$ 642 685 g( s, &s ); $\C{// value, pointer}$ … … 645 688 In general, non-standard C features (@gcc@) do not need any special treatment, as they are directly passed through to the C compiler. 646 689 However, the Plan-9 semantics allow implicit conversions from the outer type to the inner type, which means the \CFA type resolver must take this information into account. 647 Therefore, the \CFA translator must implement the Plan-9 features and insert necessary type conversions into the translated code output.690 Therefore, the \CFA resolver must implement the Plan-9 features and insert necessary type conversions into the translated code output. 648 691 In the current version of \CFA, this is the only kind of implicit type conversion other than the standard C conversions. 649 692 650 Since variable overloading is possible in \CFA, \CFA's implementation of Plan-9 polymorphism allows duplicate field names. 651 When an outer field and an embedded field have the same name and type, the inner field is shadowed and cannot be accessed directly by name. 652 While such definitions are allowed, duplicate field names is not good practice in general and should be avoided if possible. 653 Plan-9 fields can be nested, and a struct definition can contain multiple Plan-9 embedded fields. 654 In particular, the \newterm{diamond pattern}~\cite[\S~6.1]{Stroustrup89}\cite[\S~4]{Cargill91} can occur and result in a nested field to be embedded twice. 693 Plan-9 polymorphism can result in duplicate field names. 694 For example, the \newterm{diamond pattern}~\cite[\S~6.1]{Stroustrup89}\cite[\S~4]{Cargill91} can result in nested fields being embedded twice. 655 695 \begin{cfa} 656 696 struct A { int x; }; … … 658 698 struct C { inline A; }; 659 699 struct D { 660 inline B; 661 inline C; 662 }; 663 D d; 664 \end{cfa} 665 In the above example, the expression @d.x@ becomes ambiguous, since it can refer to the indirectly embedded field either from @B@ or from @C@. 666 It is still possible to disambiguate the expression by first casting the outer struct to one of the directly embedded type, such as @((B)d).x@. 700 inline B; // B.x 701 inline C; // C.x 702 } d; 703 \end{cfa} 704 Because the @inline@ structures are flattened, the expression @d.x@ is ambiguous, as it can refer to the embedded field either from @B@ or @C@. 705 @gcc@ generates a syntax error about the duplicate member @x@. 706 The equivalent \CC definition compiles: 707 \begin{c++} 708 struct A { int x; }; 709 struct B : public A {}; 710 struct C : public A {}; 711 struct D : @public B, C@ { // multiple inheritance 712 } d; 713 \end{c++} 714 and again the expression @d.x@ is ambiguous. 715 While \CC has no direct syntax to disambiguate @x@, \ie @d.B.x@ or @d.C.x@, it is possible with casts, @((B)d).x@ or @((C)d).x@. 716 Like \CC, \CFA compiles the Plan-9 version and provides direct syntax and casts to disambiguate @x@. 717 While ambiguous definitions are allowed, duplicate field names is poor practice and should be avoided if possible. 718 However, when a programmer does not control all code, this problem can occur and a naming workaround should exist. -
doc/theses/mike_brooks_MMath/Makefile
rb006c51e r10a9479d 10 10 TeXSRC = ${wildcard *.tex} 11 11 PicSRC = ${notdir ${wildcard ${Pictures}/*.png}} 12 Demo SRC = ${notdir ${wildcard ${Programs}/*-demo.cfa}}12 DemoPgmSRC = ${notdir ${wildcard ${Programs}/*-demo.cfa}} 13 13 PgmSRC = ${notdir ${wildcard ${Programs}/*}} 14 14 RunPgmSRC = ${notdir ${wildcard ${Programs}/*.run.*}} … … 24 24 BASE = ${basename ${DOCUMENT}} # remove suffix 25 25 26 DemoTex = ${DemoSRC:%.cfa=${Build}/%.tex}27 26 RunPgmExe = ${addprefix ${Build}/,${basename ${basename ${RunPgmSRC}}}} 28 27 RunPgmOut = ${RunPgmExe:%=%.out} 28 DemoPgmExe = ${addprefix ${Build}/,${basename ${basename ${DemoPgmSRC}}}} 29 DemoPgmOut = ${DemoPgmExe:%=%.out} 29 30 30 31 # Commands … … 38 39 # Rules and Recipes 39 40 40 .PHONY : all fragments_ran clean # not file names 41 .PRECIOUS : ${Build}/% ${Build}/%-demo # don't delete intermediates 41 .PHONY : all clean # not file names 42 .SECONDARY: 43 #.PRECIOUS : ${Build}/% # don't delete intermediates 42 44 .ONESHELL : 43 45 44 all : fragments_ran ${DOCUMENT} 45 46 fragments_ran : $(RunPgmOut) 46 all : ${DOCUMENT} 47 47 48 48 clean : … … 51 51 # File Dependencies 52 52 53 %.pdf : ${TeXSRC} $ {DemoTex} ${PicSRC} ${PgmSRC} ${BibSRC} ${BibRep}/pl.bib ${LaTMac}/common.tex Makefile | ${Build}53 %.pdf : ${TeXSRC} $(RunPgmOut) ${DemoPgmOut} ${PicSRC} ${BibSRC} ${BibRep}/pl.bib ${LaTMac}/common.tex Makefile | ${Build} 54 54 ${LaTeX} ${BASE} 55 55 ${BibTeX} ${Build}/${BASE} … … 64 64 mkdir -p $@ 65 65 66 %-demo.tex: %-demo| ${Build}67 $ < >$@66 ${Build}/%-demo: ${Programs}/%-demo.cfa | ${Build} 67 ${CFA} $< -o $@ 68 68 69 ${Build}/% -demo: ${Programs}/%-demo.cfa | ${Build}69 ${Build}/%: ${Programs}/%-demo.cfa | ${Build} 70 70 ${CFA} $< -o $@ 71 71 -
doc/theses/mike_brooks_MMath/array.tex
rb006c51e r10a9479d 2 2 \label{c:Array} 3 3 4 Arrays in C are possibly the single most misunderstood and incorrectly used feature in the language, resulting in the largest proportion of runtime errors and security violations. 5 This chapter describes the new \CFA language and library features that introduce a length-checked array type, @array@, to the \CFA standard library~\cite{Cforall}. 6 7 Offering the @array@ type, as a distinct alternative to the C array, is consistent with \CFA's goal of backwards compatibility, \ie virtually all existing C (@gcc@) programs can be compiled by \CFA with only a small number of changes, similar to \CC (@g++@). 8 However, a few compatibility-breaking changes to the behaviour of the C array are necessary, both as an implementation convenience and to fix C's lax treatment of arrays. 9 Hence, the @array@ type is an opportunity to start from a clean slate and show a cohesive selection of features, making it unnecessary to deal with every inherited complexity of the C array. 10 4 11 5 12 \section{Introduction} 6 13 \label{s:ArrayIntro} 7 14 8 Arrays in C are possibly the single most misunderstood and incorrectly used feature in the language, resulting in the largest proportion of runtime errors and security violations. 9 This chapter describes the new \CFA language and library features that introduce a length-checked array type to the \CFA standard library~\cite{Cforall}. 10 11 Specifically, a new \CFA array is declared by instantiating the generic @array@ type, 12 much like instantiating any other standard-library generic type (such as @dlist@), 15 The new \CFA array is declared by instantiating the generic @array@ type, 16 much like instantiating any other standard-library generic type (such as \CC @vector@), 13 17 though using a new style of generic parameter. 14 18 \begin{cfa} … … 16 20 \end{cfa} 17 21 Here, the arguments to the @array@ type are @float@ (element type) and @99@ (length). 18 When this type is used as a function parameter, the type-system requires that a call's argument matches, down to the length.22 When this type is used as a function parameter, the type-system requires that a call's argument is a perfect match. 19 23 \begin{cfa} 20 24 void f( @array( float, 42 )@ & p ) {} $\C{// p accepts 42 floats}$ 21 f( x ); $\C{// statically rejected: type s are different, 99 != 42}$25 f( x ); $\C{// statically rejected: type lengths are different, 99 != 42}$ 22 26 23 27 test2.cfa:3:1 error: Invalid application of existing declaration(s) in expression. … … 25 29 \end{cfa} 26 30 Here, the function @f@'s parameter @p@ is declared with length 42. 27 The call @f( x )@, with the argument being the previously-declared object, is invalid, because the @array@ lengths @99@ and @42@ do not match. 28 29 A function declaration can be polymorphic over these @array@ arguments by using the @forall@ declaration prefix. 30 This function @g@'s takes arbitrary type parameter @T@ (familiar) and \emph{dimension parameter} @N@ (new). 31 A dimension paramter represents a to-be-determined count of elements, managed by the type system. 31 However, the call @f( x )@ is invalid, because @x@'s length is @99@, which does not match @42@. 32 33 A function declaration can be polymorphic over these @array@ arguments by using the \CFA @forall@ declaration prefix. 32 34 \begin{cfa} 33 35 forall( T, @[N]@ ) … … 40 42 Cforall Runtime error: subscript 1000 exceeds dimension range [0,99) $for$ array 0x555555558020. 41 43 \end{cfa} 44 Function @g@ takes an arbitrary type parameter @T@ and a \emph{dimension parameter} @N@. 45 A dimension parameter represents a to-be-determined count of elements, managed by the type system. 42 46 The call @g( x, 0 )@ is valid because @g@ accepts any length of array, where the type system infers @float@ for @T@ and length @99@ for @N@. 43 Inferring values for @T@ and @N@ is implicit , without programmer involvement.47 Inferring values for @T@ and @N@ is implicit. 44 48 Furthermore, in this case, the runtime subscript @x[0]@ (parameter @i@ being @0@) in @g@ is valid because 0 is in the dimension range $[0,99)$ of argument @x@. 45 The call @g( x, 1000 )@ is also accepted through compile time;49 However, the call @g( x, 1000 )@ is also accepted through compile time; 46 50 however, this case's subscript, @x[1000]@, generates an error, because @1000@ is outside the dimension range $[0,99)$ of argument @x@. 51 In general, the @forall( ..., [N] )@ participates in the user-relevant declaration of the name @N@, which becomes usable in parameter/return declarations and within a function. 52 The syntactic form is chosen to parallel other @forall@ forms: 53 \begin{cfa} 54 forall( @[N]@ ) ... $\C[1.5in]{// dimension}$ 55 forall( T ) ... $\C{// value datatype (formerly, "otype")}$ 56 forall( T & ) ... $\C{// opaque datatype (formerly, "dtype")}\CRT$ 57 \end{cfa} 58 % The notation @array(thing, N)@ is a single-dimensional case, giving a generic type instance. 47 59 48 60 The generic @array@ type is comparable to the C array type, which \CFA inherits from C. 49 61 Their runtime characteristics are often identical, and some features are available in both. 50 For example, assume a caller instantiates @N@ with 42 (discussion about how to follow) in:62 For example, assume a caller has an argument that instantiates @N@ with 42. 51 63 \begin{cfa} 52 64 forall( [N] ) 53 void declDemo( ) {65 void declDemo( ... ) { 54 66 float x1[N]; $\C{// built-in type ("C array")}$ 55 67 array(float, N) x2; $\C{// type from library}$ … … 59 71 The two variables have identical size and layout; they both encapsulate 42-float stack allocations, with no additional ``bookkeeping'' allocations or headers. 60 72 Providing this explicit generic approach requires a significant extension to the \CFA type system to support a full-feature, safe, efficient (space and time) array-type, which forms the foundation for more complex array forms in \CFA. 61 62 Admittedly, the @array@ library type (type for @x2@) is syntactically different from its C counterpart. 63 A future goal (TODO xref) is to provide the new features upon a built-in type whose syntax approaches C's (declaration style of @x1@). 73 In all following discussion, ``C array'' means the types like that of @x@ and ``\CFA array'' means the standard-library @array@ type (instantiations), like the type of @x2@. 74 75 Admittedly, the @array@ library type for @x2@ is syntactically different from its C counterpart. 76 A future goal (TODO xref) is to provide the new @array@ features with syntax approaching C's (declaration style of @x1@). 64 77 Then, the library @array@ type could be removed, giving \CFA a largely uniform array type. 65 At present, the C-syntax array gets partial support for the new features, so the generic @array@ is used exclusively when introducing features;78 At present, the C-syntax @array@ is only partially supported, so the generic @array@ is used exclusively in the thesis; 66 79 feature support and C compatibility are revisited in Section ? TODO. 67 68 Offering the @array@ type, as a distinct alternative to the C array, is consistent with \CFA's goal of backwards compatibility, \ie virtually all existing C (@gcc@) programs can be compiled by \CFA with only a small number of changes, similar to \CC (@g++@).69 However, a few compatibility-breaking changes to the behaviour of the C array are necessary, both as an implementation convenience and to fix C's lax treatment of arrays.70 Hence, the @array@ type is an opportunity to start from a clean slate and show a cohesive selection of features, making it unnecessary to deal with every inherited complexity of the C array.71 72 In all discussion following, ``C array'' means the types like that of @x@ and ``\CFA array'' means the standard-library @array@ type (instantiations), like the type of @x2@.73 80 74 81 My contributions in this chapter are: … … 83 90 84 91 85 \section{Definitions and design considerations} 86 87 88 \subsection{Dependent typing} 89 90 91 92 General dependent typing allows the type system to encode arbitrary predicates (e.g. behavioural specifications for functions), 92 \section{Dependent typing} 93 94 General dependent typing allows the type system to encode arbitrary predicates (\eg behavioural specifications for functions), 93 95 which is an anti-goal for my work. 94 96 Firstly, this application is strongly associated with pure functional languages, … … 101 103 102 104 103 104 105 \section{Features added} 105 106 … … 109 110 By declaring type variables at the front of object declarations, an array dimension is lexically referenceable where it is needed. 110 111 For example, a declaration can share one length, @N@, among a pair of parameters and the return, 111 meaning that it requires both input arrays to be of the same length, and guarantees that the result with beof that length as well.112 meaning that it requires both input arrays to be of the same length, and guarantees that the result is of that length as well. 112 113 \lstinput{10-17}{hello-array.cfa} 113 This function @f@ does a pointwise comparison of its two input arrays, checking if each pair of numbers is within half a percent of each other, returning the answers in a newly allocated @bool@ array. 114 The dynamic allocation of the @ret@ array by preexisting @alloc@ uses the parameterized dimension information implicitly within its @sizeof@ determination, and casts the return type. 115 Note that alloc only sees one whole type for its @T@ (which is @f@'s @array(bool, N)@); this type's size is a computation based on @N@. 116 \begin{cfa} 117 // simplification 118 static inline forall( T & | sized(T) ) 114 Function @f@ does a pointwise comparison of its two input arrays, checking if each pair of numbers is within half a percent of each other, returning the answers in a newly allocated @bool@ array. 115 The dynamic allocation of the @ret@ array, by the library @alloc@ function, 116 \begin{cfa} 117 forall( T & | sized(T) ) 119 118 T * alloc() { 120 return (T *)malloc( sizeof(T) ); 121 } 122 \end{cfa} 123 This example illustrates how the new @array@ type plugs into existing \CFA behaviour by implementing necessary @sized@ assertions needed by other types. 124 (@sized@ implies a concrete \vs abstract type with a runtime-available size, exposed as @sizeof@.) 119 return @(T *)@malloc( @sizeof(T)@ ); 120 } 121 \end{cfa} 122 uses the parameterized dimension information implicitly within its @sizeof@ determination, and casts the return type. 123 Note that @alloc@ only sees one whole type for its @T@ (which is @f@'s @array(bool, N)@); this type's size is a computation based on @N@. 124 This example illustrates how the new @array@ type plugs into existing \CFA behaviour by implementing necessary \emph{sized} assertions needed by other types. 125 (\emph{sized} implies a concrete \vs abstract type with a runtime-available size, exposed as @sizeof@.) 125 126 As a result, there is significant programming safety by making the size accessible and implicit, compared with C's @calloc@ and non-array supporting @memalign@, which take an explicit length parameter not managed by the type system. 126 127 … … 142 143 The result is a significant improvement in safety and usability. 143 144 144 In general, the @forall( ..., [N] )@ participates in the user-relevant declaration of the name @N@, which becomes usable in parameter/return declarations and within a function.145 The syntactic form is chosen to parallel other @forall@ forms:146 \begin{cfa}147 forall( @[N]@ ) ... $\C[1.5in]{// dimension}$148 forall( T & ) ... $\C{// opaque datatype (formerly, "dtype")}$149 forall( T ) ... $\C{// value datatype (formerly, "otype")}\CRT$150 \end{cfa}151 % The notation @array(thing, N)@ is a single-dimensional case, giving a generic type instance.152 145 In summary: 153 146 \begin{itemize} … … 168 161 % agreed, though already said 169 162 \item 170 \CC does not allow a template function to be nested, while \CFA le sts its polymorphic functions to be nested.163 \CC does not allow a template function to be nested, while \CFA lets its polymorphic functions to be nested. 171 164 % why is this important? 172 165 \item … … 227 220 \end{cfa} 228 221 \end{tabular} 229 \caption{\lstinline{N}-style param ters, for \CC template \vs \CFA generic type }222 \caption{\lstinline{N}-style parameters, for \CC template \vs \CFA generic type } 230 223 \label{f:TemplateVsGenericType} 231 224 \end{figure} 232 225 233 226 Just as the first example in \VRef[Section]{s:ArrayIntro} shows a compile-time rejection of a length mismatch, 234 so are length mismatches stopped when they inv love dimension parameters.227 so are length mismatches stopped when they involve dimension parameters. 235 228 While \VRef[Figure]{f:fHarness} shows successfully calling a function @f@ expecting two arrays of the same length, 236 229 \begin{cfa} 237 230 array( bool, N ) & f( array( float, N ) &, array( float, N ) & ); 238 231 \end{cfa} 239 a static rejection occurs when attempting to call @f@ with arrays of potentiallydiffering lengths.232 a static rejection occurs when attempting to call @f@ with arrays of differing lengths. 240 233 \lstinput[tabsize=1]{70-74}{hello-array.cfa} 241 234 When the argument lengths themselves are statically unknown, … … 252 245 Orthogonally, the \CFA array type works within generic \emph{types}, \ie @forall@-on-@struct@. 253 246 The same argument safety and the associated implicit communication of array length occurs. 254 Preexisting \CFA allowed aggregate types to be generalized with type parameters, enabling parameterizing forelement types.247 Preexisting \CFA allowed aggregate types to be generalized with type parameters, enabling parameterizing of element types. 255 248 Now, \CFA also allows parameterizing them by length. 256 249 Doing so gives a refinement of C's ``flexible array member'' pattern[TODO: cite ARM 6.7.2.1 pp18]\cite{arr:gnu-flex-mbr}. … … 259 252 This flexibility, in turn, allows for multiple array members. 260 253 \lstinput{10-15}{hello-accordion.cfa} 261 This structure's layout has the starting offset of @studentIds@ varying according to the generic parameter @C@, and the offset of @preferences@ varying according to both generic parameters. 262 263 The school example has the data structure capturing many students' course-preference forms. 264 It has course- and student-level metadata (their respective display names) and a position-based preferecens' matrix. 265 The input files in \VRef[Figure]{f:checkHarness} give example data. 266 267 When a function operates on a @School@ structure, the type system handles its memory layout transparently. 268 \lstinput{30-37}{hello-accordion.cfa} 269 In the running example, this @getPref@ function answers, 270 for the student at position @sIx@, what is the position of its @pref@\textsuperscript{th}-favoured class? 271 272 \VRef[Figure]{f:checkHarness} shows the @School@ harness and results with different array sizes. 273 This example program prints the courses in each student's preferred order, all using the looked-up display names. 274 Note the declaration of the @school@ variable. 254 The structure has course- and student-level metatdata (their respective field names) and a position-based preferences' matrix. 255 Its layout has the starting offset of @studentIds@ varying according to the generic parameter @C@, and the offset of @preferences@ varying according to both generic parameters. 256 257 \VRef[Figure]{f:checkHarness} shows a program main using @School@ and results with different array sizes. 258 The @school@ variable holds many students' course-preference forms. 275 259 It is on the stack and its initialization does not use any casting or size arithmetic. 276 260 Both of these points are impossible with a C flexible array member. … … 280 264 \end{cfa} 281 265 This ability to avoid casting and size arithmetic improves safety and usability over C flexible array members. 282 266 Finally, inputs and outputs are given at the bottom for different sized schools. 267 The example program prints the courses in each student's preferred order, all using the looked-up display names. 283 268 284 269 \begin{figure} 285 % super hack to get this to line up 286 \begin{tabular}{@{}ll@{\hspace{25pt}}l@{}} 287 \begin{tabular}{@{}p{3.25in}@{}} 270 \begin{cquote} 288 271 \lstinput{50-55}{hello-accordion.cfa} 289 \vspace*{-3pt}290 272 \lstinput{90-98}{hello-accordion.cfa} 291 \end{tabular} 292 & 293 \raisebox{0.32\totalheight}{% 294 }% 295 & 296 \end{tabular} 297 298 TODO: Get Peter's layout help 299 300 \$ cat school1 301 273 \ \\ 274 @$ cat school1@ 302 275 \lstinput{}{school1} 303 276 304 \$ ./a.out < school1 305 277 @$ ./a.out < school1@ 306 278 \lstinput{}{school1.out} 307 279 308 \$ cat school2 309 280 @$ cat school2@ 310 281 \lstinput{}{school2} 311 282 312 \$ ./a.out < school2 313 283 @$ ./a.out < school2@ 314 284 \lstinput{}{school2.out} 285 \end{cquote} 315 286 316 287 \caption{\lstinline{School} harness, input and output} 317 288 \label{f:checkHarness} 318 289 \end{figure} 290 291 When a function operates on a @School@ structure, the type system handles its memory layout transparently. 292 \lstinput{30-37}{hello-accordion.cfa} 293 In the example, this @getPref@ function answers, for the student at position @is@, what is the position of its @pref@\textsuperscript{th}-favoured class? 319 294 320 295 … … 333 308 But simplifications close enough for the present discussion are: 334 309 \begin{cfa} 335 336 337 338 339 340 struct array_1d{341 342 343 \end{cfa} 344 Th is structure pattern, plus a subscript operator, is all that @array@ provides.310 forall( [N] ) 311 struct array_1d_float { 312 float items[N]; 313 }; 314 forall( T, [N] ) 315 struct array_1d_T { 316 T items[N]; 317 }; 318 \end{cfa} 319 These two structure patterns, plus a subscript operator, is all that @array@ provides. 345 320 346 321 My main work is letting a programmer define 347 such a struct re (one whose type is parameterized by @[N]@)322 such a structure (one whose type is parameterized by @[N]@) 348 323 and functions that operate on it (these being similarly parameterized). 349 324 … … 351 326 \begin{itemize} 352 327 \item 353 The resolver, providingvalues for a used declaration's type-system variables,328 Resolver provided values for a used declaration's type-system variables, 354 329 gathered from type information in scope at the usage site. 355 356 330 \item 357 331 The box pass, encoding information about type parameters 358 332 into ``extra'' regular parameters/arguments on declarations and calls. 359 333 Notably, it conveys the size of a type @foo@ as a @__sizeof_foo@ parameter, 360 and rewrites the @sizeof(foo)@ expression as @__sizeof_foo@, i.e.a use of the parameter.334 and rewrites the @sizeof(foo)@ expression as @__sizeof_foo@, \ie a use of the parameter. 361 335 \end{itemize} 362 336 … … 364 338 This work is detailed in \VRef[Section]{s:ArrayTypingC}. 365 339 However, the resolution--boxing scheme, in its preexisting state, was already equipped to work on (desugared) dimension parameters. 366 The discussion following explains the desugaring and how correctlowered code results.367 368 A n even simpler structure, and a toy function on it, demonstrate what's needed for the encoding.369 \begin{cfa} 370 forall( [@N@] ) { // [1] 371 372 void f( thing(@N@) ) { sout | @N@; } // [2], [3]373 374 375 thing( @10@ ) x; f(x); // prints 10, [4]376 thing( 100 ) y; f(y); // prints 100377 378 340 The following discussion explains the desugaring and how correctly lowered code results. 341 342 A simpler structure, and a toy function on it, demonstrate what is needed for the encoding. 343 \begin{cfa} 344 forall( [@N@] ) { $\C{// [1]}$ 345 struct thing {}; 346 void f( thing(@N@) ) { sout | @N@; } $\C{// [2], [3]}$ 347 } 348 int main() { 349 thing( @10@ ) x; f( x ); $\C{// prints 10, [4]}$ 350 thing( 100 ) y; f( y ); $\C{// prints 100}$ 351 return 0; 352 } 379 353 \end{cfa} 380 354 This example has: … … 389 363 A value like 10 being used as an argument to the parameter @N@. 390 364 \end{enumerate} 391 The chosen solution being to encode the value @N@ \emph{as a type},items 1 and 2 are immediately available for free.365 The chosen solution is to encode the value @N@ \emph{as a type}, so items 1 and 2 are immediately available for free. 392 366 Item 3 needs a way to recover the encoded value from a (valid) type (and to reject invalid types occurring here). 393 367 Item 4 needs a way to produce a type that encodes the given value. … … 400 374 \item 401 375 Given a dimension expression $e$, produce type @char[@$e$@]@ to represent it. 402 If $e$ evaluates to $n$ then the en doded type has size $n$.376 If $e$ evaluates to $n$ then the encoded type has size $n$. 403 377 \item 404 378 Given a type $T$ (produced by these rules), recover the value that it represents with the expression @sizeof(@$T$@)@. … … 410 384 The running example is lowered to: 411 385 \begin{cfa} 412 forall( @N*@ ) { // [1] 413 414 void f( thing(@N@) ) { sout | @sizeof(N)@; } // [2], [3]415 416 417 thing( char[@10@] ) x; f(x); // prints 10, [4]418 thing( char[100] ) y; f(y); // prints 100419 420 386 forall( @N *@ ) { $\C{// [1]}$ 387 struct thing {}; 388 void f( thing(@N@) ) { sout | @sizeof(N)@; } $\C{// [2], [3]}$ 389 } 390 int main() { 391 thing( char[@10@] ) x; f( x ); $\C{// prints 10, [4]}$ 392 thing( char[100] ) y; f( y ); $\C{// prints 100}$ 393 return 0; 394 } 421 395 \end{cfa} 422 396 Observe: … … 430 404 The @sout...@ expression (being an application of the @?|?@ operator) has a second argument that is an ordinary expression. 431 405 \item 432 The type of variable @x@ is another @thing(-)@ type; the argument to the generic @thing@ is a type (array type).406 The type of variable @x@ is another @thing(-)@ type; the argument to the generic @thing@ is a type (array type of bytes, @char@). 433 407 \end{enumerate} 434 408 435 409 From this point, preexisting \CFA compilation takes over lowering it the rest of the way to C. 436 Inspecting the result shows what the above translation achieves. 437 A form that shows only the relevant changes of the box pass (as informed by the resolver), leaving the rest unadulterated, is: 438 \begin{cfa} 439 // [1] 440 void f( size_t __sizeof_N, @void *@ ) { sout | @__sizeof_N@; } // [2], [3] 441 int main() { 442 struct __conc_thing_10 {} x; f(@10@, &x); // prints 10, [4] 443 struct __conc_thing_100 {} y; f(@100@, &y); // prints 100 444 return 0; 445 } 410 Here the result shows only the relevant changes of the box pass (as informed by the resolver), leaving the rest unadulterated: 411 \begin{cfa} 412 // [1] 413 void f( size_t __sizeof_N, @void *@ ) { sout | @__sizeof_N@; } $\C{// [2], [3]}$ 414 int main() { 415 struct __conc_thing_10 {} x; f( @10@, &x ); $\C{// prints 10, [4]}$ 416 struct __conc_thing_100 {} y; f( @100@, &y ); $\C{// prints 100}$ 417 return 0; 418 } 446 419 \end{cfa} 447 420 Observe: … … 452 425 The type @thing(N)@ is (replaced by @void *@, but thereby effectively) gone. 453 426 \item 454 The @sout...@ expression (being an application of the @?|?@ operator) has a second argument that is a regular variable (parameter) usage.427 The @sout...@ expression (being an application of the @?|?@ operator) has a regular variable (parameter) usage for its second argument. 455 428 \item 456 429 Information about the particular @thing@ instantiation (value 10) has moved, from the type, to a regular function-call argument. 457 430 \end{enumerate} 458 At the end of the desugaring and downstream processing, the original C idiom of ``pass both a pointer and a length parameter'' has been reconstructed.459 In the programmer-written form, only the thingis passed.431 At the end of the desugaring and downstream processing, the original C idiom of ``pass both a length parameter and a pointer'' has been reconstructed. 432 In the programmer-written form, only the @thing@ is passed. 460 433 The compiler's action produces the more complex form, which if handwritten, would be error-prone. 461 434 462 Back at the very front end, the parsing changes, AST schema extensions, and validation rules for enabling the sugared user input are:435 Back at the compiler front end, the parsing changes AST schema extensions and validation rules for enabling the sugared user input. 463 436 \begin{itemize} 464 437 \item … … 467 440 Have the new brand of type-variable, \emph{Dimension}, in the AST form of a type-variable, to represent one parsed from @[-]@. 468 441 \item 469 Allow a type variable to occur in expression position. Validate (after parsing) that only dimension-branded type variables are used here.442 Allow a type variable to occur in an expression. Validate (after parsing) that only dimension-branded type variables are used here. 470 443 \item 471 444 Allow an expression to occur in type-argument position. Brand the resulting type argument as a dimension. … … 473 446 Validate (after parsing), on a generic-type usage, \eg the type part of the declaration 474 447 \begin{cfa} 475 @array_1d( foo, bar ) x;@448 array_1d( foo, bar ) x; 476 449 \end{cfa} 450 \vspace*{-10pt} 477 451 that the brands on the generic arguments match the brands of the declared type variables. 478 452 Here, that @foo@ is a type and @bar@ is a dimension. … … 488 462 from one party who knows it, to another who is willing to work with any given length. 489 463 For scenarios where the concern is a mishandled length, 490 the interaction is between two parties who both claim to know (something about) it. 491 Such a scenario occurs in this pure C fragment, wich today's C compilers accept: 492 \begin{cfa} 493 int n = @42@; 494 float x[n]; 495 float (*xp)[@999@] = &x; 496 (*xp)[@500@]; // in "bound"? 497 \end{cfa} 498 464 the interaction is between two parties who both claim to know something about it. 465 Such a scenario occurs in this pure C fragment, which today's C compilers accept: 466 \begin{cfa} 467 int n = @42@; 468 float x[n]; 469 float (*xp)[@999@] = &x; 470 (*xp)[@500@]; $\C{// in "bound"?}$ 471 \end{cfa} 499 472 Here, the array @x@ has length 42, while a pointer to it (@xp@) claims length 999. 500 473 So, while the subscript of @xp@ at position 500 is out of bound of its referent @x@, … … 505 478 The \CFA new-array rejects the analogous case: 506 479 \begin{cfa} 507 int n = @42@; 508 array(float, n) x; 509 array(float, 999) * xp = x; // static rejection here 510 (*xp)[@500@]; // runtime check vs len 999 511 \end{cfa} 512 513 % TODO: kill the vertical whitespace around these lists 514 % nothing from https://stackoverflow.com/questions/1061112/eliminate-space-before-beginitemize is working 515 516 The way the \CFA array is implemented, 517 the type analysis of this \CFA case reduces to a case similar to the earlier C version. 480 int n = @42@; 481 array(float, n) x; 482 array(float, 999) * xp = x; $\C{// static rejection here}$ 483 (*xp)[@500@]; $\C{// runtime check vs len 999}$ 484 \end{cfa} 485 The way the \CFA array is implemented, the type analysis of this case reduces to a case similar to the earlier C version. 518 486 The \CFA compiler's compatibility analysis proceeds as: 519 \begin{itemize}[ noitemsep,partopsep=-\parskip,parsep=0pt,leftmargin=4em]487 \begin{itemize}[parsep=0pt] 520 488 \item 521 489 Is @array(float, 999)@ type-compatible with @array(float, n)@? 522 490 \item 523 Is @arrayX(float, char[999])@ type-compatible with @arrayX(float, char[n])@? 524 \footnote{Here, \lstinline{arrayX} represents the type that results491 Is @arrayX(float, char[999])@ type-compatible with @arrayX(float, char[n])@?\footnote{ 492 Here, \lstinline{arrayX} represents the type that results 525 493 from desugaring the \lstinline{array} type 526 494 into a type whose generic parameters are all types. … … 531 499 Is @char[999]@ type-compatible with @char[n]@? 532 500 \end{itemize} 533 534 I chose to achieve the necessary rejection of the \CFA case 535 by adding a rejection of the corresponding C case. 536 537 This decision is not backward compatible. 501 To achieve the necessary \CFA rejections meant rejecting the corresponding C case, which is not backward compatible. 538 502 There are two complementary mitigations for this incompatibility. 539 503 … … 542 506 This situation might arise if @n@ were known to be 999, 543 507 rather than 42, as in the introductory examples. 544 The programmer can add a cast , as in:545 \begin{cfa} 546 xp = ( float (*)[999] ) &x;547 \end{cfa} 548 This addition causes \CFA to accept, be acause now, the programmer has accepted blame.508 The programmer can add a cast in the \CFA code. 509 \begin{cfa} 510 xp = @(float (*)[999])@ &x; 511 \end{cfa} 512 This addition causes \CFA to accept, because now, the programmer has accepted blame. 549 513 This addition is benign in plain C, because the cast is valid, just unnecessary there. 550 514 Moreover, the addition can even be seen as appropriate ``eye candy,'' … … 556 520 Second, the incompatibility only affects types like pointer-to-array, 557 521 which are are infrequently used in C. 558 The more common C idiom for aliasing an array is to use the pointer-to-first-element type, 559 which does not participate in the \CFA array's length checking. 560 \footnote{Notably, the desugaring of the \lstinline{array@} type, 561 avoids letting any \lstinline{-[-]} type decay, 562 in order to preserve the length information that powers runtime bound checking.} 563 Therefore, the frequency of needing to upgrade wild C code (as discussed in the first mitigation) 522 The more common C idiom for aliasing an array is to use a pointer-to-first-element type, 523 which does not participate in the \CFA array's length checking.\footnote{ 524 Notably, the desugaring of the \lstinline{array} type avoids letting any \lstinline{-[-]} type decay, 525 in order to preserve the length information that powers runtime bound-checking.} 526 Therefore, the frequency of needing to upgrade legacy C code (as discussed in the first mitigation) 564 527 is anticipated to be low. 565 528 566 529 Because the incompatibility represents a low cost to a \CFA onboarding effort 567 530 (with a plausible side benefit of linting the original code for a missing annotation), 568 I elected not to add special measuresto retain the compatibility.531 no special measures were added to retain the compatibility. 569 532 It would be possible to flag occurrences of @-[-]@ types that come from @array@ desugaring, 570 533 treating those with stricter \CFA rules, while treating others with classic C rules. … … 573 536 574 537 Having allowed that both the initial C example's check 575 \begin{itemize} [noitemsep,partopsep=-\parskip,parsep=0pt,leftmargin=4em]538 \begin{itemize} 576 539 \item 577 540 Is @float[999]@ type-compatible with @float[n]@? 578 541 \end{itemize} 579 and the second \CFA ex mple's induced check580 \begin{itemize} [noitemsep,partopsep=-\parskip,parsep=0pt,leftmargin=4em]542 and the second \CFA example's induced check 543 \begin{itemize} 581 544 \item 582 545 Is @char[999]@ type-compatible with @char[n]@? … … 587 550 To implement the new \CFA rules, I took the syntactic recursion a step further, obtaining, 588 551 in both cases: 589 \begin{itemize} [noitemsep,partopsep=-\parskip,parsep=0pt,leftmargin=4em]552 \begin{itemize} 590 553 \item 591 Is @999@ TBD-compatible with @n@?554 Is @999@ compatible with @n@? 592 555 \end{itemize} 593 This compatibility question applies to a pair of expressions, where the earlier oneswere to types.556 This compatibility question applies to a pair of expressions, where the earlier implementation were to types. 594 557 Such an expression-compatibility question is a new addition to the \CFA compiler. 595 These questions only arise in the context of dimension expressions on (C) array types.558 Note, these questions only arise in the context of dimension expressions on (C) array types. 596 559 597 560 TODO: ensure these compiler implementation matters are treated under \CFA compiler background: … … 600 563 GenPoly. 601 564 602 The relevant technical component of the \CFA compiler is, 603 within the type resolver, the type unification procedure. 565 The relevant technical component of the \CFA compiler is the type unification procedure within the type resolver. 604 566 I added rules for continuing this unification into expressions that occur within types. 605 567 It is still fundamentally doing \emph{type} unification … … 607 569 and not participating in binding any variables that stand in for expression fragments 608 570 (for there is no such sort of variable in \CFA's analysis.) 609 610 571 An unfortunate fact about the \CFA compiler's preexisting implementation is that 611 572 type unification suffers from two forms of duplication. … … 613 574 The first duplication has (many of) the unification rules stated twice. 614 575 As a result, my additions for dimension expressions are stated twice. 615 The extra statement of the rules occurs in the GenPolymodule,576 The extra statement of the rules occurs in the @GenPoly@ module, 616 577 where concrete types like @array(int, 5)@\footnote{ 617 578 Again, the presentation is simplified 618 by leaving the \lstinline{array} macro unexpanded }579 by leaving the \lstinline{array} macro unexpanded.} 619 580 are lowered into corresponding C types @struct __conc_array_1234@ (the suffix being a generated index). 620 In this case, the struct's definition gives fields that hardcode the argument values of @float@ and @5@. 621 The next time an @array(-,-)@ concrete instance is encountered, 622 is the previous @struct __conc_array_1234@ suitable for it? 623 Yes, for another occurrance of @array(int, 5)@; 581 In this case, the struct's definition contains fields that hardcode the argument values of @float@ and @5@. 582 The next time an @array(-,-)@ concrete instance is encountered, it checks if the previous @struct __conc_array_1234@ is suitable for it. 583 Yes, for another occurrence of @array(int, 5)@; 624 584 no, for either @array(rational(int), 5)@ or @array(int, 42)@. 625 585 By the last example, this phase must ``reject'' … … 630 590 In the program 631 591 \begin{cfa} 632 void f( double );633 forall( T & ) void f( T & );634 635 636 f(x);637 638 \end{cfa} 639 when resolving the function call, the first unification stage640 compares the type s @T@, of the parameter,with @array( float, n + 1 )@, of the argument.592 void @f@( double ); 593 forall( T & ) void @f@( T & ); 594 void g( int n ) { 595 array( float, n + 1 ) x; 596 f(x); // overloaded 597 } 598 \end{cfa} 599 when resolving the function call, @g@, the first unification stage 600 compares the type @T@ of the parameter with @array( float, n + 1 )@, of the argument. 641 601 TODO: finish. 642 602 … … 647 607 TODO: Deal with directionality, that I'm doing exact-match, no ``at least as long as,'' no subtyping. 648 608 Should it be an earlier scoping principle? Feels like it should matter in more places than here.} 649 So, a ``yes'' answer must represent a guarantee that both expressions willevaluate the650 same result, while a ``no'' can tolerate ``they might, but we're not sure ,'609 So, a ``yes'' answer must represent a guarantee that both expressions evaluate the 610 same result, while a ``no'' can tolerate ``they might, but we're not sure'', 651 611 provided that practical recourses are available 652 to let programmers express theirbetter knowledge.653 The specific rule-set that I offer withthe current release is, in fact, extremely conservative.612 to let programmers express better knowledge. 613 The new rule-set in the current release is, in fact, extremely conservative. 654 614 I chose to keep things simple, 655 and allow real future needs do drive adding additional complexity, 656 within the framework that I laid out. 615 and allow future needs to drive adding additional complexity, within the new framework. 657 616 658 617 For starters, the original motivating example's rejection … … 662 621 Rather, the analysis assumes a variable's value can be anything, 663 622 and so there can be no guarantee that its value is 999. 664 So, a variable useand a literal can never match.623 So, a variable and a literal can never match. 665 624 666 625 Two occurrences of the same literal value are obviously a fine match. 667 For two occurrences of the same varia lbe, more information is needed.626 For two occurrences of the same variable, more information is needed. 668 627 For example, this one is fine 669 628 \begin{cfa} 670 671 672 float (*xp)[n] = x;// accept673 629 void f( const int n ) { 630 float x[n]; 631 float (*xp)[n] = x; // accept 632 } 674 633 \end{cfa} 675 634 while this one is not: 676 635 \begin{cfa} 677 678 679 680 681 float (*xp)[n] = x;// reject682 636 void f() { 637 int n = 42; 638 float x[n]; 639 n = 999; 640 float (*xp)[n] = x; // reject 641 } 683 642 \end{cfa} 684 643 Furthermore, the fact that the first example sees @n@ as @const@ 685 is not actually a sufficent basis.644 is not actually sufficient. 686 645 In this example, @f@'s length expression's declaration is as @const@ as it can be, 687 646 yet its value still changes between the two invocations: 688 \begin{cfa} 689 // compile unit 1 690 void g(); 691 void f( const int & const nr ) { 692 float x[nr]; 693 g(); 694 float (*xp)[nr] = x; // reject 695 } 696 // compile unit 2 697 static int n = 42; 698 void g() { 699 n = 99; 700 } 701 void f( const int & ); 702 int main () { 703 f(n); 704 return 0; 705 } 706 \end{cfa} 707 The issue in this last case is, 708 just because you aren't able to change something doesn't mean someone else can't. 709 710 My rule set also respects a feature of the C tradition. 711 In spite of the several limitations of the C rules 647 \begin{cquote} 648 \setlength{\tabcolsep}{15pt} 649 \begin{tabular}{@{}ll@{}} 650 \begin{cfa} 651 // compile unit 1 652 void g(); 653 void f( const int & const nr ) { 654 float x[nr]; 655 g(); // change n 656 @float (*xp)[nr] = x;@ // reject 657 } 658 \end{cfa} 659 & 660 \begin{cfa} 661 // compile unit 2 662 static int n = 42; 663 void g() { 664 n = 99; 665 } 666 667 f( n ); 668 \end{cfa} 669 \end{tabular} 670 \end{cquote} 671 The issue here is that knowledge needed to make a correct decision is hidden by separate compilation. 672 Even within a translation unit, static analysis might not be able to provide all the information. 673 674 My rule set also respects a traditional C feature: In spite of the several limitations of the C rules 712 675 accepting cases that produce different values, there are a few mismatches that C stops. 713 C is quite precise when working with two static values :714 \begin{cfa} 715 716 717 float (*xp1)[42] = &x;// accept718 float (*xp2)[999] = &x;// reject676 C is quite precise when working with two static values. 677 \begin{cfa} 678 enum { fortytwo = 42 }; 679 float x[fortytwo]; 680 float (*xp1)[42] = &x; // accept 681 float (*xp2)[999] = &x; // reject 719 682 \end{cfa} 720 683 My \CFA rules agree with C's on these cases. 721 684 722 Myrules classify expressions into three groups:685 In summary, the new rules classify expressions into three groups: 723 686 \begin{description} 724 687 \item[Statically Evaluable] 725 688 Expressions for which a specific value can be calculated (conservatively) 726 689 at compile-time. 727 A preexisting \CFA compiler module defines which expressions qualify,690 A preexisting \CFA compiler module defines which literals, enumerators, and expressions qualify, 728 691 and evaluates them. 729 Includes literals and enumeration values.730 692 \item[Dynamic but Stable] 731 The value of a variable declared as @const@. 732 Includes a @const@ parameter. 693 The value of a variable declared as @const@, including a @const@ parameter. 733 694 \item[Potentially Unstable] 734 695 The catch-all category. Notable examples include: 735 any function-call result (@float x[foo()];@),736 the particular function-call result that is a pointer dereference (@void f(const int * n) { float x[*n]; }@), and696 any function-call result, @float x[foo()];@, 697 the particular function-call result that is a pointer dereference, @void f(const int * n)@ @{ float x[*n]; }@, and 737 698 any use of a reference-typed variable. 738 699 \end{description} 739 740 My \CFA rules are: 700 Within these groups, my \CFA rules are: 741 701 \begin{itemize} 742 702 \item … … 744 704 Notably, this rule allows a literal to match with an enumeration value, based on the value. 745 705 \item 746 Accept a Dynamic but Stable pair, if both expressions are written out the same, e.g. refers tosame variable declaration.706 Accept a Dynamic but Stable pair, if both expressions are written out the same, \eg refers to the same variable declaration. 747 707 \item 748 708 Otherwise, reject. 749 Notably, reject all pairs from the Potentially Unstable group. 750 Notably, reject all pairs that cross groups. 709 Notably, reject all pairs from the Potentially Unstable group and all pairs that cross groups. 751 710 \end{itemize} 752 753 711 The traditional C rules are: 754 712 \begin{itemize} … … 759 717 \end{itemize} 760 718 761 762 \newcommand{\falsealarm}{{\color{orange}\small{*}}}763 \newcommand{\allowmisuse}{{\color{red}\textbf{!}}}764 \newcommand{\cmark}{\ding{51}} % from pifont765 \newcommand{\xmark}{\ding{55}}766 719 \begin{figure} 720 \newcommand{\falsealarm}{{\color{blue}\small{*}}} 721 \newcommand{\allowmisuse}{{\color{red}\textbf{!}}} 722 \newcommand{\cmark}{\ding{51}} % from pifont 723 \newcommand{\xmark}{\ding{55}} 724 767 725 \begin{tabular}{@{}l@{\hspace{16pt}}c@{\hspace{8pt}}c@{\hspace{16pt}}c@{\hspace{8pt}}c@{\hspace{16pt}}c} 768 726 & \multicolumn{2}{c}{\underline{Values Equal}} … … 778 736 \end{tabular} 779 737 780 \ vspace{12pt}781 \noindent\textbf{Legend :}782 \begin{itemize} 738 \medskip 739 \noindent\textbf{Legend} 740 \begin{itemize}[leftmargin=*] 783 741 \item 784 742 Each row gives the treatment of a test harness of the form 785 743 \begin{cfa} 786 787 744 float x[ expr1 ]; 745 float (*xp)[ expr2 ] = &x; 788 746 \end{cfa} 789 where \lstinline{expr1} and \lstinline{expr2} are metavariables varying according to the row's Case. 747 \vspace*{-10pt} 748 where \lstinline{expr1} and \lstinline{expr2} are meta-variables varying according to the row's Case. 790 749 Each row's claim applies to other harnesses too, including, 791 750 \begin{itemize} 792 751 \item 793 calling a function with a param ter like \lstinline{x} and an argument of the \lstinline{xp} type,752 calling a function with a parameter like \lstinline{x} and an argument of the \lstinline{xp} type, 794 753 \item 795 754 assignment in place of initialization, … … 801 760 \item 802 761 Each case's claim is symmetric (swapping \lstinline{expr1} with \lstinline{expr2} has no effect), 803 even though most test harnesses are asym etric.762 even though most test harnesses are asymmetric. 804 763 \item 805 764 The table treats symbolic identity (Same/Different on rows) 806 apart from value eq ality (Equal/Unequal on columns).765 apart from value equality (Equal/Unequal on columns). 807 766 \begin{itemize} 808 767 \item … … 819 778 while every Accept under Values Unequal is an allowed misuse (\allowmisuse). 820 779 \end{itemize} 821 \caption{Case comparison for array type compatibility, given pairs of dimension expressions. 822 TODO: get Peter's LaTeX help on overall appearance, probably including column spacing/centering and bullet indentation.}780 781 \caption{Case comparison for array type compatibility, given pairs of dimension expressions.} 823 782 \label{f:DimexprRuleCompare} 824 783 \end{figure} … … 826 785 827 786 Figure~\ref{f:DimexprRuleCompare} gives a case-by-case comparison of the consequences of these rule sets. 828 It demonstrates that the \CFA false alarms occur in the same cases as C treats unsafe ly.829 It also shows that C-incompatibilities only occur in cases that C treats unsafe ly.787 It demonstrates that the \CFA false alarms occur in the same cases as C treats unsafe. 788 It also shows that C-incompatibilities only occur in cases that C treats unsafe. 830 789 831 790 … … 837 796 whose reuses are rejected by the blunt current-state rules: 838 797 \begin{cfa} 839 840 841 float (*xp)[nr] = & x;// reject: nr varying (no references)842 843 float (*yp)[nv + 1] = & y; // reject: ?+? unpredicable (no functions)844 798 void f( int & nr, const int nv ) { 799 float x[nr]; 800 float (*xp)[nr] = &x; // reject: nr varying (no references) 801 float y[nv + 1]; 802 float (*yp)[nv + 1] = &y; // reject: ?+? unpredictable (no functions) 803 } 845 804 \end{cfa} 846 805 Yet, both dimension expressions are reused safely. 847 (The @nr@ reference is never written, not volatile806 The @nr@ reference is never written, not volatile 848 807 and control does not leave the function between the uses. 849 The name @?+?@ resolves to a function that is quite predictable. )850 The programmer here can add the constant declarations:851 \begin{cfa} 852 853 854 855 float (*xp)[nx] = & x; // acept856 857 858 float (*yp)[ny] = & y;// accept859 808 The name @?+?@ resolves to a function that is quite predictable. 809 Here, the programmer can add the constant declarations (cast does not work): 810 \begin{cfa} 811 void f( int & nr, const int nv ) { 812 @const int nx@ = nr; 813 float x[nx]; 814 float (*xp)[nx] = & x; // accept 815 @const int ny@ = nv + 1; 816 float y[ny]; 817 float (*yp)[ny] = & y; // accept 818 } 860 819 \end{cfa} 861 820 The result is the originally intended semantics, … … 863 822 864 823 The snapshotting trick is also used by the translation, though to achieve a different outcome. 865 Rather obviously, every array must be subscriptable, even a biz zarre one:866 \begin{cfa} 867 868 824 Rather obviously, every array must be subscriptable, even a bizarre one: 825 \begin{cfa} 826 array( float, rand(10) ) x; 827 x[0]; // 10% chance of bound-check failure 869 828 \end{cfa} 870 829 Less obvious is that the mechanism of subscripting is a function call, … … 874 833 Adjusting the example to make the function's use of length more explicit: 875 834 \begin{cfa} 876 877 878 879 835 forall ( T * ) 836 void f( T * x ) { sout | sizeof(*x); } 837 float x[ rand(10) ]; 838 f( x ); 880 839 \end{cfa} 881 840 Considering that the partly translated function declaration is, loosely, 882 841 \begin{cfa} 883 void f( size_t __sizeof_T, void * x ) { sout | __sizeof_T; } 884 \end{cfa} 885 the translated call must not go like: 886 \begin{cfa} 887 float x[ rand(10) ]; 888 f( rand(10), &x ); 889 \end{cfa} 890 Rather, its actual translation is like: 891 \begin{cfa} 892 size_t __dim_x = rand(10); 893 float x[ __dim_x ]; 894 f( __dim_x, &x ); 895 \end{cfa} 896 The occurrence of this dimension hoisting during translation was present in the preexisting \CFA compiler. 897 But its cases were buggy, particularly with determining, ``Can hoisting be skipped here?'' 898 For skipping this hoisting is clearly desirable in some cases, 899 not the least of which is when the programmer has already done so manually. 900 My work includes getting these cases right, harmonized with the accept/reject criteria, and tested. 901 902 842 void f( size_t __sizeof_T, void * x ) { sout | __sizeof_T; } 843 \end{cfa} 844 the translation must call the dimension argument twice: 845 \begin{cfa} 846 float x[ rand(10) ]; 847 f( rand(10), &x ); 848 \end{cfa} 849 Rather, the translation is: 850 \begin{cfa} 851 size_t __dim_x = rand(10); 852 float x[ __dim_x ]; 853 f( __dim_x, &x ); 854 \end{cfa} 855 The occurrence of this dimension hoisting during translation was in the preexisting \CFA compiler. 856 But its cases were buggy, particularly with determining, ``Can hoisting the expression be skipped here?'', for skipping this hoisting is clearly desirable in some cases. 857 For example, when the programmer has already done so manually. \PAB{I don't know what this means.} 858 In the new implementation, these cases are correct, harmonized with the accept/reject criteria. 903 859 904 860 TODO: Discuss the interaction of this dimension hoisting with the challenge of extra unification for cost calculation 905 861 906 862 907 \section{Multidimensional Arrays} 908 \label{toc:mdimpl} 909 910 % TODO: introduce multidimensional array feature and approaches 911 912 When working with arrays, \eg linear algebra, array dimensions are referred to as ``rows'' and ``columns'' for a matrix, adding ``planes'' for a cube. 913 (There is little terminology for higher dimensional arrays.) 914 For example, an acrostic poem\footnote{A type of poetry where the first, last or other letters in a line spell out a particular word or phrase in a vertical column.} 915 can be treated as a grid of characters, where the rows are the text and the columns are the embedded keyword(s). 916 Within a poem, there is the concept of a \newterm{slice}, \eg a row is a slice for the poem text, a column is a slice for a keyword. 917 In general, the dimensioning and subscripting for multidimensional arrays has two syntactic forms: @m[r,c]@ or @m[r][c]@. 918 919 Commonly, an array, matrix, or cube, is visualized (especially in mathematics) as a contiguous row, rectangle, or block. 920 This conceptualization is reenforced by subscript ordering, \eg $m_{r,c}$ for a matrix and $c_{p,r,c}$ for a cube. 921 Few programming languages differ from the mathematical subscript ordering. 922 However, computer memory is flat, and hence, array forms are structured in memory as appropriate for the runtime system. 923 The closest representation to the conceptual visualization is for an array object to be contiguous, and the language structures this memory using pointer arithmetic to access the values using various subscripts. 924 This approach still has degrees of layout freedom, such as row or column major order, \ie juxtaposed rows or columns in memory, even when the subscript order remains fixed. 925 For example, programming languages like MATLAB, Fortran, Julia and R store matrices in column-major order since they are commonly used for processing column-vectors in tabular data sets but retain row-major subscripting. 926 In general, storage layout is hidden by subscripting, and only appears when passing arrays among different programming languages or accessing specific hardware. 927 928 \VRef[Figure]{f:FixedVariable} shows two C90 approaches for manipulating a contiguous matrix. 929 Note, C90 does not support VLAs. 930 The fixed-dimension approach (left) uses the type system; 931 however, it requires all dimensions except the first to be specified at compile time, \eg @m[][6]@, allowing all subscripting stride calculations to be generated with constants. 932 Hence, every matrix passed to @fp1@ must have exactly 6 columns but the row size can vary. 933 The variable-dimension approach (right) ignores (violates) the type system, \ie argument and parameters types do not match, and subscripting is performed manually using pointer arithmetic in the macro @sub@. 934 935 \begin{figure} 936 \begin{tabular}{@{}l@{\hspace{40pt}}l@{}} 937 \multicolumn{1}{c}{\textbf{Fixed Dimension}} & \multicolumn{1}{c}{\textbf{Variable Dimension}} \\ 938 \begin{cfa} 939 940 void fp1( int rows, int m[][@6@] ) { 941 ... printf( "%d ", @m[r][c]@ ); ... 942 } 943 int fm1[4][@6@], fm2[6][@6@]; // no VLA 944 // initialize matrixes 945 fp1( 4, fm1 ); // implicit 6 columns 946 fp1( 6, fm2 ); 947 \end{cfa} 948 & 949 \begin{cfa} 950 #define sub( m, r, c ) *(m + r * sizeof( m[0] ) + c) 951 void fp2( int rows, int cols, int *m ) { 952 ... printf( "%d ", @sub( m, r, c )@ ); ... 953 } 954 int vm1[@4@][@4@], vm2[@6@][@8@]; // no VLA 955 // initialize matrixes 956 fp2( 4, 4, vm1 ); 957 fp2( 6, 8, vm2 ); 958 \end{cfa} 959 \end{tabular} 960 \caption{C90 Fixed \vs Variable Contiguous Matrix Styles} 961 \label{f:FixedVariable} 962 \end{figure} 963 964 Many languages allow multidimensional arrays-of-arrays, \eg in Pascal or \CC. 965 \begin{cquote} 966 \begin{tabular}{@{}ll@{}} 967 \begin{pascal} 968 var m : array[0..4, 0..4] of Integer; (* matrix *) 969 type AT = array[0..4] of Integer; (* array type *) 970 type MT = array[0..4] of AT; (* array of array type *) 971 var aa : MT; (* array of array variable *) 972 m@[1][2]@ := 1; aa@[1][2]@ := 1 (* same subscripting *) 973 \end{pascal} 974 & 975 \begin{c++} 976 int m[5][5]; 977 978 typedef vector< vector<int> > MT; 979 MT vm( 5, vector<int>( 5 ) ); 980 m@[1][2]@ = 1; aa@[1][2]@ = 1; 981 \end{c++} 982 \end{tabular} 983 \end{cquote} 984 The language decides if the matrix and array-of-array are laid out the same or differently. 985 For example, an array-of-array may be an array of row pointers to arrays of columns, so the rows may not be contiguous in memory nor even the same length (triangular matrix). 986 Regardless, there is usually a uniform subscripting syntax masking the memory layout, even though a language could differentiated between the two forms using subscript syntax, \eg @m[1,2]@ \vs @aa[1][2]@. 987 Nevertheless, controlling memory layout can make a difference in what operations are allowed and in performance (caching/NUMA effects). 988 989 C also provides non-contiguous arrays-of-arrays. 990 \begin{cfa} 991 int m[5][5]; $\C{// contiguous}$ 992 int * aa[5]; $\C{// non-contiguous}$ 993 \end{cfa} 994 both with different memory layout using the same subscripting, and both with different degrees of issues. 995 The focus of this work is on the contiguous multidimensional arrays in C. 996 The reason is that programmers are often forced to use the more complex array-of-array form when a contiguous array would be simpler, faster, and safer. 997 Nevertheless, the C array-of-array form is still important for special circumstances. 998 999 \VRef[Figure]{f:ContiguousNon-contiguous} shows the extensions made in C99 for manipulating contiguous \vs non-contiguous arrays.\footnote{C90 also supported non-contiguous arrays.} 1000 First, VLAs are supported. 1001 Second, for contiguous arrays, C99 conjoins one or more of the parameters as a downstream dimension(s), \eg @cols@, implicitly using this parameter to compute the row stride of @m@. 1002 If the declaration of @fc@ is changed to: 1003 \begin{cfa} 1004 void fc( int rows, int cols, int m[@rows@][@cols@] ) ... 1005 \end{cfa} 1006 it is possible for C to perform bound checking across all subscripting, but it does not. 1007 While this contiguous-array capability is a step forward, it is still the programmer's responsibility to manually manage the number of dimensions and their sizes, both at the function definition and call sites. 1008 That is, the array does not automatically carry its structure and sizes for use in computing subscripts. 1009 While the non-contiguous style in @faa@ looks very similar to @fc@, the compiler only understands the unknown-sized array of row pointers, and it relies on the programmer to traverse the columns in a row correctly with a correctly bounded loop index. 1010 Specifically, there is no requirement that the rows are the same length, like a poem with different length lines. 1011 1012 \begin{figure} 1013 \begin{tabular}{@{}ll@{}} 1014 \multicolumn{1}{c}{\textbf{Contiguous}} & \multicolumn{1}{c}{\textbf{ Non-contiguous}} \\ 1015 \begin{cfa} 1016 void fc( int rows, @int cols@, int m[ /* rows */ ][@cols@] ) { 1017 ... printf( "%d ", @m[r][c]@ ); ... 1018 } 1019 int m@[5][5]@; 1020 for ( int r = 0; r < 5; r += 1 ) { 1021 1022 for ( int c = 0; c < 5; c += 1 ) 1023 m[r][c] = r + c; 1024 } 1025 fc( 5, 5, m ); 1026 \end{cfa} 1027 & 1028 \begin{cfa} 1029 void faa( int rows, int cols, int * m[ @/* cols */@ ] ) { 1030 ... printf( "%d ", @m[r][c]@ ); ... 1031 } 1032 int @* aa[5]@; // row pointers 1033 for ( int r = 0; r < 5; r += 1 ) { 1034 @aa[r] = malloc( 5 * sizeof(int) );@ // create rows 1035 for ( int c = 0; c < 5; c += 1 ) 1036 aa[r][c] = r + c; 1037 } 1038 faa( 5, 5, aa ); 1039 \end{cfa} 1040 \end{tabular} 1041 \caption{C99 Contiguous \vs Non-contiguous Matrix Styles} 1042 \label{f:ContiguousNon-contiguous} 1043 \end{figure} 1044 1045 1046 \subsection{Multidimensional array implementation} 863 \section{Multidimensional array implementation} 1047 864 \label{s:ArrayMdImpl} 1048 865 … … 1221 1038 S & | sized(S), $\C{// masquerading-as}$ 1222 1039 Timmed &, $\C{// immediate element, often another array}$ 1223 Tbase & $\C{// base element, e.g.float, never array}$1040 Tbase & $\C{// base element, \eg float, never array}$ 1224 1041 ) { // distribute forall to each element 1225 1042 struct arpk { … … 1274 1091 1275 1092 1276 1277 1093 \section{Array lifecycle} 1094 1095 An array is an aggregate, like a structure; 1096 both are containers wrapping subordinate objects. 1097 Any arbitrary object type, like @string@, can be an array element or structure member. 1098 A consequence is that the lifetime of the container must match with its subordinate objects: all elements and members must be initialized/uninitialized implicitly as part of the container's allocation/deallocation. 1099 Modern programming languages implicitly perform these operations via a type's constructor and destructor. 1100 Therefore, \CFA must assure that an array's subordinate objects' lifetime operations are called. 1101 1102 Preexisting \CFA mechanisms achieve this requirement, but with poor performance. 1103 Furthermore, advanced array users need an exception to the basic mechanism, which does not occur with other aggregates. 1104 Hence, arrays introduce subleties in supporting an element's lifecycle. 1105 1106 The preexisting \CFA support for contained-element lifecycle is based on recursive occurrences of the object-type (@otype@) pseudo-trait. 1107 A type is an @otype@, if it provides a default (parameterless) constructor, copy constructor, assignment operator, and destructor (like \CC). 1108 When declaring a structure with @otype@ members, the compiler implicitly generates implementations of the four @otype@ functions for the outer structure. 1109 Then the generated default constructor for the outer structure calls the default constructor for each member, and the other @otype@ functions work similarly. 1110 For a member that is a C array, these calls occur in a loop for each array element, which even works for VLAs. 1111 This logic works the same, whether the member is a concrete type (that happens to be an @otype@) or if the member is a polymorphic type asserted to be an @otype@ (which is implicit in the syntax, @forall(T)@). 1112 The \CFA array has the simplified form (similar to one seen before): 1113 \begin{cfa} 1114 forall( T * ) // non-otype element, no lifecycle functions 1115 // forall( T ) // otype element, lifecycle functions asserted 1116 struct array5 { 1117 T __items[ 5 ]; 1118 }; 1119 \end{cfa} 1120 Being a structure with a C-array member, using the otype-form declaration for @T@ causes @array5(float)@ to implement @otype@ too. 1121 1122 But this @otype@-recursion pattern has a performance issue. 1123 For example, in a cube of @float@: 1124 \begin{cfa} 1125 array5( array5( array5( float ) ) ) 1126 \end{cfa} 1127 the first few steps of the compiler's work to find the lifecycle functions, under the @otype@-recursion pattern, are shown in \VRef[Figure]{f:OtypeRecursionBlowup}. 1128 All the work needed for the full @float@-cube would have 256 leaves. 1129 1130 %array5(T) offers 1131 %1 parameterless ctor, which asks for T to have 1132 % 1 parameterless ctor 1133 % 2 copy ctor 1134 % 3 asgt 1135 % 4 dtor 1136 %2 copy ctor, which asks for T to have 1137 % 1 parameterless ctor 1138 % 2 copy ctor 1139 % 3 asgt 1140 % 4 dtor 1141 %3 asgt, which asks for T to have 1142 % 1 parameterless ctor 1143 % 2 copy ctor 1144 % 3 asgt 1145 % 4 dtor 1146 %4 dtor, which asks for T to have 1147 % 1 parameterless ctor 1148 % 2 copy ctor 1149 % 3 asgt 1150 % 4 dtor 1151 1152 \begin{figure} 1153 \centering 1154 \setlength{\tabcolsep}{15pt} 1155 \begin{tabular}{@{}lll@{}} 1156 \begin{cfa}[deletekeywords={default}] 1157 float offers 1158 1 default ctor 1159 2 copy ctor 1160 3 asgt 1161 4 dtor 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 \end{cfa} 1187 & 1188 \begin{cfa}[deletekeywords={default}] 1189 array5(float) has 1190 1 default ctor, using float's 1191 1 default ctor 1192 2 copy ctor 1193 3 asgt 1194 4 dtor 1195 2 copy ctor, using float's 1196 1 default ctor 1197 2 copy ctor 1198 3 asgt 1199 4 dtor 1200 3 asgt, using float's 1201 1 default ctor 1202 2 copy ctor 1203 3 asgt 1204 4 dtor 1205 4 dtor, using float's 1206 1 default ctor 1207 2 copy ctor 1208 3 asgt 1209 4 dtor 1210 1211 1212 1213 1214 1215 1216 1217 1218 \end{cfa} 1219 & 1220 \begin{cfa}[deletekeywords={default}] 1221 array5(array5(float)) has 1222 1 default ctor, using array5(float)'s 1223 1 default ctor, using float's 1224 1 default ctor 1225 2 copy ctor 1226 3 asgt 1227 4 dtor 1228 2 copy ctor, using float's 1229 1 default ctor 1230 2 copy ctor 1231 3 asgt 1232 4 dtor 1233 3 asgt, using float's 1234 1 default ctor 1235 2 copy ctor 1236 3 asgt 1237 4 dtor 1238 4 dtor, using float's 1239 1 default ctor 1240 2 copy ctor 1241 3 asgt 1242 4 dtor 1243 2 copy ctor, using array5(float)'s 1244 ... 4 children, 16 leaves 1245 3 asgt, using array5(float)'s 1246 ... 4 children, 16 leaves 1247 4 dtor, using array5(float)'s 1248 ... 4 children, 16 leaves 1249 (64 leaves) 1250 \end{cfa} 1251 \end{tabular} 1252 \caption{Exponential thunk generation under the otype-recursion pattern. 1253 Each time that one type's function (\eg ctor) uses another type's function, the \CFA compiler generates a thunk, to capture the used function's dependencies, presented according to the using function's need. 1254 So, each non-leaf line represents a generated thunk and every line represents a search request for the resolver to find a satisfying function.} 1255 \label{f:OtypeRecursionBlowup} 1256 \end{figure} 1257 1258 So the @otype@-recursion pattern seeks a quantity of helper functions, and generates a quantity of thunks, that are exponential in the number of dimensions. 1259 Anecdotal experience with this solution found the resulting compile times annoyingly slow at three dimensions, and unusable at four. 1260 1261 The issue is that the otype-recursion pattern uses more assertions than needed. 1262 Consider how @array5(float)@'s default constructor is getting all four lifecycle assertions about the element type, @float@. 1263 It only needs @float@'s default constructor; 1264 the full set of operations is never used. 1265 Current work by the \CFA team aims to improve this situation. 1266 Therefore, a workaround is needed for now. 1267 1268 The workaround is to provide a default constructor, copy constructor and destructor, defined with lean, bespoke assertions: 1269 \begin{cquote} 1270 \begin{tabular}{@{}l@{\hspace{0.5in}}l@{}} 1271 \begin{cfa} 1272 // autogenerated for otype-recursion: 1273 forall( T ) 1274 void ?{}( array5(T) & this ) { 1275 for (i; 5) { ( this[i] ){}; } 1276 } 1277 forall( T ) 1278 void ?{}( array5(T) & this, array5(T) & src ) { 1279 for (i; 5) { ( this[i] ){ src[i] }; } 1280 } 1281 forall( T ) 1282 void ^?{}( array5(T) & this ) { 1283 for (i; 5) { ^( this[i] ){}; } 1284 } 1285 \end{cfa} 1286 & 1287 \begin{cfa} 1288 // lean, bespoke: 1289 forall( T* | { void @?{}( T & )@; } ) 1290 void ?{}( array5(T) & this ) { 1291 for (i; 5) { ( this[i] ){}; } 1292 } 1293 forall( T* | { void @?{}( T &, T )@; } ) 1294 void ?{}( array5(T) & this, array5(T) & src ) { 1295 for (i; 5) { ( this[i] ){ src[i] }; } 1296 } 1297 forall( T* | { void @?{}( T & )@; } ) 1298 void ^?{}( array5(T) & this ) { 1299 for (i; 5) { ^( this[i] ){}; } 1300 } 1301 \end{cfa} 1302 \end{tabular} 1303 \end{cquote} 1304 Moreover, the assignment operator is skipped, to avoid hitting a lingering growth case. 1305 Skipping assignment is tolerable because array assignment is not a common operation. 1306 With this solution, the critical lifecycle functions are available, with no growth in thunk creation. 1307 1308 Finally, the intuition that a programmer using an array always wants the elements' default constructor called \emph{automatically} is simplistic. 1309 Arrays exist to store different values at each position. 1310 So, array initialization needs to let the programmer provide different constructor arguments to each element. 1311 \begin{cfa} 1312 thread worker { int id; }; 1313 void ?{}( worker & ) = void; // remove default constructor 1314 void ?{}( worker &, int id ); 1315 array( worker, 5 ) ws = @{}@; // rejected; but desire is for no initialization yet 1316 for (i; 5) (ws[i]){ @i@ }; // explicitly initialize each thread, giving id 1317 \end{cfa} 1318 Note the use of the \CFA explicit constructor call, analogous to \CC's placement-@new@. 1319 This call is where initialization is desired, and not at the declaration of @ws@. 1320 The attempt to initialize from nothing (equivalent to dropping @= {}@ altogether) is invalid because the @worker@ type removes the default constructor. 1321 The @worker@ type is designed this way to work with the threading system. 1322 A thread type forks a thread at the end of each constructor and joins with it at the start of each destructor. 1323 But a @worker@ cannot begin its forked-thead work without knowing its @id@. 1324 Therefore, there is a conflict between the implicit actions of the builtin @thread@ type and a user's desire to defer these actions. 1325 1326 Another \CFA feature may, at first, seem viable for initializing the array @ws@, though on closer inspection, it is not. 1327 C initialization, \lstinline|array(worker, 5) ws @= {};|, ignores all \CFA lifecycle management and uses C empty initialization. 1328 This option does achieve the desired semantics on the construction side. 1329 But on destruction side, the desired semantics is for implicit destructor calls to continue, to keep the join operation tied to lexical scope. 1330 C initialization disables \emph{all} implicit lifecycle management, but the goal is to disable only the implicit construction. 1331 1332 To fix this problem, I enhanced the \CFA standard library to provide the missing semantics, available in either form: 1333 \begin{cfa} 1334 array( @uninit@(worker), 5 ) ws1; 1335 array( worker, 5) ws2 = { @delay_init@ }; 1336 \end{cfa} 1337 Both cause the @ws@-construction-time implicit call chain to stop before reaching a @worker@ constructor, while leaving the implicit destruction calls intact. 1338 Two forms are available, to parallel the development of this feature in \uCpp. 1339 Originally \uCpp offered only the @ws1@ form, using the class-template @uNoCtor@ equivalent to \CFA's @uninit@. 1340 More recently, \uCpp was extended with the declaration macro, @uArray@, with usage similar to the @ws2@ case. 1341 Based on experience piloting @uArray@ as a replacement of @uNoCtor@, it might be possible to remove the first option. 1342 1343 % note to Mike, I have more fragments on some details available in push24\fragments\uNoCtor.txt 1278 1344 1279 1345 \section{Comparison with other arrays} -
doc/theses/mike_brooks_MMath/background.tex
rb006c51e r10a9479d 178 178 \lstinput{34-34}{bkgd-carray-arrty.c} 179 179 The inspection begins by using @sizeof@ to provide program semantics for the intuition of an expression's type. 180 An architecture with 64-bit pointer size is used, to keep irrelevant details fixed.180 An architecture with 64-bit pointer size is used, to remove irrelevant details. 181 181 \lstinput{35-36}{bkgd-carray-arrty.c} 182 182 Now consider the @sizeof@ expressions derived from @ar@, modified by adding pointer-to and first-element (and including unnecessary parentheses to avoid any confusion about precedence). … … 215 215 216 216 My observation is recognizing: 217 \begin{itemize}[leftmargin=*, topsep=0pt,itemsep=0pt]217 \begin{itemize}[leftmargin=*,itemsep=0pt] 218 218 \item There is value in using a type that knows its size. 219 219 \item The type pointer to the (first) element does not. … … 302 302 303 303 In summary, when a function is written with an array-typed parameter, 304 \begin{itemize}[leftmargin=* ,topsep=0pt]304 \begin{itemize}[leftmargin=*] 305 305 \item an appearance of passing an array by value is always an incorrect understanding, 306 306 \item a dimension value, if any is present, is ignored, … … 532 532 \subsection{Array Parameter Declaration} 533 533 534 Passing an array a long with a function call is obviously useful.535 Let us say that a parameter is an array parameter when the calledfunction intends to subscript it.536 This section asserts that a more satisfactory/formal characterization does not exist in C, surveys the ways that C API authors communicate ``@p@ has zero or more @T@s,'' and calls out the minority cases where the C type system is using or verifying such claims.537 538 A C function'sparameter declarations look different, from the caller's and callee's perspectives.534 Passing an array as an argument to a function is necessary. 535 Assume a parameter is an array when the function intends to subscript it. 536 This section asserts that a more satisfactory/formal characterization does not exist in C, surveys the ways that C API authors communicate ``@p@ has zero or more dimensions'' and calls out the minority cases where the C type system is using or verifying such claims. 537 538 A C parameter declarations look different, from the caller's and callee's perspectives. 539 539 Both perspectives consist of the text read by a programmer and the semantics enforced by the type system. 540 The caller's perspec itve is available from a mere function declaration (which allow definition-before-use and separate compilation), but can also be read from (the non-body part of) a function definition.540 The caller's perspective is available from a function declaration, which allow definition-before-use and separate compilation, but can also be read from (the non-body part of) a function definition. 541 541 The callee's perspective is what is available inside the function. 542 542 \begin{cfa} 543 int foo( int, float, char ); $\C{// declaration, names optional}$ 544 int bar( int i, float f, char c ) { $\C{// definition, names mandatory}$ 545 $/* caller's perspective of foo's; callee's perspective of bar's */$ 546 ... 547 } 548 $/* caller's persepectives of foo's and bar's */$ 549 \end{cfa} 550 The caller's perspective is more limited. 551 The example shows, so far, that parameter names (by virtue of being optional) are really comments in the caller's perspective, while they are semantically significant in the callee's perspective. 543 int foo( int, float, char ); $\C{// declaration, names optional}$ 544 int bar( int i, float f, char c ) { $\C{// definition, names mandatory}$ 545 // caller's perspective of foo; callee's perspective of bar 546 } 547 // caller's perspectives of foo's and bar's 548 \end{cfa} 549 In caller's perspective, the parameter names (by virtue of being optional) are really comments; 550 in the callee's perspective, parameter names are semantically significant. 552 551 Array parameters introduce a further, subtle, semantic difference and considerable freedom to comment. 553 552 554 At the semantic level, there is no such thing as an array parameter, except for one case (@T [static 5]@) discussed shortly.553 At the semantic level, there is no such thing as an array parameter, except for one case (@T [static 5]@) discussed shortly. 555 554 Rather, there are only pointer parameters. 556 This fact probably shares considerable responsibility for the common sense of ``an array is just a pointer,'' w ich has been refuted in non-parameter contexts.555 This fact probably shares considerable responsibility for the common sense of ``an array is just a pointer,'' which has been refuted in non-parameter contexts. 557 556 This fact holds in both the caller's and callee's perspectives. 558 However, a parameter's type can include ``array of.'' 559 For example, the type ``pointer to array of 5 ints'' (@T(*)[5]@) is a pointer type, a fully meaningful parameter type (in the sense that this description does not contain any information that the type system ignores), and a type that appears the same in the caller's \vs callee's perspectives. 560 The outermost type constructor (syntactically first dimension) is really the one that determines the flavour of parameter. 557 However, a parameter's type can include ``array of.'', \eg the type ``pointer to array of 5 ints'' (@T (*)[5]@) is a pointer type. 558 This type is fully meaningful in the sense that its description does not contain any information that the type system ignores, and the type appears the same in the caller's \vs callee's perspectives. 559 In fact, the outermost type constructor (syntactically first dimension) is really the one that determines the flavour of parameter. 560 561 Yet, C allows array syntax for the outermost type constructor, from which comes the freedom to comment. 562 An array parameter declaration can specify the outermost dimension with a dimension value, @[10]@ (which is ignored), an empty dimension list, @[ ]@, or a pointer, @*@, as seen in \VRef[Figure]{f:ArParmEquivDecl}. 563 The rationale for rejecting the first ``invalid'' row follows shortly, while the second ``invalid'' row is simple nonsense, included to complete the pattern; its syntax hints at what the final row actually achieves. 561 564 562 565 \begin{figure} … … 596 599 \end{tabular} 597 600 \end{cquote} 598 \caption{Multiple ways to declare an arrray parameter. Across a valid row, every declaration is equivalent. Each column gives a declaration style. Really, the style can be read from the first row only. The second row shows how the style extends to multiple dimensions, with the rows thereafter providing context for the choice of which second-row \lstinline{[]}receives the column-style variation.} 601 \caption{Multiple ways to declare an array parameter. 602 Across a valid row, every declaration is equivalent. 603 Each column gives a declaration style, where the style for that column is read from the first row. 604 The second row begins the style for multiple dimensions, with the rows thereafter providing context for the choice of which second-row \lstinline{[]} receives the column-style variation.} 599 605 \label{f:ArParmEquivDecl} 600 606 \end{figure} 601 607 602 Yet, C allows array syntax for the outermost type constructor, from which comes the freedom to comment. 603 An array parameter declaration can specify the outermost dimension with a dimension value, @[10]@ (which is ignored), an empty dimension list, @[ ]@, or a pointer, @*@, as seen in \VRef[Figure]{f:ArParmEquivDecl}. The rationale for rejecting the first ``invalid'' row follows shortly, while the second ``invalid'' row is simple nonsense, included to complete the pattern; its syntax hints at what the final row actually achieves. 604 605 In the lefmost style, the typechecker ignores the actual value in most practical cases. 606 This value is allowed to be a dynamic expression, so it is \emph{possible} to use the leftmost style in many practical cases. 608 In the leftmost style, the typechecker ignores the actual value in most practical cases. 609 This value is allowed to be a dynamic expression, and then it has practical cases. 610 \begin{cfa} 611 void foo( int @n@ ) { 612 float _42( float @a[n]@ ) { // nested function 613 a[0] = 42; 614 } 615 float b[n]; 616 _42( b ); 617 } 618 \end{cfa} 619 607 620 608 621 % To help contextualize the matrix part of this example, the syntaxes @float [5][]@, @float [][]@ and @float (*)[]@ are all rejected, for reasons discussed shortly. 609 622 % So are @float[5]*@, @float[]*@ and @float (*)*@. These latter ones are simply nonsense, though they hint at ``1d array of pointers'', whose equivalent syntax options are, @float *[5]@, @float *[]@, and @float **@. 610 623 611 It is a matter of taste as to whether a programmer should use a form as far left as possible (getting the most out of syntactically integrated comments), sticking to the right (avoiding false comfort from suggesting the typechecker is checking more than it is), or compromising in the middle (reducing unchecked information, yet clearly stating, ``I will subscript this one'').612 613 Note that this equivalence of pointer and array declarations is special to param ters.624 It is a matter of taste as to whether a programmer should use a form as far left as possible (getting the most out of possible subscripting and dimension sizes), sticking to the right (avoiding false comfort from suggesting the typechecker is checking more than it is), or compromising in the middle (reducing unchecked information, yet clearly stating, ``I will subscript). 625 626 Note that this equivalence of pointer and array declarations is special to parameters. 614 627 It does not apply to local variables, where true array declarations are possible. 615 628 \begin{cfa} … … 626 639 float sum( float v[] ); 627 640 float arg = 3.14; 628 sum( &arg ); $\C{// accepted, v := \&arg}$641 sum( &arg ); $\C{// accepted, v = \&arg}$ 629 642 \end{cfa} 630 643 … … 672 685 Here, the distance between the first and second elements of each array depends on the inner dimension size. 673 686 674 The last observation is a fact of the callee's perspective. 675 There is little type-system checking, in the caller's perspective, that what is being passed, matches. 676 \begin{cfa} 677 void f( float [][10] ); 678 int n = 100; 679 float a[100], b[n]; 680 f(&a); // reject 681 f(&b); // accept 682 \end{cfa} 683 This size is therefore, a callee's assumption. 684 685 Finally, to handle higher-dimensional VLAs, C repurposed the @*@ \emph{within} the dimension in a declaration to mean that the callee will have make an assumption about the size here, but no (unchecked, possibly wrong) information about this assumption is included for the caller-programmer's benefit/overconfidence. 687 This significance of an inner dimension's length is a fact of the callee's perspective. 688 In the caller's perspective, the type sytem is quite lax. 689 Here, there is (some, but) little checking that what is being passed, matches. 690 % void f( float [][10] ); 691 % int n = 100; 692 % float a[100], b[n]; 693 % f(&a); // reject 694 % f(&b); // accept 695 \begin{cfa} 696 void foo() { 697 void f( float [][10] ); 698 int n = 100; 699 float a[100], b[3][12], c[n], d[n][n]; 700 f( a ); 701 f( b ); $\C{// reject: inner dimension 12 for 10}$ 702 f( c ); 703 f( @d@ ); $\C{// accept with inner dimension n for 10}$ 704 f( &a ); $\C{// reject: inner dimension 100 for 10}$ 705 f( &b ); 706 f( @&c@ ); $\C{// accept with inner dimension n for 10}$ 707 f( &d ); 708 } 709 \end{cfa} 710 The cases without comments are rejections, but simply because the array ranks do not match; in the commented cases, the ranks match and the rules being discussed apply. 711 The cases @f(b)@ and @f(&a)@ show where some length checking occurs. 712 But this checking misses the cases @f(d)@ and @f(&c)@, allowing the calls with mismatched lengths, actually 100 for 10. 713 The C checking rule avoids false alarms, at the expense of safety, by allowing any combinations that involve dynamic values. 714 Ultimately, an inner dimension's size is a callee's \emph{assumption} because the type system uses declaration details in the callee's perspective that it does not enforce in the caller's perspective. 715 716 Finally, to handle higher-dimensional VLAs, C repurposed the @*@ \emph{within} the dimension in a declaration to mean that the callee has make an assumption about the size, but no (unchecked, possibly wrong) information about this assumption is included for the caller-programmer's benefit/over-confidence. 686 717 \begin{cquote} 687 718 @[@ \textit{type-qualifier-list$_{opt}$} @* ]@ … … 1162 1193 with all the variance being due to the (inevitable) cache status of the nodes being managed. 1163 1194 1195 1164 1196 \section{String} 1197 \label{s:String} 1165 1198 1166 1199 A string is a sequence of symbols, where the form of a symbol can vary significantly: 7/8-bit characters (ASCII/Latin-1), or 2/4/8-byte (UNICODE) characters/symbols or variable length (UTF-8/16/32) characters. -
doc/theses/mike_brooks_MMath/programs/hello-accordion.cfa
rb006c51e r10a9479d 30 30 forall( [C], [S] ) 31 31 int getPref( @School( C, S ) & school@, int is, int pref ) { 32 33 32 for ( ic; C ) { 33 int curPref = @school.preferences@[ic][is]; $\C{// offset calculation implicit}$ 34 34 if ( curPref == pref ) return ic; 35 35 } 36 36 assert( false ); 37 37 } 38 38 … … 59 59 60 60 { string sv; 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 61 int iv; 62 // headers' row 63 sin | "\nc\\s"; 64 for ( is ; ns ) { 65 // column label 66 sin | sv; 67 school.student_ids[is] = sv; 68 } 69 // body rows 70 for ( ic ; nc ) { 71 // row label 72 sin | sv; 73 school.course_codes[ic] = sv; 74 for ( is ; ns ) { 75 // matrix item 76 sin | iv; 77 school.preferences[ic][is] = iv; 78 } 79 } 80 } 81 81 82 82 … … 91 91 sout | school.student_ids[is] | ": " | nonl; 92 92 for ( pref; 1 ~= nc ) { 93 94 93 int ic = getPref(school, is, pref); 94 sout | school.course_codes[ ic ] | nonl; 95 95 } 96 96 sout | nl; -
doc/theses/mike_brooks_MMath/programs/hello-array.cfa
rb006c51e r10a9479d 114 114 f( y, y ); $\C{// ok}$ 115 115 if ( M == N ) 116 f( x, @(array( float, M ) &)@y ); $\C{// ok} $116 f( x, @(array( float, M ) &)@y ); $\C{// ok}\CRT$ 117 117 } 118 118 -
doc/theses/mike_brooks_MMath/programs/sharing-demo.cfa
rb006c51e r10a9479d 5 5 #define str(s) #s 6 6 7 ofstream outfile; 8 7 9 void demo1() { 8 10 sout | sepOff; 9 sout | "Consider two strings @s1@ and @s1a@ that are in an aliasing relationship, and a third, @s2@, made by a simple copy from @s1@."; 10 sout | "\\par\\noindent"; 11 sout | "\\begin{tabular}{llll}"; 12 sout | "\t\t\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 11 // sout | "Consider two strings @s1@ and @s1a@ that are in an aliasing relationship, and a third, @s2@, made by a simple copy from @s1@."; 13 12 14 13 #define S1 string s1 = "abc" … … 21 20 assert( s1a == "abc" ); 22 21 assert( s2 == "abc" ); 23 sout | xstr(S1) | "\t\\\\"; 24 sout | xstr(S1A) | "\t\\\\"; 25 sout | xstr(S2) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2; 26 sout | "\\end{tabular}"; 27 sout | "\\par\\noindent"; 28 29 sout | "Aliasing (@`shareEdits@) means that changes flow in both directions; with a simple copy, they do not."; 30 sout | "\\par\\noindent"; 31 sout | "\\begin{tabular}{llll}"; 32 sout | "\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 33 sout | "\t\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 22 23 open( outfile, "build/sharing1.tex" ); 24 outfile | "\\begin{cquote}"; 25 outfile | "\\begin{tabular}{@{}llll@{}}"; 26 outfile | "\t\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 27 outfile | xstr(S1) | "\t\\\\"; 28 outfile | xstr(S1A) | "\t\\\\"; 29 outfile | xstr(S2) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2; 30 outfile | "\\end{tabular}"; 31 outfile | "\\end{cquote}"; 32 close( outfile ); 33 34 // sout | "Aliasing (@`shareEdits@) means that changes flow in both directions; with a simple copy, they do not."; 35 open( outfile, "build/sharing2.tex" ); 36 outfile | "\\begin{cquote}"; 37 outfile | "\\begin{tabular}{@{}llll@{}}"; 38 outfile | "\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 39 outfile | "\\multicolumn{1}{r}{initial} & " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 34 40 35 41 #define S1s1 s1 [1] = '+' 36 42 S1s1; 37 43 assert( s1 == "a+c" ); 38 sout| xstr(S1s1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";44 outfile | xstr(S1s1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 39 45 40 46 #define S1As1 s1a[1] = '-' 41 47 S1As1; 42 48 assert( s1a == "a-c" ); 43 sout| xstr(S1As1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";49 outfile | xstr(S1As1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 44 50 45 51 #define S2s1 s2 [1] = '|' 46 52 S2s1; 47 53 assert( s2 == "a|c" ); 48 sout | xstr(S2s1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2; 49 sout | "\\end{tabular}"; 50 sout | "\\par\\noindent"; 51 52 sout | "Assignment of a value is just a modificiation." 53 "\nThe aliasing relationship is established at construction and is unaffected by assignment of a value."; 54 sout | "\\par\\noindent"; 55 sout | "\\begin{tabular}{llll}"; 56 sout | "\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 57 sout | "\t\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 54 outfile | xstr(S2s1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2; 55 outfile | "\\end{tabular}"; 56 outfile | "\\end{cquote}"; 57 close( outfile ); 58 59 // sout | "Assignment of a value is just a modificiation." 60 // "\nThe aliasing relationship is established at construction and is unaffected by assignment of a value."; 61 open( outfile, "build/sharing3.tex" ); 62 outfile | "\\begin{cquote}"; 63 outfile | "\\begin{tabular}{llll}"; 64 outfile | "\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 65 outfile | "\\multicolumn{1}{r}{initial} & " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 58 66 59 67 #define S1qrs s1 = "qrs" 60 68 S1qrs; 61 69 assert( s1 == "qrs" ); 62 sout| xstr(S1qrs) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";70 outfile | xstr(S1qrs) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 63 71 64 72 #define S1Atuv s1a = "tuv" 65 73 S1Atuv; 66 74 assert( s1a == "tuv" ); 67 sout| xstr(S1Atuv) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";75 outfile | xstr(S1Atuv) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 68 76 69 77 #define S2wxy s2 = "wxy" 70 78 S2wxy; 71 79 assert( s2 == "wxy" ); 72 sout | xstr(S2wxy) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2; 73 sout | "\\end{tabular}"; 74 sout | "\\par\\noindent"; 75 76 sout | "Assignment from a string is just assignment of a value." 77 "\nWhether of not the RHS participates in aliasing is irrelevant. Any aliasing of the LHS is unaffected."; 78 sout | "\\par\\noindent"; 79 sout | "\\begin{tabular}{llll}"; 80 sout | "\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 81 sout | "\t\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 80 outfile | xstr(S2wxy) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2; 81 outfile | "\\end{tabular}"; 82 outfile | "\\end{cquote}"; 83 close( outfile ); 84 85 // sout | "Assignment from a string is just assignment of a value." 86 // "\nWhether of not the RHS participates in aliasing is irrelevant. Any aliasing of the LHS is unaffected."; 87 open( outfile, "build/sharing4.tex" ); 88 outfile | "\\begin{cquote}"; 89 outfile | "\\begin{tabular}{llll}"; 90 outfile | "\t\t& @s1@\t& @s1a@\t& @s2@\t\\\\"; 91 outfile | "\\multicolumn{1}{r}{initial} & " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 82 92 83 93 #define S1S2 s1 = s2 … … 86 96 assert( s1a == "wxy" ); 87 97 assert( s2 == "wxy" ); 88 sout| xstr(S1S2) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";98 outfile | xstr(S1S2) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 89 99 90 100 #define S1aaa s1 = "aaa" … … 93 103 assert( s1a == "aaa" ); 94 104 assert( s2 == "wxy" ); 95 sout| xstr(S1aaa) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";105 outfile | xstr(S1aaa) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 96 106 97 107 #define S2S1 s2 = s1 … … 100 110 assert( s1a == "aaa" ); 101 111 assert( s2 == "aaa" ); 102 sout| xstr(S2S1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";112 outfile | xstr(S2S1) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 103 113 104 114 #define S2bbb s2 = "bbb" … … 107 117 assert( s1a == "aaa" ); 108 118 assert( s2 == "bbb" ); 109 sout| xstr(S2bbb) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";110 111 119 outfile | xstr(S2bbb) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 120 121 #define S2S1a s2 = s1a 112 122 S2S1a; 113 123 assert( s1 == "aaa" ); 114 124 assert( s1a == "aaa" ); 115 125 assert( s2 == "aaa" ); 116 sout| xstr(S2S1a) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";126 outfile | xstr(S2S1a) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 117 127 118 128 #define S2ccc s2 = "ccc" … … 121 131 assert( s1a == "aaa" ); 122 132 assert( s2 == "ccc" ); 123 sout| xstr(S2ccc) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\";124 133 outfile | xstr(S2ccc) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 134 125 135 #define S1xxx s1 = "xxx" 126 136 S1xxx; … … 128 138 assert( s1a == "xxx" ); 129 139 assert( s2 == "ccc" ); 130 sout | xstr(S1xxx) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 131 sout | "\\end{tabular}"; 132 sout | "\\par"; 140 outfile | xstr(S1xxx) | "\t& " | s1 | "\t& " | s1a | "\t& " | s2 | "\t\\\\"; 141 outfile | "\\end{tabular}"; 142 outfile | "\\end{cquote}"; 143 close( outfile ); 133 144 } 134 145 135 146 136 147 void demo2() { 137 sout | "Consider new strings @s1_mid@ being an alias for a run in the middle of @s1@, along with @s2@, made by a simple copy from the middle of @s1@."; 138 sout | "\\par\\noindent"; 139 sout | "\\begin{tabular}{llll}"; 140 sout | "\t\t\t\t& @s1@\t& @s1_mid@\t& @s2@\t\\\\"; 148 // sout | "Consider new strings @s1_mid@ being an alias for a run in the middle of @s1@, along with @s2@, made by a simple copy from the middle of @s1@."; 149 open( outfile, "build/sharing5.tex" ); 150 outfile | "\\begin{cquote}"; 151 outfile | "\\begin{tabular}{llll}"; 152 outfile | "\t\t\t\t& @s1@\t& @s1_mid@\t& @s2@\t\\\\"; 141 153 142 154 #define D2_s1_abcd string s1 = "abcd" 143 155 D2_s1_abcd; 144 sout| xstr(D2_s1_abcd) | "\t\\\\";156 outfile | xstr(D2_s1_abcd) | "\t\\\\"; 145 157 146 158 #define D2_s1mid_s1 string s1_mid = s1(1,2)`shareEdits 147 159 D2_s1mid_s1; 148 sout| xstr(D2_s1mid_s1) | "\t\\\\";160 outfile | xstr(D2_s1mid_s1) | "\t\\\\"; 149 161 150 162 #define D2_s2_s1 string s2 = s1(1,2) 151 D2_s2_s1; 163 D2_s2_s1; 152 164 assert( s1 == "abcd" ); 153 165 assert( s1_mid == "bc" ); 154 166 assert( s2 == "bc" ); 155 sout | xstr(D2_s2_s1) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 156 sout | "\\end{tabular}"; 157 sout | "\\par\\noindent"; 158 159 sout | "Again, @`shareEdits@ passes changes in both directions; copy does not. Note the difference in index values, with the \\emph{b} position being 1 in the longer string and 0 in the shorter strings. In the case of s1 aliasing with @s1_mid@, the very same character is being accessed by different postitions."; 160 sout | "\\par\\noindent"; 161 sout | "\\begin{tabular}{llll}"; 162 sout | "\t\t\t\t& @s1@\t& @s1_mid@\t& @s2@\t\\\\"; 163 sout | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 167 outfile | xstr(D2_s2_s1) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 168 outfile | "\\end{tabular}"; 169 outfile | "\\end{cquote}"; 170 close( outfile ); 171 172 // sout | "Again, @`shareEdits@ passes changes in both directions; copy does not. Note the difference in index values, with the \\emph{b} position being 1 in the longer string and 0 in the shorter strings. In the case of s1 aliasing with @s1_mid@, the very same character is being accessed by different postitions."; 173 open( outfile, "build/sharing6.tex" ); 174 outfile | "\\begin{cquote}"; 175 outfile | "\\begin{tabular}{llll}"; 176 outfile | "\t\t\t\t& @s1@\t& @s1_mid@\t& @s2@\t\\\\"; 177 outfile | "\\multicolumn{1}{r}{initial} & " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 164 178 165 179 #define D2_s1_plus s1 [1] = '+' … … 168 182 assert( s1_mid == "+c" ); 169 183 assert( s2 == "bc" ); 170 sout| xstr(D2_s1_plus) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\";184 outfile | xstr(D2_s1_plus) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 171 185 172 186 #define D2_s1mid_minus s1_mid[0] = '-' … … 175 189 assert( s1_mid == "-c" ); 176 190 assert( s2 == "bc" ); 177 sout| xstr(D2_s1mid_minus) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\";178 179 191 outfile | xstr(D2_s1mid_minus) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 192 193 #define D2_s2_pipe s2 [0] = '|' 180 194 D2_s2_pipe; 181 195 assert( s1 == "a-cd" ); 182 196 assert( s1_mid == "-c" ); 183 197 assert( s2 == "|c" ); 184 sout | xstr(D2_s2_pipe) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 185 sout | "\\end{tabular}"; 186 sout | "\\par\\noindent"; 187 188 sout | "Once again, assignment of a value is a modificiation that flows through the aliasing relationship, without affecting its structure."; 189 sout | "\\par\\noindent"; 190 sout | "\\begin{tabular}{llll}"; 191 sout | "\t\t\t\t& @s1@\t& @s1_mid@\t& @s2@\t\\\\"; 192 sout | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 198 outfile | xstr(D2_s2_pipe) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 199 outfile | "\\end{tabular}"; 200 outfile | "\\end{cquote}"; 201 close( outfile ); 202 203 // sout | "Once again, assignment of a value is a modificiation that flows through the aliasing relationship, without affecting its structure."; 204 open( outfile, "build/sharing7.tex" ); 205 outfile | "\\begin{cquote}"; 206 outfile | "\\begin{tabular}{llll}"; 207 outfile | "\t\t\t\t& @s1@\t& @s1_mid@\t& @s2@\t\\\\"; 208 outfile | "\\multicolumn{1}{r}{initial} & " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 193 209 194 210 #define D2_s1mid_ff s1_mid = "ff" … … 197 213 assert( s1_mid == "ff" ); 198 214 assert( s2 == "|c" ); 199 sout| xstr(D2_s1mid_ff) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\";200 215 outfile | xstr(D2_s1mid_ff) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 216 201 217 #define D2_s2_gg s2 = "gg" 202 218 D2_s2_gg; … … 204 220 assert( s1_mid == "ff" ); 205 221 assert( s2 == "gg" ); 206 sout | xstr(D2_s2_gg) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 207 sout | "\\end{tabular}"; 208 sout | "\\par\\noindent"; 209 210 sout | "In the \\emph{ff} step, which is a positive example of flow across an aliasing relationship, the result is straightforward to accept because the flow direction is from contained (small) to containing (large). The following rules for edits through aliasing substrings will guide how to flow in the opposite direction."; 211 sout | "\\par"; 212 213 214 sout | "Growth and shrinkage are natural extensions. An empty substring is a real thing, at a well-defined location, whose meaning is extrapolated from the examples so far. The intended metaphor is to operating a GUI text editor. Having an aliasing substring is like using the mouse to select a few words. Assigning onto an aliasign substring is like typing from having a few words selected: depending how much you type, the file being edited can get shorter or longer."; 215 sout | "\\par\\noindent"; 216 sout | "\\begin{tabular}{lll}"; 217 sout | "\t\t\t\t& @s1@\t& @s1_mid@\t\\\\"; 218 sout | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\"; 222 outfile | xstr(D2_s2_gg) | "\t& " | s1 | "\t& " | s1_mid | "\t& " | s2 | "\t\\\\"; 223 outfile | "\\end{tabular}"; 224 outfile | "\\end{cquote}"; 225 close( outfile ); 226 227 // sout | "In the \\emph{ff} step, which is a positive example of flow across an aliasing relationship, the result is straightforward to accept because the flow direction is from contained (small) to containing (large). The following rules for edits through aliasing substrings will guide how to flow in the opposite direction."; 228 // sout | "\\par"; 229 230 231 // sout | "Growth and shrinkage are natural extensions. An empty substring is a real thing, at a well-defined location, whose meaning is extrapolated from the examples so far. The intended metaphor is to operating a GUI text editor. Having an aliasing substring is like using the mouse to select a few words. Assigning onto an aliasign substring is like typing from having a few words selected: depending how much you type, the file being edited can get shorter or longer."; 232 open( outfile, "build/sharing8.tex" ); 233 outfile | "\\begin{cquote}"; 234 outfile | "\\begin{tabular}{lll}"; 235 outfile | "\t\t\t\t& @s1@\t& @s1_mid@\t\\\\"; 236 outfile | "\\multicolumn{1}{r}{initial} & " | s1 | "\t& " | s1_mid | "\t\\\\"; 219 237 220 238 assert( s1 == "affd" ); 221 // assert( s1_mid == "fc" ); 222 sout| xstr(D2_s2_gg) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\";239 // assert( s1_mid == "fc" ); // ????????? bug? 240 outfile | xstr(D2_s2_gg) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\"; 223 241 224 242 #define D2_s1mid_hhhh s1_mid = "hhhh" … … 226 244 assert( s1 == "ahhhhd" ); 227 245 assert( s1_mid == "hhhh" ); 228 sout| xstr(D2_s1mid_hhhh) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\";229 246 outfile | xstr(D2_s1mid_hhhh) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\"; 247 230 248 #define D2_s1mid_i s1_mid = "i" 231 249 D2_s1mid_i; 232 250 assert( s1 == "aid" ); 233 251 assert( s1_mid == "i" ); 234 sout| xstr(D2_s1mid_i) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\";252 outfile | xstr(D2_s1mid_i) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\"; 235 253 236 254 #define D2_s1mid_empty s1_mid = "" … … 238 256 assert( s1 == "ad" ); 239 257 // assert( s1_mid == "" ); ------ Should be so, but fails 240 sout| xstr(D2_s1mid_empty) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\";258 outfile | xstr(D2_s1mid_empty) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\"; 241 259 242 260 #define D2_s1mid_jj s1_mid = "jj" … … 244 262 assert( s1 == "ajjd" ); 245 263 assert( s1_mid == "jj" ); 246 sout | xstr(D2_s1mid_jj) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\"; 247 sout | "\\end{tabular}"; 248 sout | "\\par\\noindent"; 249 250 sout | "Multiple portions can be aliased. When there are several aliasing substrings at once, the text editor analogy becomes an online multi-user editor. I should be able to edit a paragraph in one place (changing the document's length), without my edits affecting which letters are within a mouse-selection that you had made previously, somewhere else."; 251 sout | "\\par\\noindent"; 252 sout | "\\begin{tabular}{lllll}"; 253 sout | "\t\t\t\t& @s1@\t& @s1_bgn@\t& @s1_mid@\t& @s1_end@\t\\\\"; 264 outfile | xstr(D2_s1mid_jj) | "\t& " | s1 | "\t& " | s1_mid | "\t\\\\"; 265 outfile | "\\end{tabular}"; 266 outfile | "\\end{cquote}"; 267 close( outfile ); 268 269 // sout | "Multiple portions can be aliased. When there are several aliasing substrings at once, the text editor analogy becomes an online multi-user editor. I should be able to edit a paragraph in one place (changing the document's length), without my edits affecting which letters are within a mouse-selection that you had made previously, somewhere else."; 270 open( outfile, "build/sharing9.tex" ); 271 outfile | "\\begin{cquote}"; 272 outfile | "\\begin{tabular}{lllll}"; 273 outfile | "\t\t\t\t& @s1@\t& @s1_bgn@\t& @s1_mid@\t& @s1_end@\t\\\\"; 254 274 255 275 #define D2_s1bgn_s1 string s1_bgn = s1(0, 1)`shareEdits 256 276 D2_s1bgn_s1; 257 sout| xstr(D2_s1bgn_s1) | "\t\\\\";277 outfile | xstr(D2_s1bgn_s1) | "\t\\\\"; 258 278 259 279 #define D2_s1end_s1 string s1_end = s1(3, 1)`shareEdits … … 263 283 assert( s1_mid == "jj" ); 264 284 assert( s1_end == "d" ); 265 sout| xstr(D2_s1end_s1) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\";266 285 outfile | xstr(D2_s1end_s1) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\"; 286 267 287 #define D1_s1bgn_zzz s1_bgn = "zzzz" 268 288 D1_s1bgn_zzz; … … 271 291 assert( s1_mid == "jj" ); 272 292 assert( s1_end == "d" ); 273 sout | xstr(D1_s1bgn_zzz) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\"; 274 sout | "\\end{tabular}"; 275 sout | "\\par\\noindent"; 276 277 sout | "When an edit happens on an aliasing substring that overlaps another, an effect is unavoidable. Here, the passive party sees its selection shortened, to exclude the characters that were not part of the original selection."; 278 sout | "\\par\\noindent"; 279 sout | "\\begin{tabular}{llllll}"; 280 sout | "\t\t\t\t& @s1@\t& @s1_bgn@\t& @s1_crs@\t& @s1_mid@\t& @s1_end@\t\\\\"; 293 outfile | xstr(D1_s1bgn_zzz) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\"; 294 outfile | "\\end{tabular}"; 295 outfile | "\\end{cquote}"; 296 close( outfile ); 297 298 // sout | "When an edit happens on an aliasing substring that overlaps another, an effect is unavoidable. Here, the passive party sees its selection shortened, to exclude the characters that were not part of the original selection."; 299 open( outfile, "build/sharing10.tex" ); 300 outfile | "\\begin{cquote}"; 301 outfile | "\\begin{tabular}{llllll}"; 302 outfile | "\t\t\t\t& @s1@\t& @s1_bgn@\t& @s1_crs@\t& @s1_mid@\t& @s1_end@\t\\\\"; 281 303 282 304 #define D2_s1crs_s1 string s1_crs = s1(3, 2)`shareEdits … … 286 308 assert( s1_crs == "zj" ); 287 309 assert( s1_mid == "jj" ); 288 assert( s1_end == "d" ); 289 sout| xstr(D2_s1crs_s1) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_crs | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\";310 assert( s1_end == "d" ); 311 outfile | xstr(D2_s1crs_s1) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_crs | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\"; 290 312 291 313 #define D2_s1crs_ppp s1_crs = "+++" … … 296 318 assert( s1_mid == "j" ); 297 319 assert( s1_end == "d" ); 298 sout| xstr(D2_s1crs_ppp) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_crs | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\";299 sout| "\\end{tabular}";300 sout | "\\par\\noindent";301 sout | "TODO: finish typesetting the demo";302 303 320 outfile | xstr(D2_s1crs_ppp) | "\t& " | s1 | "\t& " | s1_bgn | "\t& " | s1_crs | "\t& " | s1_mid | "\t& " | s1_end | "\t\\\\"; 321 outfile | "\\end{tabular}"; 322 outfile | "\\end{cquote}"; 323 close( outfile ); 324 325 // "This shortening behaviour means that a modification has to occur entirely inside a substring, to show up in that substring. Sharing changes through the intersection of partially overlapping aliases is still possible, so long as the receiver's boundary is not inside the edit." 304 326 305 327 string word = "Phi"; … … 309 331 assert( consonants == "Ph" ); 310 332 assert( miniscules == "hi" ); 311 333 312 334 consonants[1] = 's'; 313 335 assert( word == "Psi" ); … … 321 343 string greet_bgn = all(10,1)`shareEdits; 322 344 string greet_end = all(14,1)`shareEdits; 323 345 324 346 assert( all == "They said hello again" ); 325 347 assert( greet == "hello" ); 326 348 assert( greet_bgn == "h" ); 327 349 assert( greet_end == "o" ); 328 350 329 351 330 352 greet = "sup"; … … 333 355 // assert( greet_bgn == "" ); ------ Should be so, but fails 334 356 // assert( greet_end == "" ); 335 336 337 338 339 340 341 342 343 344 345 greet_bgn = "what"; 346 347 357 358 359 360 361 362 /* As in the earlier step where \emph{aj} becomes \emph{ajjd}, such empty substrings maintain their places in the total string, and can be used for filling it. Because @greet_bgn@ was orginally at the start of the edit, in the outcome, the empty @greet_bgn@ sits just before the written value. Similarly @greed_end@ goes after. Though not shown, an overwritten substring at neither side goes arbitrarily to the before side. */ 363 364 365 366 367 greet_bgn = "what"; 368 369 348 370 assert( all == "They said whatsup again" ); 349 371 350 372 assert( greet == "sup" ); 351 373 352 374 assert( greet_bgn == "what" ); 353 375 // assert( greet_end == "" ); ------ Should be so, but fails 354 355 356 greet_end = "..."; 357 358 376 377 378 greet_end = "..."; 379 380 359 381 assert( all == "They said whatsup... again" ); 360 382 361 383 assert( greet == "sup" ); 362 384 363 385 assert( greet_bgn == "what" ); 364 386 365 387 assert( greet_end == "..." ); 366 367 368 369 370 371 372 388 389 390 391 392 393 /* Though these empty substrings hold their places in the total string, an empty string only belongs to bigger strings when it occurs completely inside them. There is no such state as including an empty substring at an edge. For this reason, @word@ gains the characters added by assigning to @greet_bgn@ and @greet_end@, but the string @greet@ does not. */ 394 373 395 374 396 } … … 377 399 int main(int argc, char ** argv) { 378 400 379 380 381 401 demo1(); 402 demo2(); 403 // printf("%% %s done running\n", argv[0]); 382 404 } -
doc/theses/mike_brooks_MMath/string.tex
rb006c51e r10a9479d 5 5 6 6 7 \section{Logical overlap} 8 9 \input{sharing-demo.tex} 7 \section{String Operations} 8 9 To prepare for the following discussion, a simple comparison among C, \CC, and \CFA basic string operation is presented. 10 \begin{cquote} 11 \begin{tabular}{@{}l|l|l@{}} 12 C @char [ ]@ & \CC @string@ & \CFA @string@ \\ 13 \hline 14 @strcpy@, @strncpy@ & @=@ & @=@ \\ 15 @strcat@, @strncat@ & @+@ & @+@ \\ 16 @strcmp@, @strncmp@ & @==@, @!=@, @<@, @<=@, @>@, @>=@ & @==@, @!=@, @<@, @<=@, @>@, @>=@ \\ 17 @strlen@ & @length@ & @size@ \\ 18 @[ ]@ & @[ ]@ & @[ ]@ \\ 19 & @substr@ & @substr@ \\ 20 & @replace@ & @=@ \emph{(on a substring)}\\ 21 @strstr@ & @find@, @rfind@ & @find@, MISSING \\ 22 @strcspn@ & @find_first_of@, @find_last_of@ & @include@, MISSING \\ 23 @strspn@ & @find_first_not_of@, @find_last_not_of@ & @exclude@, MISSING \\ 24 & @c_str@ & MISSING \\ 25 \end{tabular} 26 \end{cquote} 27 The key commonality is that operations work on groups of characters for assigning. copying, scanning, and updating. 28 Because a C string is null terminated and requires explicit storage management \see{\VRef{s:String}}, most of its group operations are error prone and expensive. 29 Most high-level string libraries use a separate length field and specialized storage management to support group operations. 30 \CC strings retain null termination to interface with library functions requiring C strings. 31 \begin{cfa} 32 int open( const char * pathname, int flags ); 33 string fname{ "test.cc" ); 34 open( fname.@c_str()@ ); 35 \end{cfa} 36 The function @c_str@ does not create a new null-terminated C string from the \CC string, as that requires passing ownership of the C string to the caller for eventual deletion.\footnote{ 37 C functions like \lstinline{strdup} do return allocated storage that must be freed by the caller.} 38 Instead, each \CC string is null terminator just in case it might be needed for this purpose. 39 Providing this backwards compatibility with C has a ubiquitous performance and storage cost. 40 41 42 \section{Storage Management} 43 44 This section discusses issues related to storage management of strings. 45 Specifically, it is common for strings to logically overlap completely or partially. 46 \begin{cfa} 47 string s1 = "abcdef"; 48 string s2 = s1; $\C{// complete overlap, s2 == "abcdef"}$ 49 string s3 = s1.substr( 0, 3 ); $\C{// partial overlap, s3 == "abc"}$ 50 \end{cfa} 51 This raises the question of how strings behave when an overlapping component is changed, 52 \begin{cfa} 53 s3[1] = 'w'; $\C{// what happens to s1 and s2?}$ 54 \end{cfa} 55 This question is the notion of mutable or immutable strings. 56 For example, Java has immutable strings that are copied when any overlapping string changes. 57 Note, the notion of underlying string mutability is not specified by @const@, \eg: 58 \begin{cfa} 59 const string s1 = "abc"; 60 \end{cfa} 61 Here, @const@ applies to the @s1@ pointer to @"abc"@, and @"abc"@ is an immutable constant that is \emph{copied} into the string's storage. 62 Hence, @s1@ is not pointing at an immutable constant, meaning its underlying string is always mutable, unless some other designation is specified, such as Java's global rule. 63 64 65 \subsection{Logical overlap} 66 67 \CFA provides a dynamic mechanism to indicate mutable or immutable as an assignment attribute: @`shareEdits@. 10 68 11 69 Consider two strings @s1@ and @s1a@ that are in an aliasing relationship, and a third, @s2@, made by a simple copy from @s1@. 12 \par\noindent 13 \begin{tabular}{llll} 14 & @s1@ & @s1a@ & @s2@ \\ 15 %\input{sharing-demo1.tex} 16 \end{tabular} 17 \par\noindent 70 Aliasing (@`shareEdits@) means that changes flow in both directions; with a simple copy, they do not. 71 \input{sharing1.tex} 72 73 Aliasing (@`shareEdits@) means that changes flow in both directions; with a simple copy, they do not. 74 \input{sharing2.tex} 75 76 Assignment of a value is just a modification. 77 The aliasing relationship is established at construction and is unaffected by assignment of a value. 78 \input{sharing3.tex} 79 80 Assignment from a string is just assignment of a value. 81 Whether of not the RHS participates in aliasing is irrelevant. Any aliasing of the LHS is unaffected. 82 \input{sharing4.tex} 83 84 Consider new strings @s1_mid@ being an alias for a run in the middle of @s1@, along with @s2@, made by a simple copy from the middle of @s1@. 85 \input{sharing5.tex} 86 87 Again, @`shareEdits@ passes changes in both directions; copy does not. 88 Note the difference in index values, with the \emph{b} position being 1 in the longer string and 0 in the shorter strings. 89 In the case of s1 aliasing with @s1_mid@, the very same character is being accessed by different positions. 90 \input{sharing6.tex} 91 92 Once again, assignment of a value is a modification that flows through the aliasing relationship, without affecting its structure. 93 \input{sharing7.tex} 94 95 In the \emph{ff} step, which is a positive example of flow across an aliasing relationship, the result is straightforward to accept because the flow direction is from contained (small) to containing (large). 96 The following rules for edits through aliasing substrings will guide how to flow in the opposite direction. 97 98 Growth and shrinkage are natural extensions. 99 An empty substring is a real thing, at a well-defined location, whose meaning is extrapolated from the examples so far. 100 The intended metaphor is to operating a GUI text editor. 101 Having an aliasing substring is like using the mouse to select a few words. 102 Assigning onto an aliasing substring is like typing from having a few words selected: depending how much you type, the file being edited can get shorter or longer. 103 \input{sharing8.tex} 104 105 Multiple portions can be aliased. 106 When there are several aliasing substrings at once, the text editor analogy becomes an online multi-user editor. 107 I should be able to edit a paragraph in one place (changing the document's length), without my edits affecting which letters are within a mouse-selection that you had made previously, somewhere else. 108 \input{sharing9.tex} 109 110 When an edit happens on an aliasing substring that overlaps another, an effect is unavoidable. 111 Here, the passive party sees its selection shortened, to exclude the characters that were not part of the original selection. 112 \input{sharing10.tex} 113 114 TODO: finish typesetting the demo 115 116 %\input{sharing-demo.tex} 18 117 19 118 20 119 \subsection{RAII limitations} 21 120 22 Earlier work on \CFA [to cite Schluntz] implemented the feature of constructors and destructors. A constructor is a user-defined function that runs implicitly, when control passes an object's declaration, while a destructor runs at the exit of the declaration's lexical scope. The feature allows programmers to assume that, whenever a runtime object of a certain type is accessible, the system called one of the programmer's constructor functions on that object, and a matching destructor call will happen in the future. The feature helps programmers know that their programs' invariants obtain. 23 24 The purposes of such invariants go beyond ensuring authentic values for the bits inside the object. These invariants can track occurrences of the managed objects in other data structures. Reference counting is a typical application of the latter invariant type. With a reference-counting smart pointer, the constructor and destructor \emph{of the pointer type} track the life cycles of occurrences of these pointers, by incrementing and decrementing a counter (usually) on the referent object, that is, they maintain a that is state separate from the objects to whose life cycles they are attached. Both the \CC and \CFA RAII systems ares powerful enough to achieve such reference counting. 25 26 The \CC RAII system supports a more advanced application. A life cycle function has access to the object under management, by location; constructors and destuctors receive a @this@ parameter providing its memory address. A lifecycle-function implementation can then add its objects to a collection upon creation, and remove them at destruction. A modulue that provides such objects, by using and encapsulating such a collection, can traverse the collection at relevant times, to keep the objects ``good.'' Then, if you are the user of such an module, declaring an object of its type means not only receiving an authentically ``good'' value at initialization, but receiving a subscription to a service that will keep the value ``good'' until you are done with it. 27 28 In many cases, the relationship between memory location and lifecycle is simple. But with stack-allocated objects being used as parameters and returns, there is a sender version in one stack frame and a receiver version in another. \CC is able to treat those versions as distinct objects and guarantee a copy-constructor call for communicating the value from one to the other. This ability has implications on the language's calling convention. Consider an ordinary function @void f( Vehicle x )@, which receives an aggregate by value. If the type @Vehicle@ has custom lifecycle functions, then a call to a user-provided copy constructor occurs, after the caller evaluates its argument expression, after the callee's stack frame exists, with room for its variable @x@ (which is the location that the copy-constructor must target), but before the user-provided body of @f@ begins executing. \CC achieves this ordering by changing the function signature, in the compiled form, to pass-by-reference and having the callee invoke the copy constructor in its preamble. On the other hand, if @Vehicle@ is a simple structure then the C calling convention is applied as the code originally appeared, that is, the callsite implementation code performs a bitwise copy from the caller's expression result, into the callee's x. 29 30 TODO: learn correction to fix inconcsistency: this discussion says the callee invokes the copy constructor, but only the caller knows which copy constructor to use! 31 32 TODO: discuss the return-value piece of this pattern 33 34 The \CFA RAII system has limited support for using lifecycle functions to provide a ``stay good'' service. It works in restricted settings, including on dynamically allocated objects. It does not work for communicating arguments and returns by value because the system does not produce a constructor call that tracks the implied move from a sender frame to a reciver frame. This limitation does not prevent a typical reference-counting design from using call-with-value/return-of-value, because the constructor--destructor calls are correctly balanced. But it impedes a ``stay-good'' service from supporting call-with-value/return-of-value, because the lifecycles presented to the constructor/destor calls do not keep stable locations. A ``stay-good'' service is acheivable so long as call-with-value/return-of-value do not occur. The original presentation [to cite Schluntz section] acknoweledges this limitiation; the present discussion makes its consequences more apparent. 35 36 The \CFA team sees this limitation as part of a tactical interem state that should some day be improved. The \CFA compiler is currently a source-to-source translator that targets relativly portable C. Several details of its features are provisionally awkward or under-performant until finer control of its code generation is feasible. In the present state, all calls that appear in \CFA source code as call-with-value/return-of-value are emitted this way to the underlying C calling convention. SO WHAT? 37 38 The present string-API contribution has both the ``stay good'' promise and call-with-value/return-of-value being essential. The main string API uses a work-around to acheive the full feature set, at a runtime performance penalty. An alternative API sacrifices call-with-value/return-of-value functionality to recover full runtime performance. These APIs are layered, with the slower, friendlier High Level API (HL) wrapping the faster, more primitive Low Level API (LL). They present the same features, up to lifecycle management, with call-with-value/return-of-value being disabled in LL and implemented with the workaround in HL. The intention is for most future code to target HL. In a more distant future state, where \CFA has an RAII system that can handle the problematic quadrant, the HL layer can be abolished, the LL can be renamed to match today's HL, and LL can have its call-with-value/return-of-value permission reenabled. Then, programs written originally against HL will simply run faster. In the meantime, two use cases of LL exist. Performance-critical sections of applications have LL as an option. Within [Xref perf experiments], though HL-v-LL penalties are measured, typcial comparisons of the contributed string libary vs similar systems are made using LL. This measurement gives a fair estimate of the goal state for \CFA while it is an evloving work in progress. 39 121 Earlier work on \CFA~\cite[ch.~2]{Schluntz17} implemented object constructors and destructors for all types (basic and user defined). 122 A constructor is a user-defined function run implicitly \emph{after} an object's declaration-storage is created, and a destructor is a user-defined function run \emph{before} an object's declaration-storage is deleted. 123 This feature, called RAII~\cite[p.~389]{Stroustrup94}, guarantees pre invariants for users before accessing an object and post invariants for the programming environment after an object terminates. 124 125 The purposes of these invariants goes beyond ensuring authentic values inside an object. 126 Invariants can also track occurrences of managed objects in other data structures. 127 For example, reference counting is a typical application of an invariant outside of the data values. 128 With a reference-counting smart-pointer, the constructor and destructor \emph{of a pointer type} tracks the life cycle of the object it points to. 129 Both \CC and \CFA RAII systems are powerful enough to achieve reference counting. 130 131 In general, a lifecycle function has access to an object by location, \ie constructors and destructors receive a @this@ parameter providing an object's memory address. 132 The lifecycle implementation can then add this object to a collection at creation and remove it at destruction. 133 A module providing lifecycle semantics can traverse the collection at relevant times to keep the objects ``good.'' 134 Hence, declaring such an object not only ensures ``good'' authentic values, but also an implicit subscription to a service that keeps the value ``good'' across its lifetime. 135 136 In many cases, the relationship between memory location and lifecycle is straightforward. 137 For example, stack-allocated objects being used as parameters and returns, with a sender version in one stack frame and a receiver version in another, as opposed to assignment where sender and receiver are in the same stack frame. 138 What is crucial for lifecycle management is knowing if the receiver is initialized or uninitialized, \ie an object is or is not currently associated with management. 139 To provide this knowledge, languages differentiate between initialization and assignment to a left-hand side. 140 \begin{cfa} 141 Obj obj2 = obj1; // initialization, obj2 is uninitialized 142 obj2 = obj1; // assignment, obj2 must be initialized for management to work 143 \end{cfa} 144 Initialization occurs at declaration by value, parameter by argument, return temporary by function call. 145 Hence, it is necessary to have two kinds of constructors: by value or object. 146 \begin{cfa} 147 Obj obj1{ 1, 2, 3 }; // by value, management is initialized 148 Obj obj2 = obj1; // by obj, management is updated 149 \end{cfa} 150 When no object management is required, initialization copies the right-hand value. 151 Hence, the calling convention remains uniform, where the unmanaged case uses @memcpy@ as the initialization constructor and managed uses the specified initialization constructor. 152 153 The \CFA RAII system supports lifecycle functions, except for returning a value from a function to a temporary. 154 For example, in \CC: 155 \begin{cfa} 156 struct S {...}; 157 S identity( S s ) { return s; } 158 S s; 159 s = identity( s ); // S temp = identity( s ); s = temp; 160 \end{cfa} 161 the generated code for the function call created a temporary with initialization from the function call, and then assigns the temporary to the receiver. 162 This two step approach means extra storage for the temporary and two copies to get the result into the receiver variable. 163 \CC{17} introduced return value-optimization (RVO)~\cite{RVO20} to ``avoid copying an object that a function returns as its value, including avoiding creation of a temporary object''. 164 \CFA uses C semantics for function return giving direct value-assignment, which eliminates unnecessary code, but skips an essential feature needed by lifetime management. 165 The following discusses the consequences of this semantics with respect to lifetime management of \CFA strings. 166 167 The present string-API contribution provides lifetime management with initialization semantics on function return. 168 The workaround to achieve the full lifetime semantics does have a runtime performance penalty. 169 An alternative API sacrifices return initialization semantics to recover full runtime performance. 170 These APIs are layered, with the slower, friendlier High Level API (HL) wrapping the faster, more primitive Low Level API (LL). 171 Both API present the same features, up to lifecycle management, with return initialization being disabled in LL and implemented with the workaround in HL. 172 The intention is for most future code to target HL. 173 When \CFA becomes a full compiler, it can provide return initialization with RVO optimizations. 174 Then, programs written with the HL API will simply run faster. 175 In the meantime, performance-critical sections of applications use LL. 176 Subsequent performance experiments~\VRef{s:PerformanceAssessment} with other string libraries has \CFA strings using the LL API. 177 These measurement gives a fair estimate of the goal state for \CFA. 40 178 41 179 42 180 \subsection{Memory management} 43 181 44 A centrepriece of the string module is its memory manager. The managment scheme defines a large shared buffer for strings' text. Allocation in this buffer is always bump-pointer; the buffer is compacted and/or relocated with growth when it fills. A string is a smart pointer into this buffer. 45 46 This cycle of frequent cheap allocations, interspersed with infrequent expensive compactions, has obvious similarities to a general-purpose memory manager based on garbage collection (GC). A few differences are noteworthy. First, in a general purpose manager, the objects of allocation contain pointers to other such objects, making the transitive reachability of these objects be a critical property. Here, the allocations are of buffers of text, never pointers, so one allocation never keeps another one alive. Second, in a general purpose manager, where the handle that keeps an allocation alive is the same as the program's general-purpose inter-object reference, an extremely lean representation of this reference is required. Here, a fatter representation is acceptable because [why??]. 47 48 49 Figure [memmgr-basix.vsdx] shows the representation. A heap header, with its text buffer, defines a sharing context. Often, one global sharing context is appropriate for an entire program; exceptions are discussed in [xref TBD]. Strings are handles into the buffer. They are members of a linked list whose order matches the order of their buffer fragments (exactly, where there is no overlapping, and approximately, where there is). The header maintains a next-allocation pointer (alloc, in the figure) after the last live allocation of the buffer. No external references into the buffer are allowed and the management procedure relocates the text allocations as needed. The string handles contain explicit length fields, null termination characters are not used and all string text is kept in contiguous storage. When strings (the inter-linked hanldes) are allocated on the program's call stack, a sustained period with no use of the program's dynamic memory allocator can ensue, during which the program nonetheless creates strings, destroys them, and runs length-increasing modifications on existing ones. 50 51 Compaction happens when the heap fills. It is the first of two uses of the linked list. The list allows discovering all live string handles, and from them, the ranges of the character buffer that are in use. With these ranges known, their total character count gives the amount of space in use. When that amount is small, compared with the current buffer size, an in-place compaction occurs, which enatils copying the in-use ranges, to be adjacent, at the font of the buffer. When the in-use amount is large, a larger buffer is allocated (using the program's general-purpose dynamic allcator), the in-use strings are copied to be adjacent at the front of it, and the original buffer is freed back to the program's general allocator. Either way, navigating the links between the handles provides the pointers into the buffer, first read, to find the source fragment, then written with the location of the resultant fragment. This linkage across the structure is unaffected during a compaction; only the pointers from the handles to the buffer are modified. This modification, along with the grooming/provisioning of the text storage resouce that it represents, is an example, in the language of [xref RAII limitations] of the string module providing a ``stay good'' service. 52 53 Object lifecycle events are the subscription-management triggers in such a service. There are two fundamental string-creation routines: importing from external text like a C-string, and initialization from an existing \CFA string. When importing, a fresh allocation at the free end fo the buffer occurs, into which the text is copied. The resultant handle is therefore inserted into the list at the position after the incumbent last handle, a position given by the heap manager's ``last handle'' pointer. When initializing from text already on the \CFA heap, the resultant handle is a second reference onto the original run of characters. In this case, the resultant handle's linked-list position is beside the original handle. Both string initialization styles preserve the string module's internal invriant that the linked-list order match the buffer order. For string destruction, the list being doubly linked provides for easy removal of the disappearing handle. 54 55 While a string handle is live, it accepts modification operations, some of which make it reference a different portion of the underlying buffer, and accordingly, move the handle to a different position in the inter-handle list. While special cases have more optimal handling, the general case requires a fresh buffer run. In this case, the new run is allocated at the bump-pointer end and filled with the required value. Then, handles that originally referenced the old location and need to see the new value are pointed at the new buffer location, unlinked from their original positions in the handles' list, and linked in at the end of the list. An optimal case, when the target is not a substring of something larger, and the source is text from elsewhere in the managed buffer, allows the target to be re-pointed at the source characters, and accordingly, move list position to be beside the source. Cases where in-place editing happens, addressed further in [xref: TBD], leave affected handles in their original list positions. In analogy to the two cases of string initialization, the two cases of realizing assignment by moving either to a fresh buffer run, or to overlap references with the source, maintain the invariant of linked list order matching buffer order. 56 57 58 To explain: GCing allocator doing bump-pointer with compaction 59 60 61 62 At the level of the memory manager, these modifications can aways be explained as assignments; for example, an append is an assignemnt into the empty substring at the end. 63 64 While favourable conditions allow for in-place editing, the general case requires a fresh buffer run. For example, if the new value does not fit in the old place, or if other handles are still using the old value, then the new value will use a fresh buffer run. 182 A centrepiece of the string module is its memory manager. 183 The management scheme defines a shared buffer for string text. 184 Allocation in this buffer is via a bump-pointer; 185 the buffer is compacted and/or relocated with growth when it fills. 186 A string is a smart pointer into this buffer. 187 188 This cycle of frequent cheap allocations, interspersed with infrequent expensive compactions, has obvious similarities to a general-purpose memory manager based on garbage collection (GC). 189 A few differences are noteworthy. 190 First, in a general purpose manager, the objects of allocation contain pointers to other such objects, making the transitive reachability of these objects be a critical property. 191 Here, the allocations are text, so one allocation never keeps another alive. 192 Second, in a general purpose manager, the handle that keeps an allocation alive is just a lean pointer. 193 For strings, a fatter representation is acceptable because there are fewer string head pointers versus chained pointers within nodes as for linked containers. 194 195 \begin{figure} 196 \includegraphics{memmgr-basic} 197 \caption{String memory-management data structures} 198 \label{f:memmgr-basic} 199 \end{figure} 200 201 \VRef[Figure]{f:memmgr-basic} shows the representation. 202 A heap header and its text buffer, defines a sharing context. 203 Normally, one global sharing context is appropriate for an entire program; 204 exceptions are discussed in [xref TBD]. 205 A string is a handle into the buffer and linked into a list. 206 The list is doubly linked for $O(1)$ insertion and removal at any location. 207 Strings are orders n the list by text-buffer address, where there is no overlapping, and approximately, where there is. 208 The header maintains a next-allocation pointer, @alloc@, pointing to the last live allocation of the buffer. 209 No external references point into the buffer and the management procedure relocates the text allocations as needed. 210 A string handle contains an explicit string, while its string is contiguous and not null terminated. 211 The length sets an upper limit on the string size, but is large (4 or 8 bytes). 212 String handles can be allocated in the stack or heap, while the text buffer is large enough with good management so that only one dynamic allocation is necessary for it during program execution. 213 During this period strings can vary in size dynamically. 214 215 When the text buffer fills, \ie the next new string allocation causes @alloc@ to point beyond the end of the buffer, the strings are compacted. 216 The linked handles define all live strings in the buffer, which indirectly defines the allocated and free space in the buffer. 217 Since the string handles are in (roughly) sorted order, the handle list can be traversed copying the first text to the start of the buffer and subsequent strings after each over. 218 After compaction, if the amount of free storage is still less than the new string allocation, a larger text buffer is heap allocated, the current buffer is copies into the new buffer, and the original buffer is freed. 219 Note, the list of string handles is unaffected during a compaction; 220 only the string pointers are modified to new buffer locations. 221 222 Object lifecycle events are the subscription-management triggers in such a service. 223 There are two fundamental string-creation routines: importing external text like a C-string or reading a string, and initialization from an existing \CFA string. 224 When importing, storage comes from the end of the buffer, into which the text is copied. 225 The resultant handle is inserted at the end of the handle list to maintain ordering. 226 When initializing from text already in the text buffer, the new handle is a second reference into the original run of characters. 227 In this case, the new handle's linked-list position is after the original handle. 228 Both string initialization styles preserve the string module's internal invariant that the linked-list order matches the buffer order. 229 For string destruction, handles are removed from the list. 230 231 Certain string operations can results in a subset (substring) of another string. 232 The resulting handle is then place in the correct sorted position in the list, possible with a short linear search to locate the position. 233 For string operations resulting in a new string, that string is allocated at the end of the buffer. 234 For shared-edit strings, handles that originally referenced containing locations need to see the new value at the new buffer location. 235 These strings are moved to appropriate locations at the end of the list (addressed further in [xref: TBD]. 236 For nonshared-edit strings, a containing string can be moved and the nonshared strings can remain in the same position. 237 String assignment words similarly to string initialization, maintain the invariant of linked list order matching buffer order. 238 239 At the level of the memory manager, these modifications can always be explained as assignments; for example, an append is an assignment into the empty substring at the end. 240 241 While favourable conditions allow for in-place editing, the general case requires a fresh buffer run. 242 For example, if the new value does not fit in the old place, or if other handles are still using the old value, then the new value will use a fresh buffer run. 65 243 66 244 where there is room for the resulting value in the original buffer location, and where all handles referring to the original buffer location should see the new value, 67 245 68 69 always boiled down to assignment and appendment. Assignment has special cases that happen in-place, but in the general case, it is implemented as a sequence of appends onto a fresh allocation at the end of the buffer. (The sequence has multiple steps when the assignment target is a substring: old before, new middle, old after.) Similarly, an append request can be serviced in-place when there is room, or as a pair of appends 70 246 always boiled down to assignment and appendment. 247 Assignment has special cases that happen in-place, but in the general case, it is implemented as a sequence of appends onto a fresh allocation at the end of the buffer. 248 (The sequence has multiple steps when the assignment target is a substring: old before, new middle, old after.) 249 Similarly, an append request can be serviced in-place when there is room, or as a pair of appends. 71 250 72 251 73 252 \subsection{Sharing implementation} 74 253 75 The \CFA string module has two manners in which se rveral string handles can share an unerlying run of characters.76 77 The first type of sharing is user-requested, following the [xref Logical Overlap]. Here, the user requests, explicitly, that both handles be views of the same logical, modifiable string. This state is typically prod ecd by the substring operation. In a typical substring call, the source string-handle is referencing an entire string, and the resluting, newly made, string handle is referencing a portion of the orignal. In this state, a subsequent modification made by either is visible in both.78 79 The second type of sharing happens when the system implicitly delays the physical execution of a logical \emph{copy} operation, as part of its copy-on-write optimization. This state is typically produced by constructing a new string, using an original string as its in tialization source. In this state, a subsequent modification done on one handle triggers the deferred copy action, leaving the handles referencing different runs within the buffer, holding distinct values.254 The \CFA string module has two manners in which several string handles can share an underlying run of characters. 255 256 The first type of sharing is user-requested, following the [xref Logical Overlap]. Here, the user requests, explicitly, that both handles be views of the same logical, modifiable string. This state is typically produced by the substring operation. In a typical substring call, the source string-handle is referencing an entire string, and the resulting, newly made, string handle is referencing a portion of the original. In this state, a subsequent modification made by either is visible in both. 257 258 The second type of sharing happens when the system implicitly delays the physical execution of a logical \emph{copy} operation, as part of its copy-on-write optimization. This state is typically produced by constructing a new string, using an original string as its initialization source. In this state, a subsequent modification done on one handle triggers the deferred copy action, leaving the handles referencing different runs within the buffer, holding distinct values. 80 259 81 260 A further abstraction, in the string module's implementation, helps distinguish the two senses of sharing. A share-edit set (SES) is an equivalence class over string handles, being the reflexive, symmetric and transitive closure of the relationship of one being constructed from the other, with the ``share edits'' opt-in given. It is represented by a second linked list among the handles. A string that shares edits with no other is in a SES by itself. Inside a SES, a logical modification of one substring portion may change the logical value in another, depending on whether the two actually overlap. Conversely, no logical value change can flow outside of a SES. Even if a modification on one string handle does not reveal itself \emph{logically} to anther handle in the same SES (because they don't overlap), if the modification is length-changing, completing the modification requires visiting the second handle to adjust its location in the sliding text. … … 84 263 \subsection{Avoiding implicit sharing} 85 264 86 There are tradeoffs associated with the copy-on-write mechanism. Several qua titative matters are detailed in the [xref: Performance Assessment] section and the qualitiative issue of multi-threaded support is introduced here. The \CFA sting library provides a switch to disable the sharing mechanism for situtations where it is inappropriate.265 There are tradeoffs associated with the copy-on-write mechanism. Several qualitative matters are detailed in the [xref: Performance Assessment] section and the qualitative issue of multi-threaded support is introduced here. The \CFA sting library provides a switch to disable the sharing mechanism for situations where it is inappropriate. 87 266 88 267 Because of the inter-linked string handles, any participant managing one string is also managing, directly, the neighbouring strings, and from there, a data structure of the ``set of all strings.'' This data structure is intended for sequential access. A negative consequence of this decision is that multiple threads using strings need to be set up so that they avoid attempting to modify (concurrently) an instance of this structure. A positive consequence is that a single-threaded program, or a program with several independent threads, can use the sharing context without an overhead from locking. 89 268 90 The \CFA sting library provides the @string_sharectx@ type to control an ambient sharing context for the current thread. It allows two adjustments: to opt out of sharing entirely, or to begin sharing within a private context. Either way, the chosen mode applies to the current thread, for the duration of the lifetime of the created @string_sharectx@ object, up to being suspended by child lif times of different contexts. The indended use is with stack-managed lifetimes, in which the established context lasts until the current function returns, and affects all functions called that don't create their own contexts.91 \lstinputlisting[language=CFA, firstline=20, lastline=34]{sharectx -demo.cfa}269 The \CFA sting library provides the @string_sharectx@ type to control an ambient sharing context for the current thread. It allows two adjustments: to opt out of sharing entirely, or to begin sharing within a private context. Either way, the chosen mode applies to the current thread, for the duration of the lifetime of the created @string_sharectx@ object, up to being suspended by child lifetimes of different contexts. The indented use is with stack-managed lifetimes, in which the established context lasts until the current function returns, and affects all functions called that don't create their own contexts. 270 \lstinputlisting[language=CFA, firstline=20, lastline=34]{sharectx.run.cfa} 92 271 In this example, the single-letter functions are called in alphabetic order. The functions @a@ and @d@ share string character ranges within themselves, but not with each other. The functions @b@, @c@ and @e@ never share anything. 93 272 94 273 [ TODO: true up with ``is thread local'' (implement that and expand this discussion to give a concurrent example, or adjust this wording) ] 95 274 96 When the string library is running with sharing disabled, it runs without implicit thread-safety challenges (which same as the STL) and with performance goals similar to the STL's. This thread-safety quality means concurrent users of one string object must still bring their own mutual ex lusion, but the string libary will not add any cross thread uses that were not apparent in the user's code.275 When the string library is running with sharing disabled, it runs without implicit thread-safety challenges (which same as the STL) and with performance goals similar to the STL's. This thread-safety quality means concurrent users of one string object must still bring their own mutual exclusion, but the string library will not add any cross thread uses that were not apparent in the user's code. 97 276 98 277 Running with sharing disabled can be thought of as STL-emulation mode. … … 107 286 108 287 109 \subsection{Performance assessment} 110 111 I assessed the CFA string library's speed and memory usage. I present these results ineven quivalent cases, due to either micro-optimizations foregone, or fundamental costs of the added functionality. They also show the benefits and tradeoffs, as >100\% effects, of switching to CFA, with the tradeoff points quantified. The final test shows the overall win of the CFA text-sharing mechanism. It exercises several operations together, showing CFA enabling clean user code to achieve performance that STL requires less-clean user code to achieve. 112 113 To discuss: general goal of ... while STL makes you think about memory management, all the time, and if you do your performance can be great ... CFA sacrifices this advantage modestly in exchange for big wins when you're not thinking about memory mamangement. [Does this position cover all of it?] 288 \section{Performance assessment} 289 \label{s:PerformanceAssessment} 290 291 I assessed the \CFA string library's speed and memory usage. I present these results in even equivalent cases, due to either micro-optimizations foregone, or fundamental costs of the added functionality. They also show the benefits and tradeoffs, as >100\% effects, of switching to \CFA, with the tradeoff points quantified. The final test shows the overall win of the \CFA text-sharing mechanism. It exercises several operations together, showing \CFA enabling clean user code to achieve performance that STL requires less-clean user code to achieve. 292 293 To discuss: general goal of ... while STL makes you think about memory management, all the time, and if you do your performance can be great ... \CFA sacrifices this advantage modestly in exchange for big wins when you're not thinking about memory management. [Does this position cover all of it?] 114 294 115 295 To discuss: revisit HL v LL APIs 116 296 117 To discuss: revisit no sharing as STL emulation modes297 To discuss: revisit no-sharing as STL emulation modes 118 298 119 299 These tests use randomly generated text fragments of varying lengths. A collection of such fragments is a \emph{corpus}. The mean length of a fragment from corpus is a typical explanatory variable. Such a length is used in one of three modes: … … 121 301 \item [Fixed-size] means all string fragments are of the stated size 122 302 \item [Varying from 1] means string lengths are drawn from a geometric distribution with the stated mean, and all lengths occur 123 \item [Varying from 16] means string lengths are drawn from a geometric distribution with the stated mean, but only lengths 16 and obove occur; thus, the stated mean will be above 16.303 \item [Varying from 16] means string lengths are drawn from a geometric distribution with the stated mean, but only lengths 16 and above occur; thus, the stated mean will be above 16. 124 304 \end{description} 125 The geometric distribution implies that lengths much longer than the mean occur frequently. The special treatment of length 16 deals with comparison to STL, given that STL has short-string optimization (see [ todo: write and cross-ref future-work SSO]), currently not implmented in \CFA. When success notwithstanding SSO is illustrated, a fixed-size or from-16 distribution ensures that extra-optimized cases are not part of the mix on the STL side. In all experiments that use a corpus, its text is generated and loaded into the SUT before the timed phase begins.305 The geometric distribution implies that lengths much longer than the mean occur frequently. The special treatment of length 16 deals with comparison to STL, given that STL has short-string optimization (see [TODO: write and cross-ref future-work SSO]), currently not implemented in \CFA. When success notwithstanding SSO is illustrated, a fixed-size or from-16 distribution ensures that extra-optimized cases are not part of the mix on the STL side. In all experiments that use a corpus, its text is generated and loaded into the SUT before the timed phase begins. 126 306 127 307 To discuss: vocabulary for reused case variables … … 136 316 \subsubsection{Test: Append} 137 317 138 This test measures the speed of appending fragments of text onto a growing string. Its subcases include both CFA being similar to STL, and their designs offering a tradeoff.139 140 One experimental variable is the user's operation being @a = a + b@ vs. @a += b@. While experienced programmers expect the latter to be ``what you obviously should do,'' control ing the penatly of the former both helps the API be accessible to beginners and also helps offer confidence that when a user tries to compose operations, the forms that are most natural to the user's composition are viable.141 142 Another experimental variable is whether the user's logical allocation is fresh vs reused. Here, \emph{reusing a logical allocation}, means that the pr gram variable, into which the user is concatenating, previously held a long string:\\318 This test measures the speed of appending fragments of text onto a growing string. Its subcases include both \CFA being similar to STL, and their designs offering a tradeoff. 319 320 One experimental variable is the user's operation being @a = a + b@ vs. @a += b@. While experienced programmers expect the latter to be ``what you obviously should do,'' controlling the penalty of the former both helps the API be accessible to beginners and also helps offer confidence that when a user tries to compose operations, the forms that are most natural to the user's composition are viable. 321 322 Another experimental variable is whether the user's logical allocation is fresh vs reused. Here, \emph{reusing a logical allocation}, means that the program variable, into which the user is concatenating, previously held a long string:\\ 143 323 \begin{tabular}{ll} 144 324 Logical allocation fresh & Logical allocation reused \\ … … 150 330 @ } @ & @ } @ 151 331 \end{tabular}\\ 152 These benchmark drivers have an outer loop for ``until a sample-worthy amount of execution has happened'' and an inner loop for ``build up the desired-length string.'' It is sensible to doubt that a user should have to care about this difference, yet the STL performs differently in these cases. Concret ly, both cases incur the cost of copying characters into the target string, but only the allocation-fresh case incurs a further reallocation cost, which is generally paid at points of doubling the length. For the STL, this cost includes obtaining a fresh buffer from the memory allocator and copying older characters into the new buffer, while CFA-sharing hides such a cost entirely. The reuse-vs-fresh distinction is only relevant in the currrent \emph{append} tests.153 154 The \emph{append} tests use the varying-from-1 corpus construction; that is they do not assume away the STL's advantage from small-string op itimization.155 156 To discuss: any other case variables intr uduced in the performance intro332 These benchmark drivers have an outer loop for ``until a sample-worthy amount of execution has happened'' and an inner loop for ``build up the desired-length string.'' It is sensible to doubt that a user should have to care about this difference, yet the STL performs differently in these cases. Concretely, both cases incur the cost of copying characters into the target string, but only the allocation-fresh case incurs a further reallocation cost, which is generally paid at points of doubling the length. For the STL, this cost includes obtaining a fresh buffer from the memory allocator and copying older characters into the new buffer, while \CFA-sharing hides such a cost entirely. The reuse-vs-fresh distinction is only relevant in the current \emph{append} tests. 333 334 The \emph{append} tests use the varying-from-1 corpus construction; that is they do not assume away the STL's advantage from small-string optimization. 335 336 To discuss: any other case variables introduced in the performance intro 157 337 158 338 \begin{figure} … … 162 342 \end{figure} 163 343 164 Figure \ref{fig:string-graph-peq-cppemu} shows this behaviour, by the STL and by \CFA in STL emulation mode. \CFA reproduces STL's performance, up to a 15\% penalty averaged over the cases shown, diminishing with larger strings, and 50\% in the worst case. This pena tly characterizes the amount of implementation fine tuning done with STL and not done with \CFA in present state. The larger inherent penalty, for a user mismanaging reuse, is 40\% averaged over the cases shown, is minimally 24\%, shows up consistently between the STL and \CFA implementations, and increases with larger strings.344 Figure \ref{fig:string-graph-peq-cppemu} shows this behaviour, by the STL and by \CFA in STL emulation mode. \CFA reproduces STL's performance, up to a 15\% penalty averaged over the cases shown, diminishing with larger strings, and 50\% in the worst case. This penalty characterizes the amount of implementation fine tuning done with STL and not done with \CFA in present state. The larger inherent penalty, for a user mismanaging reuse, is 40\% averaged over the cases shown, is minimally 24\%, shows up consistently between the STL and \CFA implementations, and increases with larger strings. 165 345 166 346 \begin{figure} … … 170 350 \end{figure} 171 351 172 In sharing mode, \CFA makes the fresh/reuse difference disappear, as shown in Figure \ref{fig:string-graph-peq-sharing}. At append lengths 5 and above, CFA not only splits the two baseline STL cases, but its slowdown of 16\% over (STL with user-managed reuse) is close to the \CFA-v-STL implementation difference seen with \CFA in STL-emulation mode.352 In sharing mode, \CFA makes the fresh/reuse difference disappear, as shown in Figure \ref{fig:string-graph-peq-sharing}. At append lengths 5 and above, \CFA not only splits the two baseline STL cases, but its slowdown of 16\% over (STL with user-managed reuse) is close to the \CFA-v-STL implementation difference seen with \CFA in STL-emulation mode. 173 353 174 354 \begin{figure} … … 178 358 \end{figure} 179 359 180 When the user takes a further step beyond the STL's optimal zone, by running @x = x + y@, as in Figure \ref{fig:string-graph-pta-sharing}, the STL's penalty is above $15 \times$ while CFA's (with sharing) is under $2 \times$, averaged across the cases shown here. Moreover, the STL's gap increases with string size, while \CFA's converges.360 When the user takes a further step beyond the STL's optimal zone, by running @x = x + y@, as in Figure \ref{fig:string-graph-pta-sharing}, the STL's penalty is above $15 \times$ while \CFA's (with sharing) is under $2 \times$, averaged across the cases shown here. Moreover, the STL's gap increases with string size, while \CFA's converges. 181 361 182 362 \subsubsection{Test: Pass argument} … … 184 364 To have introduced: STL string library forces users to think about memory management when communicating values across a function call 185 365 186 STL charges a prohibitive penalty for passing a string by value. With implicit sharing active, \CFA treats this operation as normal and supported. This test illustrates a main ad jantage of the \CFA sharing algorithm. It also has a case in which STL's small-string optimization provides a successful mitigation.366 STL charges a prohibitive penalty for passing a string by value. With implicit sharing active, \CFA treats this operation as normal and supported. This test illustrates a main advantage of the \CFA sharing algorithm. It also has a case in which STL's small-string optimization provides a successful mitigation. 187 367 188 368 \begin{figure} … … 201 381 This test directly compares the allocation schemes of the \CFA string with sharing, compared with the STL string. It treats the \CFA scheme as a form of garbage collection, and the STL scheme as an application of malloc-free. The test shows that \CFA enables faster speed at a cost in memory usage. 202 382 203 A garbage collector, afforded the freedom of managed memory, often runs faster than malloc-free (in an am mortized analysis, even though it must occasionally stop to collect) because it is able to use its collection time to move objects. (In the case of the mini-allocator powering the \CFA string library, objects are runs of text.) Moving objects lets fresh allocations consume from a large contiguous store of available memory; the ``bump pointer'' book-keeping for such a scheme is very light. A malloc-free implementation without the freedom to move objects must, in the general case, allocate in the spaces between existing objects; doing so entails the heavier book-keeping to navigate and maintain a linked structure.383 A garbage collector, afforded the freedom of managed memory, often runs faster than malloc-free (in an amortized analysis, even though it must occasionally stop to collect) because it is able to use its collection time to move objects. (In the case of the mini-allocator powering the \CFA string library, objects are runs of text.) Moving objects lets fresh allocations consume from a large contiguous store of available memory; the ``bump pointer'' book-keeping for such a scheme is very light. A malloc-free implementation without the freedom to move objects must, in the general case, allocate in the spaces between existing objects; doing so entails the heavier book-keeping to navigate and maintain a linked structure. 204 384 205 385 A garbage collector keeps allocations around for longer than the using program can reach them. By contrast, a program using malloc-free (correctly) releases allocations exactly when they are no longer reachable. Therefore, the same harness will use more memory while running under garbage collection. A garbage collector can minimize the memory overhead by searching for these dead allocations aggressively, that is, by collecting more often. Tuned in this way, it spends a lot of time collecting, easily so much as to overwhelm its speed advantage from bump-pointer allocation. If it is tuned to collect rarely, then it leaves a lot of garbage allocated (waiting to be collected) but gains the advantage of little time spent doing collection. … … 215 395 \begin{figure} 216 396 \includegraphics[width=\textwidth]{string-graph-allocn.png} 217 \caption{Space and time performance, under varying fraction-live targets, for the five string lengths shown, at ( emph{Fixed-size} corpus construction. [MISSING] The identified clusters are for the default fraction-live target, which is 30\%. MISSING: STL results, typically just below the 0.5--0.9CFA segment. All runs keep an average of 836 strings live, and the median string lifetime is ?? allocations.}397 \caption{Space and time performance, under varying fraction-live targets, for the five string lengths shown, at (\emph{Fixed-size} corpus construction. [MISSING] The identified clusters are for the default fraction-live target, which is 30\%. MISSING: STL results, typically just below the 0.5--0.9 \CFA segment. All runs keep an average of 836 strings live, and the median string lifetime is ?? allocations.} 218 398 \label{fig:string-graph-allocn} 219 399 \end{figure} 220 400 221 Figure \ref{fig:string-graph-allocn} shows the results of this experi emnt. At all string sizes, varying the liveness threshold gives offers speed-for-space tradeoffs relative to STL. At the default liveness threshold, all measured string sizes see a ??\%--??\% speedup for a ??\%--??\% increase in memory footprint.401 Figure \ref{fig:string-graph-allocn} shows the results of this experiment. At all string sizes, varying the liveness threshold gives offers speed-for-space tradeoffs relative to STL. At the default liveness threshold, all measured string sizes see a ??\%--??\% speedup for a ??\%--??\% increase in memory footprint. 222 402 223 403 … … 225 405 \subsubsection{Test: Normalize} 226 406 227 This test is more applied than the earlier ones. It combines the effects of several operations. It also demonstrates a case of the CFA API allowing user code to perform well, while being written without overt memory management, while achieving similar performance in STL requires adding memory-management complexity.407 This test is more applied than the earlier ones. It combines the effects of several operations. It also demonstrates a case of the \CFA API allowing user code to perform well, while being written without overt memory management, while achieving similar performance in STL requires adding memory-management complexity. 228 408 229 409 To motivate: edits being rare -
doc/theses/mike_brooks_MMath/uw-ethesis.bib
rb006c51e r10a9479d 124 124 } 125 125 126 127 126 @misc{Mendio24, 128 127 contributer = {pabuhr@plg}, … … 132 131 howpublished= {\url{https://www.mend.io/most-secure-programming-languages}}, 133 132 } 133 134 @misc{RVO20, 135 contributer = {pabuhr@plg}, 136 title = {Return value optimization ({RVO})}, 137 author = {Special Interest Group on {C++}}, 138 year = 2020, 139 month = jun, 140 howpublished= {\url{https://sigcpp.github.io/2020/06/08/return-value-optimization}}, 141 } -
doc/theses/mike_brooks_MMath/uw-ethesis.tex
rb006c51e r10a9479d 102 102 \input{common} 103 103 %\usepackage{common} 104 104 105 \CFAStyle % CFA code-style 105 106 \lstset{language=cfa,belowskip=-1pt} % set default language to CFA … … 108 109 \lstnewenvironment{java}[1][]{\lstset{language=java,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{} 109 110 \lstset{inputpath={programs}} 111 \lstset{xleftmargin=1\parindentlnth} 110 112 111 113 \newcommand{\uCpp}{$\mu$\CC} -
doc/uC++toCFA/.gitignore
rb006c51e r10a9479d 3 3 *.pdf 4 4 *.ps 5 *.cc 6 *.cfa -
doc/uC++toCFA/uC++toCFA.tex
rb006c51e r10a9479d 11 11 %% Created On : Wed Apr 6 14:53:29 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Tue Oct 22 17:45:48202414 %% Update Count : 6 06813 %% Last Modified On : Fri Nov 15 09:55:34 2024 14 %% Update Count : 6249 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 357 357 358 358 359 \section{Constructor / Destructor} 360 361 \begin{cquote} 362 \begin{tabular}{@{}l|l@{}} 363 \begin{uC++} 364 365 struct S { 366 int i, j; 367 368 @S@( int i, int j ) { S::i = i; S::j = j; } 369 @~S@() {} 370 }; 371 S s1 = { 1, 2 }; 372 373 S * s2 = new S{ 1, 2 }; 374 delete s2; 375 s2 = new S{ 1, 2 }; 376 delete s2; 377 S & s3 = *new S{ 1, 2 }; 378 delete &s3; 379 s3 = *new S{ 1, 2 }; 380 delete &s3; 381 \end{uC++} 382 & 383 \begin{cfa} 384 #include <stdlib.hfa> // new (malloc) 385 struct S { 386 int i, j; 387 }; 388 void @?{}@( S & s, int i, int j ) { s.i = i; s.j = j; } 389 void @^?{}@( S & s ) { s.i = 0; s.j = 0; } 390 391 S s1 = { 1, 2 }; 392 // cannot use 0/1 (zero_t/one_t) with "new" 393 S * s2 = new( 1@n@, 2 ); // n => (int) 394 delete( s2 ); 395 s2 = new( 1n, 2 ); 396 delete( s2 ); 397 S & s3 = *new( 1n, 2 ); 398 delete( s3 ); 399 &s3 = &*new( 1n, 2 ); 400 delete( s3 ); 401 \end{cfa} 402 \end{tabular} 403 \end{cquote} 404 405 359 406 \section{\texorpdfstring{Structures (object-oriented \protect\vs routine style)}{Structures (object-oriented vs. routine style)}} 360 407 … … 381 428 setter( @s,@ 3 ); // normal calls 382 429 int k = getter( @s@ ); 383 \end{cfa}384 \end{tabular}385 \end{cquote}386 387 388 \section{Constructor / Destructor}389 390 \begin{cquote}391 \begin{tabular}{@{}l|l@{}}392 \begin{uC++}393 394 struct S {395 int i, j;396 S( int i, int j ) { S::i = i; S::j = j; }397 ~S() {}398 };399 S s = { 1, 2 }, s2{ 1, 2 };400 S * s3 = new S{ 1, 2 };401 S & s4 = *new S{ 1, 2 };402 \end{uC++}403 &404 \begin{cfa}405 #include <stdlib.hfa> // malloc406 struct S {407 int i, j;408 };409 void ?{}( S & s, int i, int j ) { s.[i, j] = [i, j]; }410 void ^?{}( S & s ) {}411 S s = { 1, 2 }, s2{ 1, 2 };412 S * s3 = &(*malloc()){ 1, 2 };413 S & s4 = (*malloc()){ 1, 2 }; // fails414 430 \end{cfa} 415 431 \end{tabular} … … 498 514 499 515 500 \section{Coroutine s}516 \section{Coroutine} 501 517 502 518 \begin{cquote} … … 504 520 \begin{uC++} 505 521 506 _CoroutineC {522 @_Coroutine@ C { 507 523 // private coroutine fields 508 524 void main() { 509 ... suspend();...510 ... _Resume E( ... ) _At partner;511 ... uThisCoroutine();...525 ... @suspend();@ ... 526 ... @_Resume E( ... ) _At partner;@ 527 ... @uThisCoroutine();@ ... 512 528 513 529 } 514 530 public: 515 531 void mem( ... ) { 516 ... resume()...532 ... @resume();@ ... 517 533 } 518 534 }; … … 521 537 \begin{cfa} 522 538 #include <$coroutine$.hfa> 523 coroutineC {539 @coroutine@ C { 524 540 // private coroutine fields 525 541 526 542 }; 527 543 void main( C & c ) { 528 ... suspend;... // keyword not routine529 ... resumeAt( partner, ExceptionInst( E, ... ) );530 ... active_coroutine();...544 ... @suspend;@ ... // keyword not routine 545 ... @resumeAt( partner, ExceptionInst( E, ... ) );@ 546 ... @active_coroutine();@ ... 531 547 } 532 548 void mem( C & c, ... ) { 533 ... resume( c );...549 ... @resume( c );@ ... 534 550 } 535 551 \end{cfa} … … 540 556 541 557 558 \section{Thread} 559 560 \begin{cquote} 561 \begin{tabular}{@{}l|ll@{}} 562 \begin{uC++} 563 564 @_Task@ T { 565 // private task fields 566 void main() { 567 ... @_Resume E( ... ) _At partner@; 568 ... @uThisTask();@ ... 569 } 570 public: 571 }; 572 \end{uC++} 573 & 574 \begin{cfa} 575 #include <$thread$.hfa> 576 @thread@ T { 577 // private task fields 578 579 }; 580 void main( @T & t@ ) { 581 ... @resumeAt( partner, ExceptionInst( E, ... )@ ); 582 ... @active_thread();@ ... 583 } 584 \end{cfa} 585 \\ 586 \multicolumn{2}{@{}l@{}}{\lstinline{T t; // start thread in main routine}} 587 \end{tabular} 588 \end{cquote} 589 590 542 591 \section{\lstinline{COBEGIN}/\lstinline{COFOR}} 543 592 … … 548 597 #include <uCobegin.h> 549 598 int main() { 550 COBEGIN599 @COBEGIN@ 551 600 BEGIN osacquire( cout ) << "A" << endl; END 552 601 BEGIN osacquire( cout ) << "B" << endl; END … … 554 603 BEGIN osacquire( cout ) << "D" << endl; END 555 604 BEGIN osacquire( cout ) << "E" << endl; END 556 COEND557 COFOR( i, 1, 10,605 @COEND@ 606 @COFOR@( i, 1, 10, 558 607 osacquire( cout ) << i << endl; 559 608 ) … … 566 615 int main() { 567 616 { 568 corun{ mutex( sout ) sout | "A"; }617 @corun@ { mutex( sout ) sout | "A"; } 569 618 corun { mutex( sout ) sout | "B"; } 570 619 corun { mutex( sout ) sout | "C"; } … … 572 621 corun { mutex( sout ) sout | "E"; } 573 622 } 574 cofor( i; 10 ) {623 @cofor@( i; 10 ) { 575 624 mutex( sout ) sout | i; 576 625 } … … 591 640 592 641 struct StrMsg : @public uActor::Message@ { 642 593 643 const char * val; // string message 594 595 644 596 645 StrMsg( const char * val ) : … … 600 649 _Actor Hello { ${\color{red}\LstCommentStyle{// : public uActor}}$ 601 650 Allocation receive( Message & msg ) { 602 Case( StrMsg, msg ) { // discriminate 651 Case( @StartMsg@, msg ) { // discriminate 652 653 } else Case( StrMsg, msg ) { 603 654 osacquire( cout ) << msg_d->val << endl; 604 }; 605 return Delete; // delete after use 655 656 } else Case( @StopMsg@, msg ) 657 return Delete; // delete actor 658 return Nodelete; // reuse actor 606 659 } 607 660 }; 608 661 int main() { 609 662 @uActor::start();@ // start actor system 610 *new Hello() | *new StrMsg( "hello" ); 611 *new Hello() | *new StrMsg( "bonjour" ); 612 @uActor::stop();@ // wait for all actors to terminate 663 *new Hello() | uActor::startMsg 664 | *new StrMsg( "hello" ) | uActor::stopMsg; 665 *new Hello() | uActor::startMsg 666 | *new StrMsg( "bonjour" ) | uActor::stopMsg; 667 @uActor::stop();@ // wait for actors to terminate 613 668 } 614 669 \end{uC++} … … 623 678 const char * val; // string message 624 679 }; 625 void ?{}( StrMsg & msg, char * str ) { 680 void ?{}( StrMsg & msg, const char * str ) { 681 @set_allocation( msg, Delete );@ // delete after use 626 682 msg.val = str; 627 @set_allocation( msg, Delete );@ // delete after use 628 } 629 struct Hello{630 @inline actor;@ // derived actor631 } ;683 } 684 struct Hello { @inline actor;@ }; // derived actor 685 allocation receive( Hello & receiver, @start_msg_t@ & ) { 686 return Nodelete; 687 } 632 688 allocation receive( Hello & receiver, StrMsg & msg ) { 633 689 mutex( sout ) sout | msg.val; 634 return Delete; // delete after use 690 return Nodelete; // reuse actor 691 } 692 allocation receive( Hello & receiver, @stop_msg_t@ & ) { 693 return Delete; // delete actor 635 694 } 636 695 637 696 int main() { 638 @start_actor_system();@ // start actor system 639 *(Hello *)new() | *(StrMsg *)new( "hello" ); 640 *(Hello *)new() | *(StrMsg *)new( "bonjour" ); 641 @stop_actor_system();@ // wait for all actors to terminate 642 } 643 \end{cfa} 644 \end{tabular} 645 \end{cquote} 646 647 648 \section{Threads} 649 650 \begin{cquote} 651 \begin{tabular}{@{}l|ll@{}} 652 \begin{uC++} 653 654 @_Task@ T { 655 // private task fields 656 void main() { 657 ... _Resume E( ... ) _At partner; 658 ... uThisTask(); ... 659 } 660 public: 661 }; 662 \end{uC++} 663 & 664 \begin{cfa} 665 #include <$thread$.hfa> 666 @thread@ T { 667 // private task fields 668 669 }; 670 void main( @T & t@ ) { 671 ... resumeAt( partner, ExceptionInst( E, ... ) ); 672 ... active_thread(); ... 673 } 674 \end{cfa} 675 \\ 676 \multicolumn{2}{@{}l@{}}{\lstinline{T t; // start thread in main routine}} 697 @actor_start();@ // start actor system 698 *(Hello *)new() | start_msg 699 | *(StrMsg *)new( "hello" ) | stop_msg; 700 *(Hello *)new() | start_msg 701 | *(StrMsg *)new( "bonjour" ) | stop_msg; 702 @actor_stop();@ // wait for actors to terminate 703 } 704 \end{cfa} 677 705 \end{tabular} 678 706 \end{cquote} … … 710 738 711 739 712 \section{ Monitors}740 \section{Barrier} 713 741 714 742 \begin{cquote} 715 743 \begin{tabular}{@{}l|ll@{}} 716 744 \begin{uC++} 717 718 @_Monitor@ M { 719 @uCondition@ c; 720 bool avail = true; 745 #include <iostream> 746 using namespace std; 747 #include <uBarrier.h> 748 749 @_Cormonitor@ Barrier 750 : @public uBarrier@ { // inheritance 751 int total; 752 void @last@() { cout << total << endl; } 721 753 public: 722 723 void rtn() { 724 if ( ! avail ) c.wait(); 725 else avail = false; 754 Barrier( unsigned int group ) : 755 @uBarrier( group )@ { 756 total = 0; 757 } 758 void @block@( int subtotal ) { 759 760 761 total += subtotal; 762 @uBarrier::block();@ 763 } 764 }; 765 enum { N = 3 }; 766 Barrier b{ N }; 767 768 _Task T { 769 void main() { 770 for ( int i = 0; i < 10; i += 1 ) { 771 b.block( 1 ); 772 } 773 } 774 }; 775 int main() { 776 uProcessor p[N - 1]; 777 T t[N]; 778 } 779 \end{uC++} 780 & 781 \begin{cfa} 782 #include <fstream.hfa> 783 #include <$thread$.hfa> 784 #include <barrier.hfa> 785 #include <mutex_stmt.hfa> 786 struct Barrier { 787 @barrier b;@ // containment 788 int total; 789 790 }; 791 void ?{}( Barrier & B, unsigned int group ) with(B) { 792 @?{}( b, group );@ // initialize barrier 793 total = 0; 794 } 795 unsigned int block( Barrier & B, int subtotal ) with(B) { 796 void @last@() { sout | total; } // called by Gth arriving thread 797 @mutex( b )@ { // use barrier's mutual exclusion 798 total += subtotal; 799 return @block@( b, last ); // wait for barrier trigger 800 } 801 } 802 enum { N = 3 }; 803 Barrier b{ N }; 804 805 thread T {}; 806 void main( T & ) { 807 for ( 10 ) { 808 block( b, 1 ); 809 } 810 } 811 812 int main() { 813 processor p[N - 1]; 814 T t[N]; 815 } 816 \end{cfa} 817 \end{tabular} 818 \end{cquote} 819 820 \newpage 821 822 \section{Monitor} 823 824 Internal Scheduling 825 \begin{cquote} 826 \begin{tabular}{@{}l|ll@{}} 827 \begin{uC++} 828 829 @_Monitor@ BoundedBufferI { 830 @uCondition@ full, empty; 831 int front = 0, back = 0, count = 0; 832 int elements[20]; 833 public: 834 835 836 837 @_Nomutex@ int query() const { return count; } 838 839 void insert( int elem ) { 840 if ( count == 20 ) @empty.wait();@ 841 elements[back] = elem; 842 back = ( back + 1 ) % 20; 843 count += 1; 844 @full.signal();@ 845 } 846 int remove() { 847 if ( count == 0 ) @full.wait();@ 848 int elem = elements[front]; 849 front = ( front + 1 ) % 20; 850 count -= 1; 851 @empty.signal();@ 852 return elem; 726 853 } 727 854 }; … … 730 857 \begin{cfa} 731 858 #include <$monitor$.hfa> 732 @monitor@ M { 733 @condition@ c; 734 bool avail; 735 }; 736 void ?{}( M & m ) { m.avail = true; } 737 void rtn( M & m ) with( m ) { 738 if ( ! avail ) wait( c ); 739 else avail = false; 740 } 741 742 \end{cfa} 743 \\ 744 \multicolumn{2}{@{}l@{}}{\lstinline{M m;}} 859 @monitor@ BoundedBufferI { 860 @condition@ full, empty; 861 int front, back, count; 862 int elements[20]; 863 }; 864 void ?{}( BoundedBufferI & buf ) with( buf ) { 865 front = back = count = 0; 866 } 867 int query( BoundedBufferI & buf ) { return buf.count; } 868 int remove( BoundedBufferI & @mutex@ buf ); // forward 869 void insert( BoundedBufferI & @mutex@ buf, int elem ) with( buf ) { 870 if ( count == 20 ) @wait( empty );@ 871 elements[back] = elem; 872 back = ( back + 1 ) % 20; 873 count += 1 874 @signal( full );@ 875 } 876 int remove( BoundedBufferI & @mutex@ buf ) with( buf ) { 877 if ( count == 0 ) @wait( full );@ 878 int elem = elements[front]; 879 front = ( front + 1 ) % 20; 880 count -= 1; 881 @signal( empty );@ 882 return elem; 883 } 884 885 \end{cfa} 886 \end{tabular} 887 \end{cquote} 888 889 \enlargethispage{1000pt} 890 891 \noindent 892 External Scheduling 893 \begin{cquote} 894 \begin{tabular}{@{}l|ll@{}} 895 \begin{uC++} 896 897 _Monitor BoundedBuffer { 898 int front = 0, back = 0, count = 0; 899 int elements[20]; 900 public: 901 _Nomutex int query() const { return count; } 902 void insert( int elem ); 903 int remove(); 904 }; 905 906 void BoundedBuffer::insert( int elem ) { 907 if ( count == 20 ) @_Accept( remove );@ 908 elements[back] = elem; 909 back = ( back + 1 ) % 20; 910 count += 1; 911 } 912 int BoundedBuffer::remove() { 913 if ( count == 0 ) @_Accept( insert );@ 914 int elem = elements[front]; 915 front = ( front + 1 ) % 20; 916 count -= 1; 917 return elem; 918 } 919 \end{uC++} 920 & 921 \begin{cfa} 922 #include <$monitor$.hfa> 923 monitor BoundedBuffer { 924 int front, back, count; 925 int elements[20]; 926 }; 927 void ?{}( BoundedBuffer & buf ) with( buf ) { 928 front = back = count = 0; 929 } 930 int query( BoundedBuffer & buf ) { return buf.count; } 931 int remove( BoundedBuffer & @mutex@ buf ); // forward 932 void insert( BoundedBuffer & @mutex@ buf, int elem ) with( buf ) { 933 if ( count == 20 ) @waitfor( remove : buf );@ 934 elements[back] = elem; 935 back = ( back + 1 ) % 20; 936 count += 1; 937 } 938 int remove( BoundedBuffer & @mutex@ buf ) with( buf ) { 939 if ( count == 0 ) @waitfor( insert : buf );@ 940 int elem = elements[front]; 941 front = ( front + 1 ) % 20; 942 count -= 1; 943 return elem; 944 } 945 \end{cfa} 745 946 \end{tabular} 746 947 \end{cquote} -
libcfa/prelude/builtins.c
rb006c51e r10a9479d 10 10 // Created On : Fri Jul 21 16:21:03 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Feb 2 11:33:56 202313 // Update Count : 1 3512 // Last Modified On : Fri Nov 8 17:07:15 2024 13 // Update Count : 144 14 14 // 15 15 … … 140 140 ) \ 141 141 typeof(x) op = 1; /* accumulate odd product */ \ 142 typeof(x) w = x; /* FIX-ME: possible bug in the box pass changing value argument through parameter */ \ 142 143 for ( ; y > 1; y >>= 1 ) { /* squaring exponentiation, O(log2 y) */ \ 143 if ( (y & 1) == 1 ) op = op * x; /* odd ? */ \144 x = x * x; \144 if ( (y & 1) == 1 ) op = op * w; /* odd ? */ \ 145 w = w * w; \ 145 146 } \ 146 return x* op147 return w * op 147 148 #define __CFA_EXP_INT__(...) __VA_ARGS__ 148 149 -
libcfa/src/concurrency/actor.hfa
rb006c51e r10a9479d 398 398 // TODO: update globals in this file to be static fields once the static fields project is done 399 399 static executor * __actor_executor_ = 0p; 400 static bool __actor_executor_passed = false; // was an executor passed to start_actor_system400 static bool __actor_executor_passed = false; // was an executor passed to actor_start 401 401 static size_t __num_actors_ = 0; // number of actor objects in system 402 402 static struct thread$ * __actor_executor_thd = 0p; // used to wake executor after actors finish … … 410 410 // Once an actor is allocated it must be sent a message or the actor system cannot stop. Hence, its receive 411 411 // member must be called to end it 412 DEBUG_ABORT( __actor_executor_ == 0p, "Creating actor before calling start_actor_system() can cause undefined behaviour.\n" );412 DEBUG_ABORT( __actor_executor_ == 0p, "Creating actor before calling actor_start() can cause undefined behaviour.\n" ); 413 413 alloc = Nodelete; 414 414 ticket = __get_next_ticket( *__actor_executor_ ); … … 682 682 } 683 683 684 static inline void start_actor_system( size_t num_thds ) {684 static inline void actor_start( size_t num_thds ) { 685 685 __reset_stats(); 686 686 __actor_executor_thd = active_thread(); … … 689 689 } 690 690 691 static inline void start_actor_system() { start_actor_system( get_proc_count( *active_cluster() ) ); }692 693 static inline void start_actor_system( executor & this ) {691 static inline void actor_start() { actor_start( get_proc_count( *active_cluster() ) ); } 692 693 static inline void actor_start( executor & this ) { 694 694 __reset_stats(); 695 695 __actor_executor_thd = active_thread(); … … 698 698 } 699 699 700 static inline void stop_actor_system() {700 static inline void actor_stop() { 701 701 park(); // unparked when actor system is finished 702 702 … … 715 715 struct finished_msg_t { inline message; } finished_msg = __base_msg_finished; 716 716 717 allocation receive( actor & this, delete_msg_t & msg) { return Delete; }718 allocation receive( actor & this, destroy_msg_t & msg) { return Destroy; }719 allocation receive( actor & this, finished_msg_t & msg) { return Finished; }717 allocation receive( actor & this, delete_msg_t & ) { return Delete; } 718 allocation receive( actor & this, destroy_msg_t & ) { return Destroy; } 719 allocation receive( actor & this, finished_msg_t & ) { return Finished; } 720 720 721 721 // Default messages used all the time. 722 //static struct startmsg_t { inline message; } start_msg; // start actor723 //static struct stopmsg_t { inline message; } stop_msg; // terminate actor722 struct start_msg_t { inline message; } start_msg = __base_msg_finished; // start actor 723 struct stop_msg_t { inline message; } stop_msg = __base_msg_finished; // terminate actor -
libcfa/src/concurrency/barrier.hfa
rb006c51e r10a9479d 1 // 1 // -*- Mode: C -*- 2 // 2 3 // Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo 3 // 4 // 4 5 // The contents of this file are covered under the licence agreement in the 5 6 // file "LICENCE" distributed with Cforall. 6 7 // 7 // barrier.hfa -- simple barrier implemented from monitors8 // 9 // Author : Thierry Delisle10 // Created On : Thu Mar 31 16:51:35 202211 // Last Modified By : 12 // Last Modified On : 13 // Update Count : 14 // 8 // barrier.hfa -- simple barrier implemented using a monitor 9 // 10 // Author : Peter A. Buhr 11 // Created On : Sun Nov 10 08:07:35 2024 12 // Last Modified By : Peter A. Buhr 13 // Last Modified On : Wed Nov 13 12:37:04 2024 14 // Update Count : 9 15 // 15 16 16 17 #pragma once … … 18 19 #include <monitor.hfa> 19 20 20 // Simple barrier based on a monitor 21 // Plan 9 inheritance does not work with monitors. Two monitor locks are created. 22 21 23 monitor barrier { 22 // Number of threads blocking needed to unblock the barrier 23 // Unsigned should be enough, I don't expect use cases with 2^32 thread barriers. 24 unsigned width; 25 26 // Current count (counting backwards) 27 unsigned count; 28 29 // Barrier uses internal scheduling 30 condition c; 24 unsigned int group, arrivals; // group size, arrival counter 25 condition c; // wait for group to form 31 26 }; 32 27 33 // Constructor 34 void ?{}( barrier & this, unsigned width ) { 35 this.width = width; 36 this.count = width; // Count backwards so initialize at width 28 static inline void ?{}( barrier & b, unsigned int group ) { 29 b.group = b.arrivals = group; // arrivals count backward 37 30 } 38 31 39 // block until the number of threads needed have blocked 40 // returns an value indicating the reverse order the threads arrived in 41 // i.e. last thread will return 0 (and not block) 42 // second last thread returns 1 43 // etc. 44 // last is an optional hook that will be called by the last thread 45 // before unblocking the others 46 static inline unsigned block(barrier & mutex this, fptr_t last = (fptr_t)0 ) { 47 this.count -= 1; // prefix decrement so we the last is 0 and not 1 48 unsigned arrival = this.count; // Note arrival order 49 if(arrival == 0) { 50 if(last) last(); 51 // If arrived last unblock everyone and reset 52 signal_all(this.c); 53 this.count = this.width; 54 } else { 55 // Otherwise block 56 wait(this.c); 57 } 58 return arrival; // return arrival order 32 // Returns a value indicating the reverse order the threads arrived, i.e. last thread returns 0 (and does not block) 33 // last is an optional hook that is called by the Gth thread before unblocking the other threads. 34 static inline unsigned int block( barrier & mutex b, fptr_t last = (fptr_t)0 ) with( b ) { 35 arrivals -= 1; // prefix decrement so last is 0 not 1 36 unsigned arrived = b.arrivals; // note arrival order 37 if ( arrivals != 0 ) { // wait for group to form 38 wait( b.c ); 39 } else { // group formed 40 if ( last ) last(); // safe to call 41 signal_all( c ); // unblock group 42 arrivals = group; // reset 43 } // if 44 return arrived; // return arrival order 59 45 } -
libcfa/src/concurrency/monitor.cfa
rb006c51e r10a9479d 10 10 // Created On : Thd Feb 23 12:27:26 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Feb 19 17:00:59 202313 // Update Count : 1 212 // Last Modified On : Thu Nov 21 08:31:55 2024 13 // Update Count : 18 14 14 // 15 15 … … 931 931 932 932 static inline [thread$ *, int] search_entry_queue( const __waitfor_mask_t & mask, monitor$ * monitors [], __lock_size_t count ) { 933 934 933 __queue_t(thread$) & entry_queue = monitors[0]->entry_queue; 935 934 935 #if 0 936 936 #if defined( __CFA_WITH_VERIFY__ ) 937 937 thread$ * last = 0p; 938 938 #endif 939 939 // For each thread in the entry-queue 940 for( thread$ ** thrd_it = &entry_queue.head; 941 (*thrd_it) != 1p; 942 thrd_it = &get_next(**thrd_it) 943 ) { 940 for ( thread$ ** thrd_it = &entry_queue.head; (*thrd_it) != 1p; thrd_it = &get_next(**thrd_it) ) { 944 941 thread$ * curr = *thrd_it; 945 942 946 /* paranoid */ verifyf( !last || last->user_link.next == curr, "search not making progress, from %p (%p) to %p", last, last->user_link.next, curr ); 943 /* paranoid */ verifyf( !last || last->user_link.next == curr, "search not making progress, from %p (%p) to %p", 944 last, last->user_link.next, curr ); 947 945 /* paranoid */ verifyf( curr != last, "search not making progress, from %p to %p", last, curr ); 948 946 … … 951 949 __acceptable_t * end = end (mask); 952 950 __acceptable_t * begin = begin(mask); 953 for( __acceptable_t * it = begin; it != end; it++, i++ ) { 954 // Check if we have a match 955 if( *it == curr->monitors ) { 956 957 // If we have a match return it 958 // after removeing it from the entry queue 951 for ( __acceptable_t * it = begin; it != end; it++, i++ ) { 952 // Check for match 953 if ( *it == curr->monitors ) { 954 // If match, return it after removeing it from the entry queue 959 955 return [remove( entry_queue, thrd_it ), i]; 960 956 } … … 965 961 #endif 966 962 } 967 963 #endif 964 int i = 0; 965 __acceptable_t * end = end (mask); 966 __acceptable_t * begin = begin(mask); 967 // For each acceptable (respect lexical priority in waitfor statement) 968 for ( __acceptable_t * it = begin; it != end; it++, i++ ) { 969 #if defined( __CFA_WITH_VERIFY__ ) 970 thread$ * last = 0p; 971 #endif // __CFA_WITH_VERIFY__ 972 973 for ( thread$ ** thrd_it = &entry_queue.head; (*thrd_it) != 1p; thrd_it = &get_next(**thrd_it) ) { 974 thread$ * curr = *thrd_it; 975 976 /* paranoid */ verifyf( !last || last->user_link.next == curr, "search not making progress, from %p (%p) to %p", 977 last, last->user_link.next, curr ); 978 /* paranoid */ verifyf( curr != last, "search not making progress, from %p to %p", last, curr ); 979 980 // For each thread in the entry-queue check for a match 981 if ( *it == curr->monitors ) { 982 // If match, return it after removeing from the entry queue 983 return [remove( entry_queue, thrd_it ), i]; 984 } // if 985 986 #if defined( __CFA_WITH_VERIFY__ ) 987 last = curr; 988 #endif 989 } // for 990 } // for 968 991 return [0, -1]; 969 992 } -
libcfa/src/rational.cfa
rb006c51e r10a9479d 10 10 // Created On : Wed Apr 6 17:54:28 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Aug 2 07:41:25202413 // Update Count : 19912 // Last Modified On : Mon Nov 11 22:37:12 2024 13 // Update Count : 206 14 14 // 15 15 … … 203 203 204 204 forall( ostype & | ostream( ostype ) | { ostype & ?|?( ostype &, T ); } ) { 205 205 ostype & ?|?( ostype & os, rational(T) r ) { 206 206 return os | r.numerator | '/' | r.denominator; 207 207 } // ?|? -
libcfa/src/rational.hfa
rb006c51e r10a9479d 12 12 // Created On : Wed Apr 6 17:56:25 2016 13 13 // Last Modified By : Peter A. Buhr 14 // Last Modified On : Fri Oct 6 07:52:20 202315 // Update Count : 12 214 // Last Modified On : Fri Nov 8 17:02:09 2024 15 // Update Count : 126 16 16 // 17 17 … … 23 23 // implementation 24 24 25 forall( T | arithmetic( T )) {25 forall( T ) { 26 26 struct rational { 27 27 T numerator, denominator; // invariant: denominator > 0 28 28 }; // rational 29 } 29 30 31 forall( T | arithmetic( T ) ) { 30 32 // constructors 31 33 -
src/AST/Expr.hpp
rb006c51e r10a9479d 330 330 enum GeneratedFlag { ExplicitCast, GeneratedCast }; 331 331 332 /// Even within the basic cast expression there are variants: 333 /// CCast - C-Style Cast: A backwards compatable cast from C. 334 /// CoerceCast - Coercion Cast: Change the type without changing the value. 335 /// ReturnCast - Ascription Cast: Requires the given expression result type. 336 enum CastKind { CCast, CoerceCast, ReturnCast }; 337 332 338 /// A type cast, e.g. `(int)e` 333 339 class CastExpr final : public Expr { … … 336 342 GeneratedFlag isGenerated; 337 343 338 enum CastKind { 339 Default, // C 340 Coerce, // reinterpret cast 341 Return // overload selection 342 }; 343 344 CastKind kind = Default; 344 CastKind kind = CCast; 345 345 346 346 CastExpr( const CodeLocation & loc, const Expr * a, const Type * to, 347 GeneratedFlag g = GeneratedCast, CastKind kind = Default ) : Expr( loc, to ), arg( a ), isGenerated( g ), kind( kind ) {}347 GeneratedFlag g = GeneratedCast, CastKind kind = CCast ) : Expr( loc, to ), arg( a ), isGenerated( g ), kind( kind ) {} 348 348 /// Cast-to-void 349 CastExpr( const CodeLocation & loc, const Expr * a, GeneratedFlag g = GeneratedCast, CastKind kind = Default );349 CastExpr( const CodeLocation & loc, const Expr * a, GeneratedFlag g = GeneratedCast, CastKind kind = CCast ); 350 350 351 351 /// Wrap a cast expression around an existing expression (always generated) -
src/AST/Pass.hpp
rb006c51e r10a9479d 327 327 /// The Pass template handles what *before* and *after* means automatically 328 328 template< template<class...> class container_t = std::list > 329 struct WithStmtsToAdd {329 struct WithStmtsToAddX { 330 330 container_t< ptr<Stmt> > stmtsToAddBefore; 331 331 container_t< ptr<Stmt> > stmtsToAddAfter; 332 332 }; 333 334 struct WithStmtsToAdd : public WithStmtsToAddX<> {}; 333 335 334 336 /// Used if visitor requires added declarations before or after the current node. 335 337 /// The Pass template handles what *before* and *after* means automatically 336 338 template< template<class...> class container_t = std::list > 337 struct WithDeclsToAdd {339 struct WithDeclsToAddX { 338 340 container_t< ptr<Decl> > declsToAddBefore; 339 341 container_t< ptr<Decl> > declsToAddAfter; 340 342 }; 343 344 struct WithDeclsToAdd : public WithDeclsToAddX<> {}; 341 345 342 346 /// Use if visitation should stop at certain levels -
src/CodeGen/CodeGenerator.cpp
rb006c51e r10a9479d 680 680 extension( expr ); 681 681 output << "("; 682 if ( expr->result->isVoid() ) { 683 output << "(void)"; 684 } else { 685 output << "("; 682 switch ( expr->kind ) { 683 case ast::CCast: 684 if ( expr->result->isVoid() ) { 685 output << "(void)"; 686 } else { 687 output << "("; 688 output << genType( expr->result, "", options ); 689 output << ")"; 690 } 691 break; 692 case ast::CoerceCast: 693 assertf( ast::CoerceCast != expr->kind, "Coercion cast is not implemented." ); 694 // And likely shouldn't reach code generation when it is implemented. 695 break; 696 case ast::ReturnCast: 697 // This should be invisible in the resulting C code. 698 // Can we insert a check here? 699 //assert( ResolvExpr::typesCompatable(???) ); 700 if ( options.genC ) break; 701 output << "(return "; 686 702 output << genType( expr->result, "", options ); 687 703 output << ")"; 704 break; 688 705 } 689 706 expr->arg->accept( *visitor ); -
src/Concurrency/Actors.cpp
rb006c51e r10a9479d 194 194 // collects data needed for next pass that does the circular defn resolution 195 195 // for message send operators (via table above) 196 struct GenFuncsCreateTables : public ast::WithDeclsToAdd <>{196 struct GenFuncsCreateTables : public ast::WithDeclsToAdd { 197 197 unordered_set<const StructDecl *> & actorStructDecls; 198 198 unordered_set<const StructDecl *> & messageStructDecls; … … 451 451 // separate pass is needed since this pass resolves circular defn issues 452 452 // generates the forward declarations of the send operator for actor routines 453 struct FwdDeclOperator : public ast::WithDeclsToAdd <>{453 struct FwdDeclOperator : public ast::WithDeclsToAdd { 454 454 unordered_set<const StructDecl *> & actorStructDecls; 455 455 unordered_set<const StructDecl *> & messageStructDecls; -
src/Concurrency/Corun.cpp
rb006c51e r10a9479d 25 25 namespace Concurrency { 26 26 27 struct CorunKeyword : public WithDeclsToAdd <>, public WithStmtsToAdd<>{27 struct CorunKeyword : public WithDeclsToAdd, public WithStmtsToAdd { 28 28 UniqueName CorunFnNamer = "__CFA_corun_lambda_"s; 29 29 UniqueName CoforFnNamer = "__CFA_cofor_lambda_"s; -
src/Concurrency/Keywords.cpp
rb006c51e r10a9479d 117 117 118 118 // -------------------------------------------------------------------------- 119 struct ConcurrentSueKeyword : public ast::WithDeclsToAdd <>{119 struct ConcurrentSueKeyword : public ast::WithDeclsToAdd { 120 120 ConcurrentSueKeyword( 121 121 std::string&& type_name, std::string&& field_name, … … 639 639 // -------------------------------------------------------------------------- 640 640 struct SuspendKeyword final : 641 public ast::WithStmtsToAdd <>, public ast::WithGuards {641 public ast::WithStmtsToAdd, public ast::WithGuards { 642 642 SuspendKeyword() = default; 643 643 virtual ~SuspendKeyword() = default; … … 860 860 861 861 // -------------------------------------------------------------------------- 862 struct MutexKeyword final : public ast::WithDeclsToAdd <>{862 struct MutexKeyword final : public ast::WithDeclsToAdd { 863 863 const ast::FunctionDecl * postvisit( const ast::FunctionDecl * decl ); 864 864 void postvisit( const ast::StructDecl * decl ); -
src/Concurrency/Waituntil.cpp
rb006c51e r10a9479d 1398 1398 // To add the predicates at global scope we need to do it in a second pass 1399 1399 // Predicates are added after "struct select_node { ... };" 1400 class AddPredicateDecls final : public WithDeclsToAdd <>{1400 class AddPredicateDecls final : public WithDeclsToAdd { 1401 1401 vector<FunctionDecl *> & satFns; 1402 1402 const StructDecl * selectNodeDecl = nullptr; -
src/ControlStruct/ExceptDecl.cpp
rb006c51e r10a9479d 401 401 } 402 402 403 struct ExceptDeclCore : public ast::WithDeclsToAdd <>{403 struct ExceptDeclCore : public ast::WithDeclsToAdd { 404 404 ast::StructDecl const * transformExcept( ast::StructDecl const * decl ); 405 405 ast::ObjectDecl const * transformVTable( -
src/GenPoly/Box.cpp
rb006c51e r10a9479d 55 55 /// Adds layout-generation functions to polymorphic types. 56 56 struct LayoutFunctionBuilder final : 57 public ast::WithDeclsToAdd <>,57 public ast::WithDeclsToAdd, 58 58 public ast::WithShortCircuiting, 59 59 public ast::WithVisitorRef<LayoutFunctionBuilder> { … … 344 344 public ast::WithGuards, 345 345 public ast::WithShortCircuiting, 346 public ast::WithStmtsToAdd <>,346 public ast::WithStmtsToAdd, 347 347 public ast::WithVisitorRef<CallAdapter> { 348 348 CallAdapter(); … … 1575 1575 struct PolyGenericCalculator final : 1576 1576 public ast::WithConstTypeSubstitution, 1577 public ast::WithDeclsToAdd <>,1577 public ast::WithDeclsToAdd, 1578 1578 public ast::WithGuards, 1579 public ast::WithStmtsToAdd <>,1579 public ast::WithStmtsToAdd, 1580 1580 public ast::WithVisitorRef<PolyGenericCalculator> { 1581 1581 PolyGenericCalculator(); -
src/GenPoly/InstantiateGeneric.cpp
rb006c51e r10a9479d 277 277 public ast::WithVisitorRef<FixDtypeStatic>, 278 278 public ast::WithShortCircuiting, 279 public ast::WithStmtsToAdd <>{279 public ast::WithStmtsToAdd { 280 280 ast::ApplicationExpr const * previsit( ast::ApplicationExpr const * expr ); 281 281 void previsit( ast::AddressExpr const * expr ); … … 421 421 public ast::WithCodeLocation, 422 422 public ast::WithConstTypeSubstitution, 423 public ast::WithDeclsToAdd <>,423 public ast::WithDeclsToAdd, 424 424 public ast::WithGuards, 425 425 public ast::WithVisitorRef<GenericInstantiator> -
src/GenPoly/Lvalue.cpp
rb006c51e r10a9479d 85 85 struct ReferenceConversions final : 86 86 public ast::WithConstTranslationUnit, 87 public ast::WithGuards, public ast::WithStmtsToAdd <>{87 public ast::WithGuards, public ast::WithStmtsToAdd { 88 88 ast::Expr const * postvisit( ast::CastExpr const * expr ); 89 89 ast::Expr const * postvisit( ast::AddressExpr const * expr ); … … 316 316 Warning::RvalueToReferenceConversion, toCString( expr->arg ) ); 317 317 318 // allowing conversion in the rvalue to const ref case 319 // use the referenced-to type to create temp variables 320 ast::Type const * targetType = dstType; 321 for (int i = 0; i < diff; ++i) targetType = (strict_dynamic_cast<ast::ReferenceType const *>(targetType))->base; 322 318 323 static UniqueName tmpNamer( "__ref_tmp_" ); 319 324 ast::ObjectDecl * tmp = new ast::ObjectDecl( expr->arg->location, 320 325 tmpNamer.newName(), 321 ast::deepCopy( expr->arg->result ), 326 // ast::deepCopy( expr->arg->result ), 327 ast::deepCopy (targetType), 322 328 new ast::SingleInit( expr->arg->location, expr->arg ) ); 323 329 PRINT( std::cerr << "make tmp: " << tmp << std::endl; ) … … 359 365 ret = new ast::AddressExpr( ret->location, ret ); 360 366 } 361 if ( expr->arg->get_lvalue() && 362 !ResolvExpr::typesCompatible( 363 srcType, 364 strict_dynamic_cast<ast::ReferenceType const *>( dstType )->base ) ) { 365 // Must keep cast if cast-to type is different from the actual type. 367 // Must keep cast if types are different. 368 if ( !ResolvExpr::typesCompatible( 369 srcType, 370 strict_dynamic_cast<ast::ReferenceType const *>( dstType )->base ) ) { 366 371 return ast::mutate_field( expr, &ast::CastExpr::arg, ret ); 367 372 } … … 377 382 } 378 383 // Must keep cast if types are different. 379 if ( !ResolvExpr::typesCompatible IgnoreQualifiers(384 if ( !ResolvExpr::typesCompatible( 380 385 dstType->stripReferences(), 381 386 srcType->stripReferences() ) ) { … … 390 395 } else { 391 396 assert( 0 == diff ); 392 // Remove useless generated casts.393 if ( expr->isGenerated == ast::GeneratedFlag::GeneratedCast &&394 ResolvExpr::typesCompatible(397 // Must keep cast if types are different. (Or it is explicit.) 398 if ( ast::ExplicitCast == expr->isGenerated || 399 !ResolvExpr::typesCompatible( 395 400 expr->result, 396 401 expr->arg->result ) ) { 397 PRINT( 398 std::cerr << "types are compatible, removing cast: " << expr << '\n'; 399 std::cerr << "-- " << expr->result << '\n'; 400 std::cerr << "-- " << expr->arg->result << std::endl; 401 ) 402 auto argAsEnum = expr->arg.as<ast::EnumInstType>(); 403 auto resultAsEnum = expr->result.as<ast::EnumInstType>(); 404 if (argAsEnum && resultAsEnum) { 405 if (argAsEnum->base->name != resultAsEnum->base->name) { 406 return expr; 407 } 408 } 409 return ast::mutate_field( expr->arg.get(), 410 &ast::Expr::env, expr->env.get() ); 411 } 412 return expr; 402 return expr; 403 } 404 PRINT( 405 std::cerr << "types are compatible, removing cast: " << expr << '\n'; 406 std::cerr << "-- " << expr->result << '\n'; 407 std::cerr << "-- " << expr->arg->result << std::endl; 408 ) 409 return ast::mutate_field( expr->arg.get(), 410 &ast::Expr::env, expr->env.get() ); 413 411 } 414 412 } … … 505 503 } 506 504 505 /// Recursively move an address expression underneath casts. Casts are not 506 /// lvalue expressions in C but are sometimes considered as such in Cforall, 507 /// (passes like InstantiateGeneric can add them.) - &(int) => (int*)& 508 ast::Expr const * moveAddressUnderCast( ast::AddressExpr const * expr ) { 509 if ( !dynamic_cast<ast::CastExpr const *>( expr->arg.get() ) ) { 510 return expr; 511 } 512 auto mutExpr = ast::mutate( expr ); 513 auto mutCast = strict_dynamic_cast<ast::CastExpr *>( 514 ast::mutate( mutExpr->arg.release() ) ); 515 mutExpr->arg = mutCast->arg; 516 mutCast->arg = moveAddressUnderCast( mutExpr ); 517 mutCast->result = new ast::PointerType( mutCast->result ); 518 return mutCast; 519 } 520 507 521 ast::Expr const * CollapseAddressDeref::postvisit( 508 522 ast::AddressExpr const * expr ) { … … 516 530 return ret; 517 531 } 518 } else if ( auto cast = dynamic_cast<ast::CastExpr const *>( arg ) ) { 519 // Need to move cast to pointer type out a level since address of 520 // pointer is not valid C code (can be introduced in prior passes, 521 // e.g., InstantiateGeneric) 522 if ( ast::getPointerBase( cast->result ) ) { 523 auto mutExpr = ast::mutate( expr ); 524 auto mutCast = strict_dynamic_cast<ast::CastExpr *>( 525 ast::mutate( mutExpr->arg.release() ) ); 526 mutExpr->arg = mutCast->arg; 527 mutCast->arg = mutExpr; 528 mutCast->result = new ast::PointerType( mutCast->result ); 529 return mutCast; 530 } 532 } else { 533 return moveAddressUnderCast( expr ); 531 534 } 532 535 return expr; -
src/GenPoly/Specialize.cpp
rb006c51e r10a9479d 30 30 struct SpecializeCore final : 31 31 public ast::WithConstTypeSubstitution, 32 public ast::WithDeclsToAdd <>,32 public ast::WithDeclsToAdd, 33 33 public ast::WithVisitorRef<SpecializeCore> { 34 34 std::string paramPrefix = "_p"; -
src/InitTweak/FixInit.cpp
rb006c51e r10a9479d 105 105 /// generate/resolve copy construction expressions for each, and generate/resolve destructors for both 106 106 /// arguments and return value temporaries 107 struct ResolveCopyCtors final : public ast::WithGuards, public ast::WithStmtsToAdd <>, public ast::WithSymbolTable, public ast::WithShortCircuiting, public ast::WithVisitorRef<ResolveCopyCtors>, public ast::WithConstTranslationUnit {107 struct ResolveCopyCtors final : public ast::WithGuards, public ast::WithStmtsToAdd, public ast::WithSymbolTable, public ast::WithShortCircuiting, public ast::WithVisitorRef<ResolveCopyCtors>, public ast::WithConstTranslationUnit { 108 108 const ast::Expr * postvisit( const ast::ImplicitCopyCtorExpr * impCpCtorExpr ); 109 109 const ast::StmtExpr * previsit( const ast::StmtExpr * stmtExpr ); … … 177 177 /// insert destructor calls at the appropriate places. must happen before CtorInit nodes are removed 178 178 /// (currently by FixInit) 179 struct InsertDtors final : public ObjDeclCollector, public ast::WithStmtsToAdd <>{179 struct InsertDtors final : public ObjDeclCollector, public ast::WithStmtsToAdd { 180 180 InsertDtors( ast::Pass<LabelFinder> & finder ) : finder( finder ), labelVars( finder.core.vars ) {} 181 181 … … 194 194 195 195 /// expand each object declaration to use its constructor after it is declared. 196 struct FixInit : public ast::WithStmtsToAdd <>{196 struct FixInit : public ast::WithStmtsToAdd { 197 197 static void fixInitializers( ast::TranslationUnit &translationUnit ); 198 198 … … 230 230 231 231 /// expands ConstructorExpr nodes into comma expressions, using a temporary for the first argument 232 struct FixCtorExprs final : public ast::WithDeclsToAdd <>, public ast::WithSymbolTable, public ast::WithShortCircuiting, public ast::WithConstTranslationUnit {232 struct FixCtorExprs final : public ast::WithDeclsToAdd, public ast::WithSymbolTable, public ast::WithShortCircuiting, public ast::WithConstTranslationUnit { 233 233 const ast::Expr * postvisit( const ast::ConstructorExpr * ctorExpr ); 234 234 }; -
src/InitTweak/GenInit.cpp
rb006c51e r10a9479d 46 46 // Outer pass finds declarations, for their type could wrap a type that needs hoisting 47 47 struct HoistArrayDimension_NoResolve final : 48 public ast::WithDeclsToAdd <>, public ast::WithShortCircuiting,48 public ast::WithDeclsToAdd, public ast::WithShortCircuiting, 49 49 public ast::WithGuards, public ast::WithConstTranslationUnit, 50 50 public ast::WithVisitorRef<HoistArrayDimension_NoResolve>, … … 205 205 206 206 struct ReturnFixer final : 207 public ast::WithStmtsToAdd <>, ast::WithGuards, ast::WithShortCircuiting {207 public ast::WithStmtsToAdd, ast::WithGuards, ast::WithShortCircuiting { 208 208 void previsit( const ast::FunctionDecl * decl ); 209 209 const ast::ReturnStmt * previsit( const ast::ReturnStmt * stmt ); -
src/Parser/ExpressionNode.cpp
rb006c51e r10a9479d 652 652 DeclarationNode * decl_node, 653 653 ExpressionNode * expr_node, 654 ast::Cast Expr::CastKind kind ) {654 ast::CastKind kind ) { 655 655 ast::Type * targetType = maybeMoveBuildType( decl_node ); 656 656 if ( dynamic_cast<ast::VoidType *>( targetType ) ) { -
src/Parser/ExpressionNode.hpp
rb006c51e r10a9479d 69 69 ast::DimensionExpr * build_dimensionref( const CodeLocation &, const std::string * name ); 70 70 71 ast::Expr * build_cast( const CodeLocation &, DeclarationNode * decl_node, ExpressionNode * expr_node, ast::Cast Expr::CastKind kind = ast::CastExpr::Default );71 ast::Expr * build_cast( const CodeLocation &, DeclarationNode * decl_node, ExpressionNode * expr_node, ast::CastKind kind = ast::CCast ); 72 72 ast::Expr * build_keyword_cast( const CodeLocation &, ast::AggregateDecl::Aggregate target, ExpressionNode * expr_node ); 73 73 ast::Expr * build_virtual_cast( const CodeLocation &, DeclarationNode * decl_node, ExpressionNode * expr_node ); -
src/Parser/parser.yy
rb006c51e r10a9479d 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Oct 13 12:18:15202413 // Update Count : 6 84512 // Last Modified On : Fri Nov 15 15:01:33 2024 13 // Update Count : 6915 14 14 // 15 15 … … 24 24 // the grammar. 25 25 26 // The root language for this grammar is ANSI99/11 C. All of ANSI99/11 is parsed, except for: 27 // 28 // designation with '=' (use ':' instead) 29 // 30 // This incompatibility is discussed in detail before the "designation" grammar rule. Most of the syntactic extensions 31 // from ANSI90 to ANSI11 C are marked with the comment "C99/C11". 26 // The root language for this grammar is ANSI99/11 C. All of ANSI99/11 is parsed. Most of the syntactic extensions from 27 // ANSI90 to ANSI11 C are marked with the comment "C99/C11". 32 28 33 29 // This grammar also has two levels of extensions. The first extensions cover most of the GCC C extensions. All of the … … 983 979 { $$ = new ExpressionNode( new ast::VirtualCastExpr( yylloc, maybeMoveBuild( $5 ), maybeMoveBuildType( $3 ) ) ); } 984 980 | '(' RETURN type_no_function ')' cast_expression // CFA 985 { $$ = new ExpressionNode( build_cast( yylloc, $3, $5, ast:: CastExpr::Return) ); }981 { $$ = new ExpressionNode( build_cast( yylloc, $3, $5, ast::ReturnCast ) ); } 986 982 | '(' COERCE type_no_function ')' cast_expression // CFA 987 983 { SemanticError( yylloc, "Coerce cast is currently unimplemented." ); $$ = nullptr; } … … 1170 1166 // comma_expression in cfa_identifier_parameter_array and cfa_abstract_array 1171 1167 '[' ',' ']' 1172 { $$ = new ExpressionNode( build_tuple( yylloc, nullptr ) ); } 1168 // { $$ = new ExpressionNode( build_tuple( yylloc, nullptr ) ); } 1169 { SemanticError( yylloc, "Empty tuple is meaningless." ); $$ = nullptr; } 1173 1170 | '[' assignment_expression ',' ']' 1174 1171 { $$ = new ExpressionNode( build_tuple( yylloc, $2 ) ); } … … 1224 1221 | DIRECTIVE 1225 1222 { $$ = new StatementNode( build_directive( yylloc, $1 ) ); } 1223 // | attribute ';' 1224 // { $$ = new StatementNode( $1 ); } 1226 1225 ; 1227 1226 … … 2057 2056 | cfa_abstract_tuple identifier_or_type_name asm_name_opt 2058 2057 { $$ = $1->addName( $2 )->addAsmName( $3 ); } 2059 | type_qualifier_list cfa_abstract_tuple identifier_or_type_name asm_name_opt 2060 { $$ = $2->addQualifiers( $1 )->addName( $3 )->addAsmName( $4 ); } 2058 | multi_array_dimension cfa_abstract_tuple identifier_or_type_name asm_name_opt 2059 { $$ = $2->addNewArray( $1 )->addName( $3 )->addAsmName( $4 ); } 2060 | multi_array_dimension type_qualifier_list cfa_abstract_tuple identifier_or_type_name asm_name_opt 2061 { $$ = $3->addNewArray( $1 )->addQualifiers( $2 )->addName( $4 )->addAsmName( $5 ); } 2061 2062 2062 2063 // [ int s, int t ]; // declare s and t … … 4223 4224 cfa_identifier_parameter_ptr 4224 4225 | cfa_identifier_parameter_array 4226 | type_qualifier_list cfa_identifier_parameter_array 4227 { $$ = $2->addQualifiers( $1 ); } 4225 4228 ; 4226 4229 … … 4246 4249 '[' ']' type_specifier_nobody 4247 4250 { $$ = $3->addNewArray( DeclarationNode::newArray( nullptr, nullptr, false ) ); } 4251 | '[' ']' cfa_abstract_tuple 4252 { $$ = $3->addNewArray( DeclarationNode::newArray( nullptr, nullptr, false ) ); } 4248 4253 | cfa_array_parameter_1st_dimension type_specifier_nobody 4254 { $$ = $2->addNewArray( $1 ); } 4255 | cfa_array_parameter_1st_dimension cfa_abstract_tuple 4249 4256 { $$ = $2->addNewArray( $1 ); } 4250 4257 | '[' ']' multi_array_dimension type_specifier_nobody 4251 4258 { $$ = $4->addNewArray( $3 )->addNewArray( DeclarationNode::newArray( nullptr, nullptr, false ) ); } 4259 | '[' ']' multi_array_dimension cfa_abstract_tuple 4260 { $$ = $4->addNewArray( $3 )->addNewArray( DeclarationNode::newArray( nullptr, nullptr, false ) ); } 4252 4261 | cfa_array_parameter_1st_dimension multi_array_dimension type_specifier_nobody 4253 4262 { $$ = $3->addNewArray( $2 )->addNewArray( $1 ); } 4263 | cfa_array_parameter_1st_dimension multi_array_dimension cfa_abstract_tuple 4264 { $$ = $3->addNewArray( $2 )->addNewArray( $1 ); } 4254 4265 | multi_array_dimension type_specifier_nobody 4266 { $$ = $2->addNewArray( $1 ); } 4267 | multi_array_dimension cfa_abstract_tuple 4255 4268 { $$ = $2->addNewArray( $1 ); } 4256 4269 -
src/ResolvExpr/CandidateFinder.cpp
rb006c51e r10a9479d 1220 1220 finder.allowVoid = true; 1221 1221 } 1222 if ( castExpr->kind == ast::CastExpr::Return) {1222 if ( ast::ReturnCast == castExpr->kind ) { 1223 1223 finder.strictMode = true; 1224 1224 finder.find( castExpr->arg, ResolveMode::withAdjustment() ); -
src/ResolvExpr/ConversionCost.cpp
rb006c51e r10a9479d 250 250 newSrc = new ast::BasicType( ast::BasicKind::UnsignedInt ); 251 251 } 252 if (dstAsRef->base->is_const() ) { 253 auto cvtCost = conversionCost(newSrc, dstAsRef->base, srcIsLvalue, symtab, env) ; 254 if (cvtCost == Cost::zero) { // exact match, may use a lvalue src 255 if ( srcIsLvalue ) { 256 if ( src->qualifiers == dstAsRef->base->qualifiers ) { 257 return Cost::reference; 258 } else if ( src->qualifiers < dstAsRef->base->qualifiers ) { 259 return Cost::safe; 260 } else { 261 return Cost::unsafe; 262 } 263 } 264 else { 265 return Cost::reference; 266 } 267 } 268 else { // not exact match, conversion is needed so lvalueness of src does not matter 269 return cvtCost + Cost::reference; 270 } 271 } 252 272 if ( typesCompatibleIgnoreQualifiers( newSrc, dstAsRef->base, env ) ) { 253 273 if ( srcIsLvalue ) { … … 259 279 return Cost::unsafe; 260 280 } 261 } else if ( dstAsRef->base->is_const() ) { 262 return Cost::safe; 263 } else { 281 } else { // rvalue-to-NC-ref conversion 264 282 return Cost::unsafe; 265 283 } -
src/ResolvExpr/Resolver.cpp
rb006c51e r10a9479d 201 201 && typesCompatible( castExpr->arg->result, castExpr->result ) 202 202 ) { 203 auto argAsEnum = castExpr->arg.as<ast::EnumInstType>(); 204 auto resultAsEnum = castExpr->result.as<ast::EnumInstType>(); 205 if (argAsEnum && resultAsEnum) { 206 if (argAsEnum->base->name != resultAsEnum->base->name) { 207 std::cerr << "Enum Cast: " << argAsEnum->base->name << " to " << resultAsEnum->base->name << std::endl; 208 return castExpr; 209 } 203 ast::EnumInstType const * arg, * result; 204 if ( ( result = castExpr->result.as<ast::EnumInstType>() ) && 205 ( arg = castExpr->arg.as<ast::EnumInstType>() ) && 206 arg->base->name != result->base->name) { 207 return castExpr; 210 208 } 211 209 // generated cast is the same type as its argument, remove it after keeping env … … 377 375 : public ast::WithSymbolTable, public ast::WithGuards, 378 376 public ast::WithVisitorRef<Resolver>, public ast::WithShortCircuiting, 379 public ast::WithStmtsToAdd <>{377 public ast::WithStmtsToAdd { 380 378 381 379 ast::ptr< ast::Type > functionReturn = nullptr; -
src/Tuples/TupleExpansion.cpp
rb006c51e r10a9479d 28 28 }; 29 29 30 struct UniqueExprExpander final : public ast::WithDeclsToAdd <>{30 struct UniqueExprExpander final : public ast::WithDeclsToAdd { 31 31 const ast::Expr * postvisit( const ast::UniqueExpr * unqExpr ); 32 32 // Not a vector, because they may not be adding in increasing order. … … 37 37 struct TupleMainExpander final : 38 38 public ast::WithCodeLocation, 39 public ast::WithDeclsToAdd <>,39 public ast::WithDeclsToAdd, 40 40 public ast::WithGuards, 41 41 public ast::WithVisitorRef<TupleMainExpander> { -
src/Validate/Autogen.cpp
rb006c51e r10a9479d 50 50 // -------------------------------------------------------------------------- 51 51 struct AutogenerateRoutines final : 52 public ast::WithDeclsToAdd <>,52 public ast::WithDeclsToAdd, 53 53 public ast::WithShortCircuiting { 54 54 void previsit( const ast::EnumDecl * enumDecl ); -
src/Validate/CompoundLiteral.cpp
rb006c51e r10a9479d 27 27 28 28 struct CompoundLiteral final : 29 public ast::WithDeclsToAdd <>{29 public ast::WithDeclsToAdd { 30 30 ast::Storage::Classes storageClasses; 31 31 -
src/Validate/HoistStruct.cpp
rb006c51e r10a9479d 68 68 */ 69 69 struct HoistStructCore final : 70 public ast::WithDeclsToAdd <>, public ast::WithGuards {70 public ast::WithDeclsToAdd, public ast::WithGuards { 71 71 ast::StructDecl const * previsit( ast::StructDecl const * decl ); 72 72 ast::StructDecl const * postvisit( ast::StructDecl const * decl ); -
src/Validate/HoistTypeDecls.cpp
rb006c51e r10a9479d 22 22 namespace { 23 23 24 struct HoistTypeDecls final : public ast::WithDeclsToAdd <>{24 struct HoistTypeDecls final : public ast::WithDeclsToAdd { 25 25 void previsit( ast::SizeofExpr const * ); 26 26 void previsit( ast::AlignofExpr const * ); -
src/Validate/ImplementEnumFunc.cpp
rb006c51e r10a9479d 472 472 473 473 struct ImplementEnumFunc final : 474 public ast::WithDeclsToAdd <>, public ast::WithShortCircuiting {474 public ast::WithDeclsToAdd, public ast::WithShortCircuiting { 475 475 void previsit(const ast::EnumDecl* enumDecl); 476 476 void previsit(const ast::FunctionDecl* functionDecl); -
src/Validate/LinkInstanceTypes.cpp
rb006c51e r10a9479d 27 27 struct LinkTypesCore : public WithNoIdSymbolTable, 28 28 public ast::WithCodeLocation, 29 public ast::WithDeclsToAdd <>,29 public ast::WithDeclsToAdd, 30 30 public ast::WithGuards, 31 31 public ast::WithShortCircuiting, -
src/Validate/ReplaceTypedef.cpp
rb006c51e r10a9479d 28 28 struct ReplaceTypedefCore final : 29 29 public ast::WithCodeLocation, 30 public ast::WithDeclsToAdd <>,30 public ast::WithDeclsToAdd, 31 31 public ast::WithGuards, 32 32 public ast::WithShortCircuiting, -
src/Virtual/VirtualDtor.cpp
rb006c51e r10a9479d 119 119 // collects data needed for next pass that does the circular defn resolution 120 120 // for dtor setters and delete fns (via table above) 121 struct GenFuncsCreateTables : public ast::WithDeclsToAdd <>{121 struct GenFuncsCreateTables : public ast::WithDeclsToAdd { 122 122 unordered_map<const StructDecl *, CtorDtor> & structDecls; 123 123 CtorDtorTable & torDecls; … … 351 351 // separate pass is needed since __CFA_set_dtor needs to be defined after 352 352 // the last dtor defn which is found in prior pass 353 struct GenSetDtor : public ast::WithDeclsToAdd <>{353 struct GenSetDtor : public ast::WithDeclsToAdd { 354 354 unordered_map<const StructDecl *, CtorDtor> & structDecls; // set of decls that inherit from virt dtor 355 355 CtorDtorTable & torDecls; -
tests/concurrency/actors/dynamic.cfa
rb006c51e r10a9479d 48 48 49 49 executor e{ 0, 1, 1, false }; 50 start_actor_system( e ); 51 50 actor_start( e ); 52 51 sout | "started"; 53 52 … … 58 57 *d_actor | *d_msg; 59 58 60 stop_actor_system(); 61 59 actor_stop(); 62 60 sout | "stopped"; 63 61 } -
tests/concurrency/actors/executor.cfa
rb006c51e r10a9479d 84 84 85 85 sout | "starting"; 86 87 start_actor_system( e ); 88 86 actor_start( e ); 89 87 sout | "started"; 90 91 88 d_actor actors[ Actors ]; 92 93 89 for ( i; Actors ) { 94 90 actors[i] | shared_msg; 95 91 } // for 96 97 92 sout | "stopping"; 98 99 stop_actor_system(); 100 93 actor_stop(); 101 94 sout | "stopped"; 102 95 } -
tests/concurrency/actors/inherit.cfa
rb006c51e r10a9479d 29 29 sout | "Start"; 30 30 { 31 start_actor_system();31 actor_start(); 32 32 D_msg * dm = alloc(); 33 33 (*dm){}; … … 40 40 *s | *dm; 41 41 *s2 | *dm2; 42 stop_actor_system();42 actor_stop(); 43 43 } 44 44 { 45 start_actor_system();45 actor_start(); 46 46 Server s[2]; 47 47 D_msg * dm = alloc(); … … 51 51 s[0] | *dm; 52 52 s[1] | *dm2; 53 stop_actor_system();53 actor_stop(); 54 54 } 55 55 sout | "Finished"; -
tests/concurrency/actors/inline.cfa
rb006c51e r10a9479d 38 38 processor p; 39 39 { 40 start_actor_system(); // sets up executor40 actor_start(); // sets up executor 41 41 d_actor da; 42 42 d_msg * dm = alloc(); 43 43 (*dm){ 42, 2423 }; 44 44 da | *dm; 45 stop_actor_system(); // waits until actors finish45 actor_stop(); // waits until actors finish 46 46 } 47 47 { 48 start_actor_system(); // sets up executor48 actor_start(); // sets up executor 49 49 d_actor da; 50 50 d_msg2 dm{ 29079 }; … … 54 54 virtual_dtor * v = &dm; 55 55 da | dm; 56 stop_actor_system(); // waits until actors finish56 actor_stop(); // waits until actors finish 57 57 } 58 58 } -
tests/concurrency/actors/matrixMultiply.cfa
rb006c51e r10a9479d 88 88 89 89 sout | "starting"; 90 91 start_actor_system( e ); 92 90 actor_start( e ); 93 91 sout | "started"; 94 95 92 derived_msg messages[xr]; 96 97 93 derived_actor actors[xr]; 98 94 … … 100 96 messages[r]{ Z[r], X[r], Y }; 101 97 } // for 102 103 98 for ( r; xr ) { 104 99 actors[r] | messages[r]; … … 106 101 107 102 sout | "stopping"; 108 109 stop_actor_system(); 110 103 actor_stop(); 111 104 sout | "stopped"; 112 105 -
tests/concurrency/actors/pingpong.cfa
rb006c51e r10a9479d 47 47 processor p[Processors - 1]; 48 48 49 start_actor_system( Processors ); // test passing number of processors49 actor_start( Processors ); // test passing number of processors 50 50 ping pi_actor; 51 51 pong po_actor; … … 54 54 p_msg m; 55 55 pi_actor | m; 56 stop_actor_system();56 actor_stop(); 57 57 58 58 sout | "end"; -
tests/concurrency/actors/poison.cfa
rb006c51e r10a9479d 15 15 sout | "Finished"; 16 16 { 17 start_actor_system();17 actor_start(); 18 18 Server s[10]; 19 19 for ( i; 10 ) { 20 20 s[i] | finished_msg; 21 21 } 22 stop_actor_system();22 actor_stop(); 23 23 } 24 24 25 25 sout | "Delete"; 26 26 { 27 start_actor_system();27 actor_start(); 28 28 for ( i; 10 ) { 29 29 Server * s = alloc(); … … 31 31 (*s) | delete_msg; 32 32 } 33 stop_actor_system();33 actor_stop(); 34 34 } 35 35 36 36 sout | "Destroy"; 37 37 { 38 start_actor_system();38 actor_start(); 39 39 Server s[10]; 40 40 for ( i; 10 ) 41 41 s[i] | destroy_msg; 42 stop_actor_system();42 actor_stop(); 43 43 for ( i; 10 ) 44 44 if (s[i].val != 777) -
tests/concurrency/actors/static.cfa
rb006c51e r10a9479d 45 45 46 46 executor e{ 0, 1, 1, false }; 47 start_actor_system( e ); 48 47 actor_start( e ); 49 48 sout | "started"; 50 51 49 derived_msg msg; 52 53 50 derived_actor actor; 54 55 51 actor | msg; 56 57 stop_actor_system(); 58 52 actor_stop(); 59 53 sout | "stopped"; 60 54 } -
tests/concurrency/actors/types.cfa
rb006c51e r10a9479d 67 67 68 68 sout | "basic test"; 69 start_actor_system( Processors ); // test passing number of processors69 actor_start( Processors ); // test passing number of processors 70 70 derived_actor a; 71 71 d_msg b, c; … … 73 73 c.num = 2; 74 74 a | b | c; 75 stop_actor_system();75 actor_stop(); 76 76 77 77 sout | "same message and different actors test"; 78 start_actor_system(); // let system detect # of processors78 actor_start(); // let system detect # of processors 79 79 derived_actor2 d_ac2_0, d_ac2_1; 80 80 d_msg d_ac2_msg; … … 82 82 d_ac2_0 | d_ac2_msg; 83 83 d_ac2_1 | d_ac2_msg; 84 stop_actor_system();84 actor_stop(); 85 85 86 86 … … 88 88 sout | "same message and different actor types test"; 89 89 executor e{ 0, Processors, Processors == 1 ? 1 : Processors * 4, false }; 90 start_actor_system( e ); // pass an explicit executor90 actor_start( e ); // pass an explicit executor 91 91 derived_actor2 d_ac2_2; 92 92 derived_actor3 d_ac3_0; … … 95 95 d_ac3_0 | d_ac23_msg; 96 96 d_ac2_2 | d_ac23_msg; 97 stop_actor_system();97 actor_stop(); 98 98 } // RAII to clean up executor 99 99 … … 101 101 sout | "different message types, one actor test"; 102 102 executor e{ 1, Processors, Processors == 1 ? 1 : Processors * 4, true }; 103 start_actor_system( Processors );103 actor_start( Processors ); 104 104 derived_actor3 a3; 105 105 d_msg b1; … … 108 108 c2.num = 5; 109 109 a3 | b1 | c2; 110 stop_actor_system();110 actor_stop(); 111 111 } // RAII to clean up executor 112 112 … … 114 114 sout | "nested inheritance actor test"; 115 115 executor e{ 1, Processors, Processors == 1 ? 1 : Processors * 4, true }; 116 start_actor_system( Processors );116 actor_start( Processors ); 117 117 derived_actor4 a4; 118 118 d_msg b1; … … 121 121 c2.num = 5; 122 122 a4 | b1 | c2; 123 stop_actor_system();123 actor_stop(); 124 124 } // RAII to clean up executor 125 125 -
tests/concurrency/barrier/order.cfa
rb006c51e r10a9479d 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // order.cfa -- validates barriers the return value of 8 // barrier block 7 // order.cfa -- validates barrier return value from barrier block 9 8 // 10 9 // Author : Thierry Delisle 11 10 // Created On : Fri Apr 01 11:39:09 2022 12 // Last Modified By : 13 // Last Modified On : 14 // Update Count : 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Nov 10 11:22:56 2024 13 // Update Count : 20 15 14 // 16 17 // Test validates barrier and block return value by checking18 // that no more than one thread gets the same return value19 15 20 16 #include <concurrency/barrier.hfa> … … 23 19 #include <thread.hfa> 24 20 25 const unsigned NUM_LAPS = 173; 26 const unsigned NUM_THREADS = 11; 21 enum { NUM_LAPS = 173, NUM_THREADS = 11 }; 27 22 28 // The barrier we are testing29 23 barrier bar = { NUM_THREADS }; 30 24 31 // The return values of the previous generation. 32 volatile unsigned * generation; 25 volatile unsigned generation = 0; // count laps 26 void last() { 27 generation += 1; // last thread at barrier advances 28 } 29 volatile unsigned * generations; // global array pointer 33 30 34 31 thread Tester {}; 35 32 void main( Tester & this ) { 36 // Repeat a few times 37 for(l; NUM_LAPS) { 38 // Yield for chaos 39 yield( prng(this, 10) ); 33 for ( l; NUM_LAPS ) { 34 yield( prng( this, 10 ) ); // yield for chaos 35 unsigned int order = block( bar, last ); // block at barrier 40 36 41 // Block and what order we arrived 42 unsigned ret = block(bar); 43 44 // Check what was the last generation of that last thread in this position 45 unsigned g = generation[ret]; 46 47 // Is it what we expect? 48 if(g != l) { 49 // Complain that they are different 50 sout | "Gen" | l | ": Expeced generation at" | ret | "to be" | l | "was" | g; 51 } 52 53 // Mark the expected next generation 54 generation[ret] = l+1; 55 } 37 // For G == T, no thread should be able to advance generation until current generation finishes. 38 if ( generation - 1 != l || generations[order] != l ) { // generation advanced in block 39 mutex( sout ) sout | "mismatched generation, expected" | l | "got" | generation; 40 } // if 41 generations[order] = l + 1; // every thread advances their current order generation 42 } // for 56 43 } 57 44 58 45 int main() { 59 // Create the data ans zero it.60 46 volatile unsigned gen_data[NUM_THREADS]; 61 for( t; NUM_THREADS)62 gen_data[t] = 0;47 for( t; NUM_THREADS ) gen_data[t] = 0; 48 generations = gen_data; // global points at local 63 49 64 generation = gen_data; 65 66 // Run the experiment 67 processor p[4]; 68 { 50 processor p[4]; // parallelism 51 { // run experiment 69 52 Tester testers[NUM_THREADS]; 70 53 }
Note:
See TracChangeset
for help on using the changeset viewer.