Changeset 486caad for doc


Ignore:
Timestamp:
Mar 25, 2024, 7:15:30 PM (4 months ago)
Author:
JiadaL <j82liang@…>
Branches:
master
Children:
d734fa1
Parents:
df78cce (diff), bf050c5 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Location:
doc
Files:
1 added
10 edited

Legend:

Unmodified
Added
Removed
  • doc/bibliography/pl.bib

    rdf78cce r486caad  
    12631263    school      = {School of Computer Science, University of Waterloo},
    12641264    year        = 2019,
    1265     optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     1265    address     = {Waterloo, Ontario, Canada, N2L 3G1},
    12661266    note        = {\url{https://uwspace.uwaterloo.ca/handle/10012/14584}},
    12671267}
     
    19551955    school      = {School of Computer Sc., University of Waterloo},
    19561956    year        = 2015,
    1957     optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     1957    address     = {Waterloo, Ontario, Canada, N2L 3G1},
    19581958    note        = {\url{https://uwspace.uwaterloo.ca/handle/10012/10013}},
    19591959}
     
    41304130    school      = {School of Computer Sc., University of Waterloo},
    41314131    year        = 2019,
    4132     optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     4132    address     = {Waterloo, Ontario, Canada, N2L 3G1},
    41334133    note        = {\url{https://uwspace.uwaterloo.ca/handle/10012/14706}},
    41344134}
     
    43234323    school      = {School of Computer Science, University of Waterloo},
    43244324    year        = 2003,
    4325     optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     4325    address     = {Waterloo, Ontario, Canada, N2L 3G1},
    43264326    note        = {\url{http://plg.uwaterloo.ca/theses/BilsonThesis.pdf}},
    43274327}
     
    43644364    year        = 2018,
    43654365    month       = sep,
    4366     optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     4366    address     = {Waterloo, Ontario, Canada, N2L 3G1},
    43674367    note        = {\url{https://uwspace.uwaterloo.ca/handle/10012/13935}},
    43684368}
     
    59905990    key         = {OCaml},
    59915991    title       = {The {OC}aml system, release 5.1},
    5992     optaddress  = {Rust Project Developers},
     5992    address     = {Rust Project Developers},
    59935993    year        = 2023,
    59945994    note        = {\url{https://v2.ocaml.org/manual/}},
     
    70077007    organization= {United States Department of Defense},
    70087008    edition     = {{ANSI/MIL-STD-1815A-1983}},
     7009    address     = {Springer, New York},
    70097010    month       = feb,
    70107011    year        = 1983,
    7011     note        = {Springer, New York},
    70127012}
    70137013
     
    74217421    school      = {School of Computer Science, University of Waterloo},
    74227422    year        = 2017,
    7423     optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     7423    address     = {Waterloo, Ontario, Canada, N2L 3G1},
    74247424    note        = {\url{https://uwspace.uwaterloo.ca/handle/10012/11830}},
    74257425}
     
    75087508    key         = {Rust},
    75097509    title       = {{R}ust Programming Language},
    7510     optaddress  = {Rust Project Developers},
     7510    address     = {Rust Project Developers},
    75117511    year        = 2015,
    75127512    note        = {\url{https://doc.rust-lang.org/reference.html}},
     
    77297729    publisher   = {Morgan \& Claypool},
    77307730    year        = 2013,
    7731 }
    7732 
    7733 @inproceedings{Leissa14,
    7734     title       = {{S}ierra: a {SIMD} extension for {C}++},
    7735     author      = {Lei{\ss}a, Roland and Haffner, Immanuel and Hack, Sebastian},
    7736     booktitle   = {Proceedings of the 2014 Workshop on Workshop on programming models for SIMD/Vector processing},
    7737     pages       = {17-24},
    7738     year        = {2014},
    7739     organization= {ACM}
    7740 }
    7741 
    7742 @article{Nickolls08,
    7743     author      = {Nickolls, John and Buck, Ian and Garland, Michael and Skadron, Kevin},
    7744     title       = {Scalable Parallel Programming with CUDA},
    7745     journal     = {Queue},
    7746     volume      = {6},
    7747     number      = {2},
    7748     month       = mar,
    7749     year        = 2008,
    7750     pages       = {40-53},
    7751     publisher   = {ACM},
    7752     address     = {New York, NY, USA},
    77537731}
    77547732
  • doc/theses/jiada_liang_MMath/CFAenum.tex

    rdf78cce r486caad  
    1 \chapter{\CFA-Style Enum}
    2 
    3 
    4 \CFA supports C-Style enumeration using the same syntax and semantics for backwards compatibility.
     1\chapter{\CFA Enumeration}
     2
     3
     4\CFA supports C enumeration using the same syntax and semantics for backwards compatibility.
    55\CFA also extends C-Style enumeration by adding a number of new features that bring enumerations inline with other modern programming languages.
    66
     
    1616Finally, qualification is provided to disambiguate any ambiguous situations.
    1717\begin{cfa}
    18 enum C1 { First, Second, Third, Fourth };
    19 enum C2 { @Fourth@, @Third@, @Second@, @First@ };
    20 C1 p() { return Third; }                                $\C{// correctly resolved duplicate names}$
    21 C2 p() { return Fourth; }
     18enum E1 { First, Second, Third, Fourth };
     19enum E2 { @Fourth@, @Third@, @Second@, @First@ };
     20E1 p() { return Third; }                                $\C{// correctly resolved duplicate names}$
     21E2 p() { return Fourth; }
    2222void foo() {
    23         C1 e1 = First;   C2 e2 = First;
     23        E1 e1 = First;   E2 e2 = First;
    2424        e1 = Second;   e2 = Second;
    2525        e1 = p();   e2 = p();                           $\C{// correctly resolved function call}$
    26         int i = @C1.@First + @C2.@First;        $\C{// ambiguous without qualification}$
    27 }
    28 \end{cfa}
    29 \CFA overloading allows programmers to use the most meaningful names without fear of unresolvable clashes from included files, which are correctable with qualification.
     26        int i = @E1.@First + @E2.@First;        $\C{// disambiguate with qualification}$
     27        int j = @(E1)@First + @(E2)@First;      $\C{// disambiguate with cast}$
     28}
     29\end{cfa}
     30\CFA overloading allows programmers to use the most meaningful names without fear of name clashes from include files.
     31Either the type system implicitly disambiguates or the programmer explicitly disambiguates using qualification or casting.
    3032
    3133
     
    3436An enumeration can be scoped, so the enumerator constants are not projected into the enclosing scope, using @'!'@.
    3537\begin{cfa}
    36 enum Weekday @!@ { /* as above */ };
    37 enum( char * ) Names @!@ { /* as above */ };
     38enum Weekday @!@ { Mon, Tue, Wed, Thu = 10, Fri, Sat, Sun };
     39enum RGB @!@ { Red, Green, Blue };
    3840\end{cfa}
    3941Now the enumerators \emph{must} be qualified with the associated enumeration.
    4042\begin{cfa}
    41 Weekday weekday = @Weekday@.Monday;
    42 Names names = @Names.@Fred;
    43 names = @Names.@Jane;
     43Weekday weekday = @Weekday@.Mon;
     44weekday = @Weekday@.Sat;
     45RGB rgb = RGB.Red;
     46rgb = RGB.Blue;
    4447\end{cfa}
    4548It is possible to toggle back to unscoping using the \CFA @with@ clause/statement (see also \CC \lstinline[language=c++]{using enum} in Section~\ref{s:C++RelatedWork}).
    4649\begin{cfa}
    47 Weekday weekday;
    48 with ( @Weekday@, @Names@ ) {                   $\C{// type names}$
    49          Names names = @Fred@;
    50          names = @Jane@;
    51          weekday = Saturday;
    52 }
    53 \end{cfa}
    54 As in Section~\ref{s:EnumeratorNameResolution}, opening multiple unscoped enumerations can result in duplicate enumeration names, but \CFA type resolution and falling back to explicit qualification handles name resolution.
     50with ( @Weekday@, @RGB@ ) {                     $\C{// type names}$
     51         weekday = @Sun@;                               $\C{// no qualification}$
     52         rgb = @Green@;
     53}
     54\end{cfa}
     55As in Section~\ref{s:EnumeratorNameResolution}, opening multiple unscoped enumerations can result in duplicate enumeration names, but \CFA implicit type resolution and explicit qualification/casting handles name resolution.
    5556
    5657\section{Enumerator Typing}
     
    8081\begin{cfa}
    8182// integral
    82         enum( @char@ ) Currency { Dollar = '$\textdollar$', Euro = '$\texteuro$', Pound = '$\textsterling$' };
     83        enum( @char@ ) Currency { Dollar = '$\textdollar$', Cent = '$\textcent$', Yen = '$\textyen$', Pound = '$\textsterling$', Euro = 'E' };
    8384        enum( @signed char@ ) srgb { Red = -1, Green = 0, Blue = 1 };
    8485        enum( @long long int@ ) BigNum { X = 123_456_789_012_345,  Y = 345_012_789_456_123 };
     
    8788        enum( @_Complex@ ) Plane { X = 1.5+3.4i, Y = 7+3i, Z = 0+0.5i };
    8889// pointer
    89         enum( @char *@ ) Names { Fred = "FRED", Mary = "MARY", Jane = "JANE" };
     90        enum( @const char *@ ) Name { Fred = "FRED", Mary = "MARY", Jane = "JANE" };
    9091        int i, j, k;
    9192        enum( @int *@ ) ptr { I = &i,  J = &j,  K = &k };
     
    9899// aggregate
    99100        struct Person { char * name; int age, height; };
    100 @***@enum( @Person@ ) friends { @Liz@ = { "ELIZABETH", 22, 170 }, @Beth@ = Liz, Jon = { "JONATHAN", 35, 190 } };
     101@***@enum( @Person@ ) friends { @Liz@ = { "ELIZABETH", 22, 170 }, @Beth@ = Liz,
     102                                                                                        Jon = { "JONATHAN", 35, 190 } };
    101103\end{cfa}
    102104\caption{Enumerator Typing}
     
    140142\begin{cfa}
    141143enum() Mode { O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_APPEND };
    142 @***@Mode iomode = O_RDONLY;
    143 bool b = iomode == O_RDONLY || iomode < O_APPEND;
    144 int i = iomode;                                                 $\C{\color{red}// disallowed}$
     144Mode iomode = O_RDONLY;
     145bool b = iomode == O_RDONLY || iomode < O_APPEND; $\C{// ordering}$
     146@***@@int i = iomode;@                                                  $\C{// disallow conversion to int}$
    145147\end{cfa}
    146148
     
    149151If follows from enumerator typing that the enumerator type can be another enumerator.
    150152\begin{cfa}
    151 enum( @char@ ) Currency { Dollar = '$\textdollar$', Euro = '$\texteuro$', Pound = '$\textsterling$' };
    152 enum( @Currency@ ) Europe { Euro = Currency.Euro, Pound = Currency.Pound }; // intersection
     153enum( @char@ ) Currency { Dollar = '$\textdollar$', Cent = '$\textcent$', Yen = '$\textyen$', Pound = '$\textsterling$', Euro = 'E' };
     154enum( Currency ) Europe { Euro = Currency.Euro, Pound = Currency.Pound };
    153155enum( char ) Letter { A = 'A',  B = 'B', C = 'C', ..., Z = 'Z' };
    154156enum( @Letter@ ) Greek { Alph = A, Beta = B, ..., Zeta = Z }; // intersection
     
    158160\begin{cfa}
    159161Letter letter = A;
    160 @***@Greak greek = Beta;
    161 letter = Beta;                                                  $\C{// allowed, letter == B}$
    162 greek = A;                                                              $\C{\color{red}// disallowed}$
     162Greak greek = Beta;
     163letter = Beta;                                                  $\C{// allowed, greek == B}$
     164@greek = A;@                                                    $\C{// disallowed}$
    163165\end{cfa}
    164166
     
    277279p(variable_d); // 3
    278280\end{cfa}
     281
     282
     283\section{Planet Example}
     284
     285\VRef[Figure]{f:PlanetExample} shows an archetypal enumeration example illustrating all of the \CFA enumeration features.
     286Enumeration @Planet@ is a typed enumeration of type @MR@.
     287Each of the planet enumerators is initialized to a specific mass/radius, @MR@, value.
     288The unnamed enumeration projects the gravitational-constant enumerator @G@.
     289The program main iterates through the planets computing the weight on each planet for a given earth weight.
     290
     291\begin{figure}
     292\begin{cfa}
     293struct MR { double mass, radius; };
     294enum( MR ) Planet {
     295        //                           mass          radius
     296        MERCURY = { 3.303_E23, 2.4397_E6 },
     297        VENUS       = { 4.869_E24, 6.0518_E6 },
     298        EARTH       = { 5.976_E24, 6.3781_E6 },
     299        MARS         = { 6.421_E23, 3.3972_E6 },
     300        JUPITER    = { 1.898_E27, 7.1492_E7 },
     301        SATURN     = { 5.688_E26, 6.0268_E7 },
     302        URANUS    = { 8.686_E25, 2.5559_E7 },
     303        NEPTUNE  = { 1.024_E26, 2.4746_E7 },
     304};
     305enum( double ) { G = 6.6743E-11 }; // universal gravitational constant (m3 kg-1 s-2)
     306
     307static double surfaceGravity( Planet p ) with( p ) {
     308        return G * mass / ( radius * radius );
     309}
     310static double surfaceWeight( Planet p, double otherMass ) {
     311        return otherMass * surfaceGravity( p );
     312}
     313int main( int argc, char * argv[] ) {
     314        if ( argc != 2 ) exit | "Usage: " | argv[0] | "earth-weight";
     315        double earthWeight = convert( argv[1] );
     316        double mass = earthWeight / surfaceGravity( EARTH );
     317        for ( p; Planet ) {
     318                sout | "Your weight on" | labelE(p) | "is" | surfaceWeight( p, mass );
     319        }
     320}
     321\end{cfa}
     322\caption{Planet Example}
     323\label{f:PlanetExample}
     324\end{figure}
  • doc/theses/jiada_liang_MMath/background.tex

    rdf78cce r486caad  
    11\chapter{Background}
     2\lstnewenvironment{clang}[1][]{\lstset{language=[ANSI]C,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
     3
     4\CFA is a backwards-compatible extension of the C programming language.
     5Therefore, it must support C-style enumerations and any enumeration extensions must be intuitive to C programmers both in syntax and semantics.
     6
     7It is common for C programmers to ``believe'' there are three equivalent forms of named constants.
     8\begin{clang}
     9#define Mon 0
     10static const int Mon = 0;
     11enum { Mon };
     12\end{clang}
     13\begin{enumerate}[leftmargin=*]
     14\item
     15For @#define@, the programmer has to explicitly manage the constant name and value.
     16Furthermore, these C preprocessor macro names are outside of the C type-system, and hence cannot be overloaded, and can incorrectly change random text in a program.
     17\item
     18The same explicit management is true for the @const@ declaration, and the @const@ variable cannot appear in constant-expression locations, like @case@ labels, array dimensions,\footnote{
     19C allows variable-length array-declarations (VLA), so this case does work, but it fails in \CC, which does not support VLAs, unless it is \lstinline{g++}.} immediate operands of assembler instructions, and occupy storage.
     20\begin{clang}
     21$\$$ nm test.o
     220000000000000018 r Mon
     23\end{clang}
     24\item
     25Only the @enum@ form is managed by the compiler, is part of the language type-system, and works in all C constant-expression locations.
     26\end{enumerate}
    227
    328
    4 \section{C-Style Enum}
     29\section{C \lstinline{const}}
    530
    6 The C-Style enumeration has the following syntax and semantics.
    7 \begin{cfa}
    8 enum Weekday { Monday, Tuesday, Wednesday, Thursday@ = 10@, Friday, Saturday, Sunday };
    9 \end{cfa}
     31As noted, C has the equivalent of Pascal typed @const@ declarations \see{\VRef{s:Pascal}}, with static and dynamic initialization.
     32\begin{clang}
     33static const int one = 0 + 1;                   $\C{// static intialization}$
     34static const void * NIL = NULL;
     35static const double PI = 3.14159;
     36static const char Plus = '+';
     37static const char * Fred = "Fred";
     38static const int Mon = 0, Tue = Mon + 1, Wed = Tue + 1, Thu = Wed + 1, Fri = Thu + 1,
     39                                        Sat = Fri + 1, Sun = Sat + 1;
     40void foo() {
     41        const int r = random();                         $\C{// dynamic intialization}$
     42        int sa[Sun];                                            $\C{// VLA, local scope only}$
     43}
     44\end{clang}
     45Statically initialized identifiers may appear in any constant-expression context, \eg @case@.
     46Dynamically initialized identifiers may appear as array dimensions in @g++@, which allows variable-sized arrays.
     47
     48
     49\section{C Enumeration}
     50
     51The C enumeration has the following syntax and semantics.
     52\begin{clang}
     53enum Weekday { Mon, Tue, Wed, Thu@ = 10@, Fri, Sat, Sun, };
     54\end{clang}
    1055Enumerators without an explicitly designated constant value are \Newterm{auto-initialized} by the compiler: from left to right, starting at zero or the next explicitly initialized constant, incrementing by @1@.
    11 For example, @Monday@ to @Wednesday@ are implicitly assigned with constants @0@--@2@, @Thursday@ is explicitly set to constant @10@, and @Friday@ to @Sunday@ are implicitly assigned with constants @11@--@13@.
     56For example, @Mon@ to @Wed@ are implicitly assigned with constants @0@--@2@, @Thu@ is explicitly set to constant @10@, and @Fri@ to @Sun@ are implicitly assigned with constants @11@--@13@.
    1257Initialization may occur in any order.
    13 \begin{cfa}
    14 enum Weekday { Thursday@ = 10@, Friday, Saturday, Sunday, Monday@ = 0@, Tuesday, Wednesday };
    15 \end{cfa}
     58\begin{clang}
     59enum Weekday { Thu@ = 10@, Fri, Sat, Sun, Mon@ = 0@, Tue, Wed };
     60\end{clang}
    1661Note, the comma in the enumerator list can be a terminator or a separator, allowing the list to end with a dangling comma.
    17 \begin{cfa}
     62\begin{clang}
    1863enum Weekday {
    19         Thursday = 10, Friday, Saturday, Sunday,
    20         Monday = 0, Tuesday, Wednesday@,@ // terminating comma
     64        Thu = 10, Fri, Sat, Sun,
     65        Mon = 0, Tue, Wed@,@ // terminating comma
    2166};
    22 \end{cfa}
     67\end{clang}
    2368This feature allow enumerator lines to be interchanged without moving a comma.\footnote{
    2469A terminating comma appears in other C syntax, \eg the initializer list.}
     
    2873In practice, since integral constants are used, which have type @int@ (unless qualified with a size suffix), C uses @int@ as the underlying type for enumeration variables.
    2974Finally, there is an implicit bidirectional conversion between an enumeration and its integral type.
    30 \begin{cfa}
     75\begin{clang}
    3176{
    3277        enum Weekday { /* as above */ };        $\C{// enumerators implicitly projected into local scope}$
    33         Weekday weekday = Monday;                       $\C{// weekday == 0}$
    34         weekday = Friday;                                       $\C{// weekday == 11}$
    35         int i = Sunday;                                         $\C{// implicit conversion to int, i == 13}$
     78        Weekday weekday = Mon                        $\C{// weekday == 0}$
     79        weekday = Fri                                        $\C{// weekday == 11}$
     80        int i = Sun;                                            $\C{// implicit conversion to int, i == 13}$
    3681        weekday = 10000;                                        $\C{// UNDEFINED! implicit conversion to Weekday}$
    3782}
    38 int j = Wednesday;                                              $\C{// ERROR! Wednesday is not declared in this scope}$
    39 \end{cfa}
     83int j = Wed;                                                    $\C{// ERROR! Wed is not declared in this scope}$
     84\end{clang}
    4085The implicit conversion from @int@ to an enumeration type is an unnecessary source of error.
    41 
    42 It is common for C programmers to ``believe'' there are 3 equivalent forms of constant enumeration.
    43 \begin{cfa}
    44 #define Monday 0
    45 static const int Monday = 0;
    46 enum { Monday };
    47 \end{cfa}
    48 For @#define@, the programmer has to play compiler and explicitly manage the enumeration values;
    49 furthermore, these are independent constants outside of any language type mechanism.
    50 The same explicit management is true for @const@ declarations, and the @const@ variable cannot appear in constant-expression locations, like @case@ labels, array dimensions,\footnote{
    51 C allows variable-length array-declarations (VLA), so this case does work, but it fails in \CC, which does not support VLAs, unless it is \lstinline{g++}.} and immediate operands of assembler instructions.
    52 Only the @enum@ form is managed by the compiler, is part of the language type-system, and works in all C constant-expression locations.
  • doc/theses/jiada_liang_MMath/implementation.tex

    rdf78cce r486caad  
    548548\begin{cfa}
    549549enum(int) Weekday {
    550         Monday=10, Tuesday, ...
     550        Mon = 10, Tue, ...
    551551};
    552552
  • doc/theses/jiada_liang_MMath/intro.tex

    rdf78cce r486caad  
    11\chapter{Introduction}
    22
    3 Naming values is a common practice in mathematics and engineering, \eg $\pi$, $\tau$ (2$\pi$), $\phi$ (golden ratio), MHz (1E6), etc.
    4 Naming is also commonly used to represent many other numerical phenomenon, such as days of the week, months of a year, floors of a building (basement), specific times (noon, New Years).
    5 Many programming languages capture this important software engineering capability through a mechanism called an \Newterm{enumeration}.
    6 An enumeration is similar to other programming-language types by providing a set of constrained values, but adds the ability to name \emph{all} the values in its set.
    7 Note, all enumeration names must be unique but different names can represent the same value (eight note, quaver), which are synonyms.
     3All types in a programming language must have a set of constants, and these constants have \Newterm{primary names}, \eg integral types have constants @-1@, @17@, @12345@, \etc.
     4Constants can be overloaded among types, \eg @0@ is a null pointer for all pointer types, and the value zero for integral and floating-point types.
     5Hence, each primary constant has a symbolic name referring to its internal representation, and these names are dictated by language syntax related to types.
     6In theory, there are an infinite set of primary names per type.
    87
    9 Specifically, an enumerated type restricts its values to a fixed set of named constants.
    10 While all types are restricted to a fixed set of values because of the underlying von Neumann architecture, and hence, to a corresponding set of constants, \eg @3@, @3.5@, @3.5+2.1i@, @'c'@, @"abc"@, etc., these values are not named, other than the programming-language supplied constant names.
     8\Newterm{Secondary naming} is a common practice in mathematics and engineering, \eg $\pi$, $\tau$ (2$\pi$), $\phi$ (golden ratio), MHz (1E6), and in general situations, \eg specific times (noon, New Years), cities (Big Apple), flowers (Lily), \etc.
     9Many programming languages capture this important software-engineering capability through a mechanism called \Newterm{constant} or \Newterm{literal} naming, where a secondary name is aliased to a primary name.
     10In some cases, secondary naming is \Newterm{pure}, where the matching internal representation can be chosen arbitrarily, and only equality operations are available, \eg @O_RDONLY@, @O_WRONLY@, @O_CREAT@, @O_TRUNC@, @O_APPEND@.
     11(The names the thing.)
     12Because a secondary name is a constant, it cannot appear in a mutable context, \eg \mbox{$\pi$ \lstinline{= 42}} is meaningless, and a constant has no address, \ie it is an \Newterm{rvalue}\footnote{
     13The term rvalue defines an expression that can only appear on the right-hand side of an assignment expression.}.
    1114
    12 Fundamentally, all enumeration systems have an \Newterm{enumeration} type with an associated set of \Newterm{enumerator} names.
    13 An enumeration has three universal attributes, \Newterm{position}, \Newterm{label}, and \Newterm{value}, as shown by this representative enumeration, where position and value can be different.
     15Secondary names can form an (ordered) set, \eg days of the week, months of a year, floors of a building (basement, ground, 1st), colours in a rainbow, \etc.
     16Many programming languages capture these groupings through a mechanism called an \Newterm{enumeration}.
     17\begin{quote}
     18enumerate (verb, transitive).
     19To count, ascertain the number of;
     20\emph{more
     21usually, to mention (a number of things or persons) separately, as if for the
     22purpose of counting};
     23to specify as in a list or catalogue.~\cite{OED}
     24\end{quote}
     25Within an enumeration set, the enumeration names must be unique, and instances of an enumerated type are restricted to hold only the secondary names.
     26It is possible to enumerate among set names without having an ordering among the set elements.
     27For example, the week, the weekdays, the weekend, and every second day of the week.
     28\begin{cfa}[morekeywords={in}]
     29for ( cursor in Mon, Tue, Wed, Thu, Fri, Sat, Sun } ... $\C[3.75in]{// week}$
     30for ( cursor in Mon, Tue, Wed, Thu, Fri } ...   $\C{// weekday}$
     31for ( cursor in Thu, Fri } ...                                  $\C{// weekend}$
     32for ( cursor in Mon, Wed, Fri, Sun } ...                $\C{// every second day of week}\CRT$
     33\end{cfa}
     34This independence from internal representation allows multiple names to have the same representation (eight note, quaver), giving synonyms.
     35A set can have a partial or total ordering, making it possible to compare set elements, \eg Monday is before Friday and Friday is after.
     36Ordering allows iterating among the enumeration set using relational operators and advancement, \eg
     37\begin{cfa}
     38for ( cursor = Monday; cursor @<=@ Friday; cursor = @succ@( cursor ) ) ...
     39\end{cfa}
     40Here the internal representations for the secondary names are \emph{generated} rather than listing a subset of names.
     41
     42
     43\section{Terminology}
     44
     45The term \Newterm{enumeration} defines the set of secondary names, and the term \Newterm{enumerator} represents an arbitrary secondary name.
     46As well, an enumerated type has three fundamental properties, \Newterm{label}, \Newterm{order}, and \Newterm{value}.
    1447\begin{cquote}
    15 \small\sf\setlength{\tabcolsep}{3pt}
    16 \begin{tabular}{rccccccccccc}
    17 \it\color{red}enumeration & \multicolumn{7}{c}{\it\color{red}enumerators}       \\
    18 $\downarrow$\hspace*{25pt} & \multicolumn{7}{c}{$\downarrow$}                           \\
    19 @enum@ Weekday \{                               & Monday,       & Tuesday,      & Wednesday,    & Thursday,& Friday,    & Saturday,     & Sunday \}; \\
    20 \it\color{red}position                  & 0                     & 1                     & 2                             & 3                             & 4                     & 5                     & 6                     \\
    21 \it\color{red}label                             & Monday        & Tuesday       & Wednesday             & Thursday              & Friday        & Saturday      & Sunday        \\
    22 \it\color{red}value                             & 0                     & 1                     & 2                             & 3                             & 4                     & 5             & 6
     48\sf\setlength{\tabcolsep}{3pt}
     49\begin{tabular}{rcccccccr}
     50\it\color{red}enumeration       & \multicolumn{8}{c}{\it\color{red}enumerators} \\
     51$\downarrow$\hspace*{25pt}      & \multicolumn{8}{c}{$\downarrow$}                              \\
     52@enum@ Week \{                          & Mon,  & Tue,  & Wed,  & Thu,  & Fri,  & Sat,  & Sun = 42      & \};   \\
     53\it\color{red}label                     & Mon   & Tue   & Wed   & Thu   & Fri   & Sat   & Sun           &               \\
     54\it\color{red}order                     & 0             & 1             & 2             & 3             & 4             & 5             & 6                     &               \\
     55\it\color{red}value                     & 0             & 1             & 2             & 3             & 4             & 5             & 42            &
    2356\end{tabular}
    2457\end{cquote}
    25 Here, the \Newterm{enumeration} @Weekday@ defines the ordered \Newterm{enumerator}s @Monday@, @Tuesday@, @Wednesday@, @Thursday@, @Friday@, @Saturday@ and @Sunday@.
    26 By convention, the successor of @Tuesday@ is @Monday@ and the predecessor of @Tuesday@ is @Wednesday@, independent of the associated enumerator constant values.
    27 Because an enumerator is a constant, it cannot appear in a mutable context, \eg @Mon = Sun@ is meaningless, and an enumerator has no address, it is an \Newterm{rvalue}\footnote{
    28 The term rvalue defines an expression that can only appear on the right-hand side of an assignment.}.
     58Here, the enumeration @Week@ defines the enumerator labels @Mon@, @Tue@, @Wed@, @Thu@, @Fri@, @Sat@ and @Sun@.
     59The implicit ordering implies the successor of @Tue@ is @Mon@ and the predecessor of @Tue@ is @Wed@, independent of any associated enumerator values.
     60The value is the constant represented by the secondary name, which can be implicitly or explicitly set.
     61
     62Specifying complex ordering is possible:
     63\begin{cfa}
     64enum E1 { $\color{red}[\(_1\)$ {A, B}, $\color{blue}[\(_2\)$ C $\color{red}]\(_1\)$, {D, E} $\color{blue}]\(_2\)$ }; $\C{// overlapping square brackets}$
     65enum E2 { {A, {B, C} }, { {D, E}, F };  $\C{// nesting}$
     66\end{cfa}
     67For @E1@, there is the partial ordering among @A@, @B@ and @C@, and @C@, @D@ and @E@, but not among @A@, @B@ and @D@, @E@.
     68For @E2@, there is the total ordering @A@ $<$ @{B, C}@ $<$ @{D, E}@ $<$ @F@.
     69Only flat total-ordering among enumerators is considered in this work.
     70
     71
     72\section{Motivation}
     73
     74Some programming languages only provide secondary renaming, which can be simulated by an enumeration without ordering.
     75\begin{cfa}
     76const Size = 20, Pi = 3.14159;
     77enum { Size = 20, Pi = 3.14159 };   // unnamed enumeration $\(\Rightarrow\)$ no ordering
     78\end{cfa}
     79In both cases, it is possible to compare the secondary names, \eg @Size < Pi@, if that is meaningful;
     80however, without an enumeration type-name, it is impossible to create an iterator cursor.
     81
     82Secondary renaming can similate an enumeration, but with extra effort.
     83\begin{cfa}
     84const Mon = 1, Tue = 2, Wed = 3, Thu = 4, Fri = 5, Sat = 6, Sun = 7;
     85\end{cfa}
     86Furthermore, reordering the enumerators requires manual renumbering.
     87\begin{cfa}
     88const Sun = 1, Mon = 2, Tue = 3, Wed = 4, Thu = 5, Fri = 6, Sat = 7;
     89\end{cfa}
     90Finally, there is no common type to create a type-checked instance or iterator cursor.
     91Hence, there is only a weak equivalence between secondary naming and enumerations, justifying the enumeration type in a programming language.
     92
     93A variant (algebraic) type is often promoted as a kind of enumeration, \ie a varient type can simulate an enumeration.
     94A variant type is a tagged-union, where the possible types may be heterogeneous.
     95\begin{cfa}
     96@variant@ Variant {
     97        @int tag;@  // optional/implicit: 0 => int, 1 => double, 2 => S
     98        @union {@ // implicit
     99                case int i;
     100                case double d;
     101                case struct S { int i, j; } s;
     102        @};@
     103};
     104\end{cfa}
     105Crucially, the union implies instance storage is shared by all of the variant types.
     106Hence, a variant is dynamically typed, as in a dynamic-typed programming-language, but the set of types is statically bound, similar to some aspects of dynamic gradual-typing~\cite{Gradual Typing}.
     107Knowing which type is in a variant instance is crucial for correctness.
     108Occasionally, it is possible to statically determine all regions where each variant type is used, so a tag and runtime checking is unnecessary;
     109otherwise, a tag is required to denote the particular type in the variant and the tag checked at runtime using some form of type pattern-matching.
     110
     111The tag can be implicitly set by the compiler on assignment, or explicitly set by the program\-mer.
     112Type pattern-matching is then used to dynamically test the tag and branch to a section of code to safely manipulate the value, \eg:
     113\begin{cfa}[morekeywords={match}]
     114Variant v = 3;  // implicitly set tag to 0
     115@match@( v ) {    // know the type or test the tag
     116        case int { /* only access i field in v */ }
     117        case double { /* only access d field in v */ }
     118        case S { /* only access s field in v */ }
     119}
     120\end{cfa}
     121For safety, either all variant types must be listed or a @default@ case must exist with no field accesses.
     122
     123To simulate an enumeration with a variant, the tag is \emph{re-purposed} for either ordering or value and the variant types are omitted.
     124\begin{cfa}
     125variant Weekday {
     126        int tag; // implicit 0 => Mon, ..., 6 => Sun
     127        @case Mon;@ // no type
     128        ...
     129        @case Sun;@
     130};
     131\end{cfa}
     132The type system ensures tag setting and testing are correctly done.
     133However, the enumeration operations are limited to the available tag operations, \eg pattern matching.
     134\begin{cfa}
     135Week week = Mon;
     136if ( @dynamic_cast(Mon)@week ) ... // test tag == Mon
     137\end{cfa}
     138While enumerating among tag names is possible:
     139\begin{cfa}[morekeywords={in}]
     140for ( cursor in Mon, Wed, Fri, Sun ) ...
     141\end{cfa}
     142ordering for iteration would require a \emph{magic} extension, such as a special @enum@ variant, because it has no meaning for a regular variant, \ie @int@ < @double@.
     143
     144However, if a special @enum@ variant allows the tags to be heterogeneously typed, ordering must fall back on case positioning, as many types have incomparable values.
     145Iterating using tag ordering and heterogeneous types, also requires pattern matching.
     146\begin{cfa}[morekeywords={match}]
     147for ( cursor = Mon; cursor <= Fri; cursor = succ( cursor) ) {
     148        match( cursor ) {
     149                case Mon { /* access special type for Mon */ }
     150                ...
     151                case Fri { /* access special type for Fri */ }
     152                default
     153        }
     154}
     155\end{cfa}
     156If the variant type is changed by adding/removing types or the loop range changes, the pattern matching must be adjusted.
     157As well, if the start/stop values are dynamic, it may be impossible to statically determine if all variant types are listed.
     158
     159Re-purposing the notion of enumerating into variant types is ill formed and confusing.
     160Hence, there is only a weak equivalence between an enumeration and variant type, justifying the enumeration type in a programming language.
     161
    29162
    30163\section{Contributions}
     164
     165The goal of this work is to to extend the simple and unsafe enumeration type in the C programming-language into a sophisticated and safe type in the \CFA programming-language, while maintain backwards compatibility with C.
     166On the surface, enumerations seem like a simple type.
     167However, when extended with advanced features, enumerations become complex for both the type system and the runtime implementation.
     168
     169\begin{enumerate}
     170\item
     171overloading
     172\item
     173scoping
     174\item
     175typing
     176\item
     177subset
     178\item
     179inheritance
     180\end{enumerate}
  • doc/theses/jiada_liang_MMath/relatedwork.tex

    rdf78cce r486caad  
    22\label{s:RelatedWork}
    33
     4\begin{comment}
    45An algebraic data type (ADT) can be viewed as a recursive sum of product types.
    56A sum type lists values as members.
     
    1516Enumerated types are a special case of product/sum types with non-mutable fields, \ie initialized (constructed) once at the type's declaration, possible restricted to compile-time initialization.
    1617Values of algebraic types are access by subscripting, field qualification, or type (pattern) matching.
     18\end{comment}
    1719
    1820Enumeration types exist in many popular programming languages, both past and present, \eg Pascal~\cite{Pascal}, Ada~\cite{Ada}, \Csharp~\cite{Csharp}, OCaml~\cite{OCaml} \CC, Go~\cite{Go}, Java~\cite{Java}, Modula-3~\cite{Modula-3}, Rust~\cite{Rust}, Swift~\cite{Swift}, Python~\cite{Python}.
     
    2022
    2123\section{Pascal}
     24\label{s:Pascal}
    2225\lstnewenvironment{pascal}[1][]{\lstset{language=pascal,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
    2326
     
    2730                 PI = 3.14159;   Plus = '+';   Fred = 'Fred';
    2831\end{pascal}
    29 Here, there is no enumeration because there is no specific type (pseudo enumeration).
     32This mechanism is not an enumeration because there is no specific type (pseudo enumeration).
    3033Hence, there is no notion of a (possibly ordered) set, modulo the \lstinline[language=pascal]{set of} type.
    3134The type of each constant name (enumerator) is inferred from the constant-expression type.
     
    8184with Ada.Text_IO; use Ada.Text_IO;
    8285procedure test is
    83    type RGB is ( @Red@, Green, Blue );
    84    type Traffic_Light is ( @Red@, Yellow, Green );         -- overload
    85    procedure @Red@( Colour : RGB ) is begin            -- overload
    86        Put_Line( "Colour is " & RGB'Image( Colour ) );
    87    end Red;
    88    procedure @Red@( TL : Traffic_Light ) is begin       -- overload
    89        Put_Line( "Light is " & Traffic_Light'Image( TL ) );
    90    end Red;
     86        type RGB is ( @Red@, Green, Blue );
     87        type Traffic_Light is ( @Red@, Yellow, Green );         -- overload
     88        procedure @Red@( Colour : RGB ) is begin            -- overload
     89                Put_Line( "Colour is " & RGB'Image( Colour ) );
     90        end Red;
     91        procedure @Red@( TL : Traffic_Light ) is begin       -- overload
     92                Put_Line( "Light is " & Traffic_Light'Image( TL ) );
     93        end Red;
    9194begin
    92     @Red@( Blue );                               -- RGB
    93     @Red@( Yellow );                            -- Traffic_Light
    94     @Red@( @RGB'(Red)@ );               -- ambiguous without cast
     95        @Red@( Blue );                           -- RGB
     96        @Red@( Yellow );                                -- Traffic_Light
     97        @Red@( @RGB'(Red)@ );           -- ambiguous without cast
    9598end test;
    9699\end{ada}
     
    224227\lstnewenvironment{c++}[1][]{\lstset{language=[GNU]C++,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
    225228
    226 \CC is largely backwards compatible with C, so it inherited C's enumerations.
    227 However, the following non-backwards compatible changes have been made.
     229\CC has the equivalent of Pascal typed @const@ declarations \see{\VRef{s:Pascal}}, with static and dynamic initialization.
     230\begin{c++}
     231const auto one = 0 + 1;                                 $\C{// static intialization}$
     232const auto NULL = nullptr;
     233const auto PI = 3.14159;
     234const auto Plus = '+';
     235const auto Fred = "Fred";
     236const auto Mon = 0, Tue = Mon + 1, Wed = Tue + 1, Thu = Wed + 1, Fri = Thu + 1,
     237                                Sat = Fri + 1, Sun = Sat + 1;
     238int sa[Sun];
     239const auto r = random();                                $\C{// dynamic intialization}$
     240int da[r];                                                              $\C{// VLA}$
     241\end{c++}
     242Statically initialized identifiers may appear in any constant-expression context, \eg @case@.
     243Dynamically intialized identifiers may appear as array dimensions in @g++@, which allows variable-sized arrays.
     244Interestingly, global \CC @const@ declarations are implicitly marked @static@ (@r@ rather than @R@).
     245\begin{c++}
     246$\$$ nm test.o
     2470000000000000018 @r@ Mon
     248\end{c++}
     249
     250\CC enumeration is largely backwards compatible with C, so it inherited C's enumerations.
     251However, the following non-backwards compatible changes are made.
    228252
    229253\begin{cquote}
     
    423447\begin{Go}
    424448const ( Mon = iota; Tue; Wed; // 0, 1, 2
    425          @Thu = 10@; Fri; Sat; Sun ) // 10, 10, 10, 10
     449        @Thu = 10@; Fri; Sat; Sun ) // 10, 10, 10, 10
    426450\end{Go}
    427451Auto-incrementing can be restarted with an expression containing \emph{one} \lstinline[language=Go]{iota}.
     
    429453const ( V1 = iota; V2; @V3 = 7;@ V4 = @iota@; V5 ) // 0 1 7 3 4
    430454const ( Mon = iota; Tue; Wed; // 0, 1, 2
    431          @Thu = 10;@ Fri = @iota - Wed + Thu - 1@; Sat; Sun ) // 10, 11, 12, 13
     455        @Thu = 10;@ Fri = @iota - Wed + Thu - 1@; Sat; Sun ) // 10, 11, 12, 13
    432456\end{Go}
    433457Note, \lstinline[language=Go]{iota} is advanced for an explicitly initialized enumerator, like the underscore @_@ identifier.
     
    584608\lstnewenvironment{rust}[1][]{\lstset{language=Rust,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
    585609
    586 Enumerations
     610Rust provides a scoped enumeration based on variant types.
     611% An enumeration, also referred to as an enum, is a simultaneous definition of a nominal enumerated type as well as a set of constructors, that can be used to create or pattern-match values of the corresponding enumerated type.
     612An enumeration without constructors is called field-less.
    587613\begin{rust}
    588         Syntax
    589         Enumeration :
    590            enum IDENTIFIER  GenericParams? WhereClause? { EnumItems? }
    591 
    592         EnumItems :
    593            EnumItem ( , EnumItem )* ,?
    594 
    595         EnumItem :
    596            OuterAttribute* Visibility?
    597            IDENTIFIER ( EnumItemTuple | EnumItemStruct )? EnumItemDiscriminant?
    598 
    599         EnumItemTuple :
    600            ( TupleFields? )
    601 
    602         EnumItemStruct :
    603            { StructFields? }
    604 
    605         EnumItemDiscriminant :
    606            = Expression
     614enum Week { Mon, Tues, Wed, Thu, Fri, Sat, Sun@,@ }
     615let mut week: Week = Week::Mon;
     616week = Week::Fri;
    607617\end{rust}
    608 An enumeration, also referred to as an enum, is a simultaneous definition of a nominal enumerated type as well as a set of constructors, that can be used to create or pattern-match values of the corresponding enumerated type.
    609 
    610 Enumerations are declared with the keyword enum.
    611 
    612 An example of an enum item and its use:
     618A field-less enumeration with only unit variants is called unit-only.
    613619\begin{rust}
    614 enum Animal {
    615         Dog,
    616         Cat,
    617 }
    618 
    619 let mut a: Animal = Animal::Dog;
    620 a = Animal::Cat;
     620enum Week { Mon = 0, Tues = 1, Wed = 2, Thu = 3, Fri = 4, Sat = 5, Sun = 6 }
    621621\end{rust}
    622622Enum constructors can have either named or unnamed fields:
    623623\begin{rust}
    624624enum Animal {
    625         Dog(String, f64),
    626         Cat { name: String, weight: f64 },
    627 }
    628 
     625        Dog( String, f64 ),
     626        Cat{ name: String, weight: f64 },
     627}
    629628let mut a: Animal = Animal::Dog("Cocoa".to_string(), 37.2);
    630629a = Animal::Cat { name: "Spotty".to_string(), weight: 2.7 };
    631630\end{rust}
    632 In this example, Cat is a struct-like enum variant, whereas Dog is simply called an enum variant.
    633 
    634 An enum where no constructors contain fields are called a field-less enum. For example, this is a fieldless enum:
     631Here, @Dog@ is an @enum@ variant, whereas @Cat@ is a struct-like variant.
     632
     633Each @enum@ type has an implicit integer tag (discriminant), with a unique value for each variant type.
     634Like a C enumeration, the tag values for the variant types start at 0 with auto incrementing.
     635The tag is re-purposed for enumeration by allowing it to be explicitly set, and auto incrmenting continues from that value.
     636\begin{cquote}
     637\sf\setlength{\tabcolsep}{3pt}
     638\begin{tabular}{rcccccccr}
     639@enum@ Week \{  & Mon,  & Tue,  & Wed = 2,      & Thu = 10,     & Fri,  & Sat = 5,      & Sun   & \};   \\
     640\rm tags                & 0             & 1             & 2                     & 10            & 11    & 5                     & 6             &               \\
     641\end{tabular}
     642\end{cquote}
     643In general, the tag can only be read as an opaque reference for comparison.
    635644\begin{rust}
    636 enum Fieldless {
    637         Tuple(),
    638         Struct{},
    639         Unit,
    640 }
     645if mem::discriminant(&week) == mem::discriminant(&Week::Mon) ...
    641646\end{rust}
    642 If a field-less enum only contains unit variants, the enum is called an unit-only enum. For example:
     647If the enumeration is unit-only, or field-less with no explicit discriminants and where only unit variants are explicit, then the discriminant is accessible with a numeric cast.
    643648\begin{rust}
    644 enum Enum {
    645         Foo = 3,
    646         Bar = 2,
    647         Baz = 1,
    648 }
    649 \end{rust}
    650 
    651 \subsection{Discriminants}
    652 
    653 Each enum instance has a discriminant: an integer logically associated to it that is used to determine which variant it holds.
    654 
    655 Under the default representation, the discriminant is interpreted as an isize value. However, the compiler is allowed to use a smaller type (or another means of distinguishing variants) in its actual memory layout.
    656 
    657 \subsection{Assigning discriminant values}
    658 
    659 \subsection{Explicit discriminants}
    660 
    661 In two circumstances, the discriminant of a variant may be explicitly set by following the variant name with = and a constant expression:
    662 
    663         if the enumeration is "unit-only".
    664 
    665         if a primitive representation is used. For example:
    666 \begin{rust}
    667         #[repr(u8)]
    668         enum Enum {
    669                 Unit = 3,
    670                 Tuple(u16),
    671                 Struct {
    672                         a: u8,
    673                         b: u16,
    674                 } = 1,
    675         }
    676 \end{rust}
    677 
    678 \subsection{Implicit discriminants}
    679 
    680 If a discriminant for a variant is not specified, then it is set to one higher than the discriminant of the previous variant in the declaration. If the discriminant of the first variant in the declaration is unspecified, then it is set to zero.
    681 \begin{rust}
    682 enum Foo {
    683         Bar,                    // 0
    684         Baz = 123,        // 123
    685         Quux,              // 124
    686 }
    687 
    688 let baz_discriminant = Foo::Baz as u32;
    689 assert_eq!(baz_discriminant, 123);
    690 \end{rust}
    691 
    692 \subsection{Restrictions}
    693 
    694 It is an error when two variants share the same discriminant.
    695 \begin{rust}
    696 enum SharedDiscriminantError {
    697         SharedA = 1,
    698         SharedB = 1
    699 }
    700 
    701 enum SharedDiscriminantError2 {
    702         Zero,      // 0
    703         One,            // 1
    704         OneToo = 1  // 1 (collision with previous!)
    705 }
    706 \end{rust}
    707 It is also an error to have an unspecified discriminant where the previous discriminant is the maximum value for the size of the discriminant.
    708 \begin{rust}
    709 #[repr(u8)]
    710 enum OverflowingDiscriminantError {
    711         Max = 255,
    712         MaxPlusOne // Would be 256, but that overflows the enum.
    713 }
    714 
    715 #[repr(u8)]
    716 enum OverflowingDiscriminantError2 {
    717         MaxMinusOne = 254, // 254
    718         Max,                       // 255
    719         MaxPlusOne               // Would be 256, but that overflows the enum.
    720 }
    721 \end{rust}
    722 
    723 \subsection{Accessing discriminant}
    724 
    725 \begin{rust}
    726 Via mem::discriminant
    727 \end{rust}
    728 @mem::discriminant@ returns an opaque reference to the discriminant of an enum value which can be compared. This cannot be used to get the value of the discriminant.
    729 
    730 \subsection{Casting}
    731 
    732 If an enumeration is unit-only (with no tuple and struct variants), then its discriminant can be directly accessed with a numeric cast; e.g.:
    733 \begin{rust}
    734 enum Enum {
    735         Foo,
    736         Bar,
    737         Baz,
    738 }
    739 
    740 assert_eq!(0, Enum::Foo as isize);
    741 assert_eq!(1, Enum::Bar as isize);
    742 assert_eq!(2, Enum::Baz as isize);
    743 \end{rust}
    744 Field-less enums can be casted if they do not have explicit discriminants, or where only unit variants are explicit.
    745 \begin{rust}
    746 enum Fieldless {
    747         Tuple(),
    748         Struct{},
    749         Unit,
    750 }
    751 
    752 assert_eq!(0, Fieldless::Tuple() as isize);
    753 assert_eq!(1, Fieldless::Struct{} as isize);
    754 assert_eq!(2, Fieldless::Unit as isize);
    755 \end{rust}
    756 \begin{rust}
    757 #[repr(u8)]
    758 enum FieldlessWithDiscrimants {
    759         First = 10,
    760         Tuple(),
    761         Second = 20,
    762         Struct{},
    763         Unit,
    764 }
    765 
    766 assert_eq!(10, FieldlessWithDiscrimants::First as u8);
    767 assert_eq!(11, FieldlessWithDiscrimants::Tuple() as u8);
    768 assert_eq!(20, FieldlessWithDiscrimants::Second as u8);
    769 assert_eq!(21, FieldlessWithDiscrimants::Struct{} as u8);
    770 assert_eq!(22, FieldlessWithDiscrimants::Unit as u8);
    771 \end{rust}
    772 
    773 \subsection{Pointer casting}
    774 
    775 If the enumeration specifies a primitive representation, then the discriminant may be reliably accessed via unsafe pointer casting:
    776 \begin{rust}
    777 #[repr(u8)]
    778 enum Enum {
    779         Unit,
    780         Tuple(bool),
    781         Struct{a: bool},
    782 }
    783 
    784 impl Enum {
    785         fn discriminant(&self) -> u8 {
    786                 unsafe { *(self as *const Self as *const u8) }
    787         }
    788 }
    789 
    790 let unit_like = Enum::Unit;
    791 let tuple_like = Enum::Tuple(true);
    792 let struct_like = Enum::Struct{a: false};
    793 
    794 assert_eq!(0, unit_like.discriminant());
    795 assert_eq!(1, tuple_like.discriminant());
    796 assert_eq!(2, struct_like.discriminant());
    797 \end{rust}
    798 
    799 \subsection{Zero-variant enums}
    800 
    801 Enums with zero variants are known as zero-variant enums. As they have no valid values, they cannot be instantiated.
    802 \begin{rust}
    803 enum ZeroVariants {}
    804 \end{rust}
    805 Zero-variant enums are equivalent to the never type, but they cannot be coerced into other types.
    806 \begin{rust}
    807 let x: ZeroVariants = panic!();
    808 let y: u32 = x; // mismatched type error
    809 \end{rust}
    810 
    811 \subsection{Variant visibility}
    812 
    813 Enum variants syntactically allow a Visibility annotation, but this is rejected when the enum is validated. This allows items to be parsed with a unified syntax across different contexts where they are used.
    814 \begin{rust}
    815 macro_rules! mac_variant {
    816         ($vis:vis $name:ident) => {
    817                 enum $name {
    818                         $vis Unit,
    819 
    820                         $vis Tuple(u8, u16),
    821 
    822                         $vis Struct { f: u8 },
    823                 }
    824         }
    825 }
    826 
    827 // Empty `vis` is allowed.
    828 mac_variant! { E }
    829 
    830 // This is allowed, since it is removed before being validated.
    831 #[cfg(FALSE)]
    832 enum E {
    833         pub U,
    834         pub(crate) T(u8),
    835         pub(super) T { f: String }
    836 }
     649if week as isize == Week::Mon as isize ...
    837650\end{rust}
    838651
     
    12001013\lstnewenvironment{python}[1][]{\lstset{language=Python,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
    12011014
    1202 An @Enum@ is a set of symbolic names bound to unique values.
    1203 They are similar to global variables, but they offer a more useful @repr()@, grouping, type-safety, and a few other features.
    1204 
    1205 They are most useful when you have a variable that can take one of a limited selection of values. For example, the days of the week:
    1206 \begin{python}
    1207 >>> from enum import Enum
    1208 >>> class Weekday(Enum):
    1209 ...    MONDAY = 1
    1210 ...    TUESDAY = 2
    1211 ...    WEDNESDAY = 3
    1212 ...    THURSDAY = 4
    1213 ...    FRIDAY = 5
    1214 ...    SATURDAY = 6
    1215 ...    SUNDAY = 7
    1216 \end{python}
    1217 Or perhaps the RGB primary colors:
    1218 \begin{python}
    1219 >>> from enum import Enum
    1220 >>> class Color(Enum):
    1221 ...    RED = 1
    1222 ...    GREEN = 2
    1223 ...    BLUE = 3
    1224 \end{python}
    1225 As you can see, creating an @Enum@ is as simple as writing a class that inherits from @Enum@ itself.
    1226 
    1227 Note: Case of Enum Members
    1228 
    1229 Because Enums are used to represent constants, and to help avoid issues with name clashes between mixin-class methods/attributes and enum names, we strongly recommend using @UPPER_CASE@ names for members, and will be using that style in our examples.
     1015A Python enumeration is a set of symbolic names bound to \emph{unique} values.
     1016They are similar to global variables, but offer a more useful @repr()@, grouping, type-safety, and additional features.
     1017Enumerations inherits from the @Enum@ class, \eg:
     1018\begin{python}
     1019class Weekday(@Enum@): Mon = 1; Tue = 2; Wed = 3; Thu = 4; Fri = 5; Sat = 6; Sun = 7
     1020class RGB(@Enum@): Red = 1; Green = 2; Blue = 3
     1021\end{python}
    12301022
    12311023Depending on the nature of the enum a member's value may or may not be important, but either way that value can be used to get the corresponding member:
    12321024\begin{python}
    1233 >>> Weekday(3)
    1234 <Weekday.WEDNESDAY: 3>
     1025print( repr( Weekday( 3 ) ) )
     1026<Weekday.Wed: 3>
    12351027\end{python}
    12361028As you can see, the @repr()@ of a member shows the enum name, the member name, and the value.
    12371029The @str()@ of a member shows only the enum name and member name:
    12381030\begin{python}
    1239 print(Weekday.THURSDAY)
    1240 Weekday.THURSDAY
     1031print( str( Weekday.Thu ), Weekday.Thu )
     1032Weekday.Thu Weekday.Thu
    12411033\end{python}
    12421034The type of an enumeration member is the enum it belongs to:
    12431035\begin{python}
    1244 >>> type(Weekday.MONDAY)
     1036print( type( Weekday.Thu ) )
    12451037<enum 'Weekday'>
    1246 isinstance(Weekday.FRIDAY, Weekday)
     1038print( isinstance(Weekday.Fri, Weekday) )
    12471039True
    12481040\end{python}
    12491041Enum members have an attribute that contains just their name:
    12501042\begin{python}
    1251 >>> print(Weekday.TUESDAY.name)
     1043print(Weekday.TUESDAY.name)
    12521044TUESDAY
    12531045\end{python}
    12541046Likewise, they have an attribute for their value:
    12551047\begin{python}
    1256 >>> Weekday.WEDNESDAY.value
     1048Weekday.WEDNESDAY.value
    125710493
    12581050\end{python}
     1051
    12591052Unlike many languages that treat enumerations solely as name/value pairs, Python @Enum@s can have behavior added.
    12601053For example, @datetime.date@ has two methods for returning the weekday: @weekday()@ and @isoweekday()@.
     
    12621055Rather than keep track of that ourselves we can add a method to the @Weekday@ enum to extract the day from the date instance and return the matching enum member:
    12631056\begin{python}
     1057class Weekday(Enum): Mon = 1; Tue = 2; Wed = 3; Thu = 10; Fri = 15; Sat = 16; Sun = 17
    12641058$@$classmethod
    12651059def from_date(cls, date):
    1266     return cls(date.isoweekday())
    1267 \end{python}
    1268 The complete Weekday enum now looks like this:
    1269 \begin{python}
    1270 >>> class Weekday(Enum):
    1271 ...    MONDAY = 1
    1272 ...    TUESDAY = 2
    1273 ...    WEDNESDAY = 3
    1274 ...    THURSDAY = 4
    1275 ...    FRIDAY = 5
    1276 ...    SATURDAY = 6
    1277 ...    SUNDAY = 7
    1278 ...    #
    1279 ...    $@$classmethod
    1280 ...    def from_date(cls, date):
    1281 ...        return cls(date.isoweekday())
     1060        return cls(date.isoweekday())
    12821061\end{python}
    12831062Now we can find out what today is! Observe:
     
    12911070This Weekday enum is great if our variable only needs one day, but what if we need several? Maybe we're writing a function to plot chores during a week, and don't want to use a @list@ -- we could use a different type of @Enum@:
    12921071\begin{python}
    1293 >>> from enum import Flag
    1294 >>> class Weekday(Flag):
    1295 ...    MONDAY = 1
    1296 ...    TUESDAY = 2
    1297 ...    WEDNESDAY = 4
    1298 ...    THURSDAY = 8
    1299 ...    FRIDAY = 16
    1300 ...    SATURDAY = 32
    1301 ...    SUNDAY = 64
     1072from enum import Flag
     1073class WeekdayF(@Flag@): Mon = @1@; Tue = @2@; Wed = @4@; Thu = @8@; Fri = @16@; Sat = @32@; Sun = @64@
    13021074\end{python}
    13031075We've changed two things: we're inherited from @Flag@, and the values are all powers of 2.
    13041076
    1305 Just like the original @Weekday@ enum above, we can have a single selection:
    1306 \begin{python}
    1307 >>> first_week_day = Weekday.MONDAY
    1308 >>> first_week_day
    1309 <Weekday.MONDAY: 1>
    1310 \end{python}
    1311 But @Flag@ also allows us to combine several members into a single variable:
    1312 \begin{python}
    1313 >>> weekend = Weekday.SATURDAY | Weekday.SUNDAY
    1314 >>> weekend
    1315 <Weekday.SATURDAY|SUNDAY: 96>
     1077@Flag@ allows combining several members into a single variable:
     1078\begin{python}
     1079print( repr(WeekdayF.Sat | WeekdayF.Sun) )
     1080<WeekdayF.Sun|Sat: 96>
    13161081\end{python}
    13171082You can even iterate over a @Flag@ variable:
    13181083\begin{python}
    1319 >>> for day in weekend:
    1320 ...    print(day)
     1084for day in weekend:
     1085        print(day)
    13211086Weekday.SATURDAY
    13221087Weekday.SUNDAY
     
    13821147\subsection{Duplicating enum members and values}
    13831148
    1384 Having two enum members with the same name is invalid:
    1385 \begin{python}
    1386 >>> class Shape(Enum):
    1387 ...    SQUARE = 2
    1388 ...    SQUARE = 3
    1389 ...
    1390 Traceback (most recent call last):
    1391 ...
    1392 TypeError: 'SQUARE' already defined as 2
    1393 \end{python}
    1394 However, an enum member can have other names associated with it.
     1149An enum member can have other names associated with it.
    13951150Given two entries @A@ and @B@ with the same value (and @A@ defined first), @B@ is an alias for the member @A@.
    13961151By-value lookup of the value of @A@ will return the member @A@.
     
    13981153By-name lookup of @B@ will also return the member @A@:
    13991154\begin{python}
    1400 >>> class Shape(Enum):
    1401 ...    SQUARE = 2
    1402 ...    DIAMOND = 1
    1403 ...    CIRCLE = 3
    1404 ...    ALIAS_FOR_SQUARE = 2
    1405 ...
     1155class Shape(Enum): SQUARE = 2; DIAMOND = 1; CIRCLE = 3; ALIAS_FOR_SQUARE = 2
    14061156>>> Shape.SQUARE
    14071157<Shape.SQUARE: 2>
     
    14191169When this behavior isn't desired, you can use the @unique()@ decorator:
    14201170\begin{python}
    1421 >>> from enum import Enum, unique
    1422 >>> $@$unique
    1423 ... class Mistake(Enum):
    1424 ...     ONE = 1
    1425 ...     TWO = 2
    1426 ...     THREE = 3
    1427 ...     FOUR = 3
    1428 ...
    1429 Traceback (most recent call last):
    1430 ...
     1171from enum import Enum, unique
     1172$@$unique
     1173class DupVal(Enum): ONE = 1; TWO = 2; THREE = 3; FOUR = 3
    14311174ValueError: duplicate values found in <enum 'Mistake'>: FOUR -> THREE
    14321175\end{python}
     
    14361179If the exact value is unimportant you can use @auto@:
    14371180\begin{python}
    1438 >>> from enum import Enum, auto
    1439 >>> class Color(Enum):
    1440 ...     RED = auto()
    1441 ...     BLUE = auto()
    1442 ...     GREEN = auto()
    1443 ...
    1444 >>> [member.value for member in Color]
    1445 [1, 2, 3]
    1446 \end{python}
    1447 The values are chosen by \_generate\_next\_value\_(), which can be overridden:
     1181from enum import Enum, auto
     1182class RGBa(Enum): RED = auto(); BLUE = auto(); GREEN = auto()
     1183\end{python}
     1184(Like Golang @iota@.)
     1185The values are chosen by @_generate_next_value_()@, which can be overridden:
    14481186\begin{python}
    14491187>>> class AutoName(Enum):
     
    15821320\begin{python}
    15831321class EnumName([mix-in, ...,] [data-type,] base-enum):
    1584     pass
     1322        pass
    15851323\end{python}
    15861324Also, subclassing an enumeration is allowed only if the enumeration does not define any members.
     
    16991437\begin{python}
    17001438Enum(
    1701     value='NewEnumName',
    1702     names=<...>,
    1703     *,
    1704     module='...',
    1705     qualname='...',
    1706     type=<mixed-in class>,
    1707     start=1,
    1708     )
     1439        value='NewEnumName',
     1440        names=<...>,
     1441        *,
     1442        module='...',
     1443        qualname='...',
     1444        type=<mixed-in class>,
     1445        start=1,
     1446        )
    17091447\end{python}
    17101448\begin{itemize}
     
    19351673\begin{python}
    19361674class IntEnum(int, Enum):
    1937     pass
     1675        pass
    19381676\end{python}
    19391677This demonstrates how similar derived enumerations can be defined;
     
    20701808\begin{python}
    20711809def __bool__(self):
    2072     return bool(self.value)
     1810        return bool(self.value)
    20731811\end{python}
    20741812Plain @Enum@ classes always evaluate as @True@.
     
    24142152
    24152153If @__new__()@ or @__init__()@ is defined, the value of the enum member will be passed to those methods:
    2416 \begin{python}
    2417 >>> class Planet(Enum):
    2418 ...     MERCURY = (3.303e+23, 2.4397e6)
    2419 ...     VENUS   = (4.869e+24, 6.0518e6)
    2420 ...     EARTH   = (5.976e+24, 6.37814e6)
    2421 ...     MARS    = (6.421e+23, 3.3972e6)
    2422 ...     JUPITER = (1.9e+27,   7.1492e7)
    2423 ...     SATURN  = (5.688e+26, 6.0268e7)
    2424 ...     URANUS  = (8.686e+25, 2.5559e7)
    2425 ...     NEPTUNE = (1.024e+26, 2.4746e7)
    2426 ...     def __init__(self, mass, radius):
    2427 ...         self.mass = mass       # in kilograms
    2428 ...         self.radius = radius   # in meters
    2429 ...     $\@$property
    2430 ...     def surface_gravity(self):
    2431 ...         # universal gravitational constant  (m3 kg-1 s-2)
    2432 ...         G = 6.67300E-11
    2433 ...         return G * self.mass / (self.radius * self.radius)
    2434 ...
    2435 >>> Planet.EARTH.value
    2436 (5.976e+24, 6378140.0)
    2437 >>> Planet.EARTH.surface_gravity
    2438 9.802652743337129
    2439 \end{python}
     2154\begin{figure}
     2155\begin{python}
     2156from enum import Enum
     2157class Planet(Enum):
     2158        MERCURY = ( 3.303E23, 2.4397E6 )
     2159        VENUS       = ( 4.869E24, 6.0518E6 )
     2160        EARTH       = (5.976E24, 6.37814E6)
     2161        MARS         = (6.421E23, 3.3972E6)
     2162        JUPITER    = (1.9E27,   7.1492E7)
     2163        SATURN     = (5.688E26, 6.0268E7)
     2164        URANUS    = (8.686E25, 2.5559E7)
     2165        NEPTUNE  = (1.024E26, 2.4746E7)
     2166        def __init__( self, mass, radius ):
     2167                self.mass = mass                # in kilograms
     2168                self.radius = radius    # in meters
     2169        def surface_gravity( self ):
     2170                # universal gravitational constant  (m3 kg-1 s-2)
     2171                G = 6.67300E-11
     2172                return G * self.mass / (self.radius * self.radius)
     2173for p in Planet:
     2174        print( f"{p.name}: {p.value}" )
     2175
     2176MERCURY: (3.303e+23, 2439700.0)
     2177VENUS: (4.869e+24, 6051800.0)
     2178EARTH: (5.976e+24, 6378140.0)
     2179MARS: (6.421e+23, 3397200.0)
     2180JUPITER: (1.9e+27, 71492000.0)
     2181SATURN: (5.688e+26, 60268000.0)
     2182URANUS: (8.686e+25, 25559000.0)
     2183NEPTUNE: (1.024e+26, 24746000.0)
     2184\end{python}
     2185\caption{Python Planet Example}
     2186\label{f:PythonPlanetExample}
     2187\end{figure}
     2188
    24402189
    24412190\subsection{TimePeriod}
     
    24642213\section{OCaml}
    24652214\lstnewenvironment{ocaml}[1][]{\lstset{language=OCaml,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
     2215
     2216% https://ocaml.org/docs/basic-data-types#enumerated-data-types
    24662217
    24672218OCaml provides a variant (union) type, where multiple heterogeneously-typed objects share the same storage.
     
    25532304With valediction,
    25542305  - Gregor Richards
     2306
     2307
     2308Date: Thu, 14 Mar 2024 21:45:52 -0400
     2309Subject: Re: OCaml "enums" do come with ordering
     2310To: "Peter A. Buhr" <pabuhr@uwaterloo.ca>
     2311From: Gregor Richards <gregor.richards@uwaterloo.ca>
     2312
     2313On 3/14/24 21:30, Peter A. Buhr wrote:
     2314> I've marked 3 places with your name to shows places with enum ordering.
     2315>
     2316> type weekday = Mon | Tue | Wed | Thu | Fri | Sat | Sun
     2317> let day : weekday = Mon
     2318> let take_class( d : weekday ) =
     2319>       if d <= Fri then                                (* Gregor *)
     2320>               Printf.printf "weekday\n"
     2321>       else if d >= Sat then                   (* Gregor *)
     2322>               Printf.printf "weekend\n";
     2323>       match d with
     2324>               Mon | Wed -> Printf.printf "CS442\n" |
     2325>               Tue | Thu -> Printf.printf "CS343\n" |
     2326>               Fri -> Printf.printf "Tutorial\n" |
     2327>               _ -> Printf.printf "Take a break\n"
     2328>
     2329> let _ = take_class( Mon ); take_class( Sat );
     2330>
     2331> type colour = Red | Green of string | Blue of int * float
     2332> let c = Red
     2333> let _ = match c with Red -> Printf.printf "Red, "
     2334> let c = Green( "abc" )
     2335> let _ = match c with Green g -> Printf.printf "%s, " g
     2336> let c = Blue( 1, 1.5 )
     2337> let _ = match c with Blue( i, f ) -> Printf.printf "%d %g\n" i f
     2338>
     2339> let check_colour(c: colour): string =
     2340>       if c < Green( "xyz" ) then              (* Gregor *)
     2341>               Printf.printf "green\n";
     2342>       match c with
     2343>               Red -> "Red" |
     2344>               Green g -> g |
     2345>               Blue(i, f) -> string_of_int i ^ string_of_float f
     2346> let _ = check_colour( Red ); check_colour( Green( "xyz" ) );
     2347>
     2348> type stringList = Empty | Pair of string * stringList
     2349> let rec len_of_string_list(l: stringList): int =
     2350>       match l with
     2351>               Empty -> 0 |
     2352>               Pair(_ , r) -> 1 + len_of_string_list r
     2353>
     2354> let _ = for i = 1 to 10 do
     2355>       Printf.printf "%d, " i
     2356> done
     2357>
     2358> (* Local Variables: *)
     2359> (* tab-width: 4 *)
     2360> (* compile-command: "ocaml test.ml" *)
     2361> (* End: *)
     2362
     2363My functional-language familiarity is far more with Haskell than OCaml.  I
     2364mostly view OCaml through a lens of "it's Haskell but with cheating".  Haskell
     2365"enums" (ADTs) aren't ordered unless you specifically and manually put them in
     2366the Ord typeclass by defining the comparators.  Apparently, OCaml has some
     2367other rule, which I would guess is something like "sort by tag then by order of
     2368parameter". Having a default behavior for comparators is *bizarre*; my guess
     2369would be that it gained this behavior in its flirtation with object
     2370orientation, but that's just a guess (and irrelevant).
     2371
     2372This gives a total order, but not enumerability (which would still be
     2373effectively impossible or even meaningless since enums are just a special case
     2374of ADTs).
     2375
     2376With valediction,
     2377  - Gregor Richards
     2378
     2379Date: Wed, 20 Mar 2024 18:16:44 -0400
     2380Subject: Re:
     2381To: "Peter A. Buhr" <pabuhr@uwaterloo.ca>
     2382From: Gregor Richards <gregor.richards@uwaterloo.ca>
     2383
     2384
     2385On 3/20/24 17:26, Peter A. Buhr wrote:
     2386> Gregor, everyone at this end would like a definition of "enumerability". Can
     2387> you formulate one?
     2388
     2389According to the OED (emphasis added to the meaning I'm after):
     2390
     2391enumerate (verb, transitive). To count, ascertain the number of; **more
     2392usually, to mention (a number of things or persons) separately, as if for the
     2393purpose of counting**; to specify as in a list or catalogue.
     2394
     2395With C enums, if you know the lowest and highest value, you can simply loop
     2396over them in a for loop (this is, of course, why so many enums come with an
     2397ENUM_WHATEVER_LAST value). But, I would be hesitant to use the word "loop" to
     2398describe enumerability, since in functional languages, you would recurse for
     2399such a purpose.
     2400
     2401In Haskell, in order to do something with every member of an "enumeration", you
     2402would have to explicitly list them all. The type system will help a bit since
     2403it knows if you haven't listed them all, but you would have to statically have
     2404every element in the enumeration.  If somebody added new elements to the
     2405enumeration later, your code to enumerate over them would no longer work
     2406correctly, because you can't simply say "for each member of this enumeration do
     2407X". In Haskell that's because there aren't actually enumerations; what they use
     2408as enumerations are a degenerate form of algebraic datatypes, and ADTs are
     2409certainly not enumerable. In OCaml, you've demonstrated that they impose
     2410comparability, but I would still assume that you can't make a loop over every
     2411member of an enumeration. (But, who knows!)
     2412
     2413Since that's literally what "enumerate" means, it seems like a rather important
     2414property for enumerations to have ;)
     2415
     2416With valediction,
     2417  - Gregor Richards
     2418
     2419
     2420From: Andrew James Beach <ajbeach@uwaterloo.ca>
     2421To: Gregor Richards <gregor.richards@uwaterloo.ca>, Peter Buhr <pabuhr@uwaterloo.ca>
     2422CC: Michael Leslie Brooks <mlbrooks@uwaterloo.ca>, Fangren Yu <f37yu@uwaterloo.ca>,
     2423    Jiada Liang <j82liang@uwaterloo.ca>
     2424Subject: Re: Re:
     2425Date: Thu, 21 Mar 2024 14:26:36 +0000
     2426
     2427Does this mean that not all enum declarations in C create enumerations? If you
     2428declare an enumeration like:
     2429
     2430enum Example {
     2431    Label,
     2432    Name = 10,
     2433    Tag = 3,
     2434};
     2435
     2436I don't think there is any way to enumerate (iterate, loop, recurse) over these
     2437values without listing all of them.
     2438
     2439
     2440Date: Thu, 21 Mar 2024 10:31:49 -0400
     2441Subject: Re:
     2442To: Andrew James Beach <ajbeach@uwaterloo.ca>, Peter Buhr <pabuhr@uwaterloo.ca>
     2443CC: Michael Leslie Brooks <mlbrooks@uwaterloo.ca>, Fangren Yu <f37yu@uwaterloo.ca>,
     2444    Jiada Liang <j82liang@uwaterloo.ca>
     2445From: Gregor Richards <gregor.richards@uwaterloo.ca>
     2446
     2447I consider this conclusion reasonable. C enums can be nothing more than const
     2448ints, and if used in that way, I personally wouldn't consider them as
     2449enumerations in any meaningful sense, particularly since the type checker
     2450essentially does nothing for you there. Then they're a way of writing consts
     2451repeatedly with some textual indicator that these definitions are related; more
     2452namespace, less enum.
     2453
     2454When somebody writes bitfield members as an enum, is that *really* an
     2455enumeration, or just a use of the syntax for enums to keep related definitions
     2456together?
     2457
     2458With valediction,
     2459  - Gregor Richards
    25552460\end{comment}
    25562461
     
    25582463\section{Comparison}
    25592464
    2560 \begin{tabular}{r|ccccccccc}
    2561 feat. / lang. & Pascal  & Ada   & \Csharp       & OCaml & Java  & Modula-3      & Rust  & Swift & Python        \\
     2465\VRef[Table]{t:FeatureLanguageComparison} shows a comparison of enumeration features and programming languages.
     2466The features are high level and may not capture nuances within a particular language
     2467The @const@ feature is simple macros substitution and not a typed enumeration.
     2468
     2469\begin{table}
     2470\caption{Enumeration Feature / Language Comparison}
     2471\label{t:FeatureLanguageComparison}
     2472\small
     2473\setlength{\tabcolsep}{3pt}
     2474\newcommand{\CM}{\checkmark}
     2475\begin{tabular}{r|c|c|c|c|c|c|c|c|c|c|c|c|c}
     2476                                &Pascal & Ada   &\Csharp& OCaml & Java  &Modula-3&Golang& Rust  & Swift & Python& C             & \CC   & \CFA  \\
    25622477\hline
    2563 pure            &                       &               &                       &               &               &                       &               &               &                       \\
    2564 ordered         &                       &               &                       &               &               &                       &               &               &                       \\
    2565 setable         &                       &               &                       &               &               &                       &               &               &                       \\
    2566 auto-init       &                       &               &                       &               &               &                       &               &               &                       \\
    2567 scoped          &                       &               &                       &               &               &                       &               &               &                       \\
    2568 typed           &                       &               &                       &               &               &                       &               &               &                       \\
    2569 switch          &                       &               &                       &               &               &                       &               &               &                       \\
    2570 loop            &                       &               &                       &               &               &                       &               &               &                       \\
    2571 array           &                       &               &                       &               &               &                       &               &               &                       \\
     2478@const@                 & \CM   &               &               &               &               &               & \CM   &               &               &               &               & \CM   &               \\
     2479\hline
     2480\hline
     2481pure                    &               &               &               &               &               &               &               &               &               &               &               &               & \CM   \\
     2482\hline
     2483typed                   &               &               &               &               &               &               &               &               &               &               & @int@ & integral      & @T@   \\
     2484\hline
     2485safe                    &               &               &               &               &               &               &               &               &               &               &               & \CM   & \CM   \\
     2486\hline
     2487ordered                 &               &               &               &               &               &               &               &               &               &               & \CM   & \CM   & \CM   \\
     2488\hline
     2489dup. values             &               &               &               &               &               &               &               &               &               & alias & \CM   & \CM   & \CM   \\
     2490\hline
     2491setable                 &               &               &               &               &               &               &               &               &               &               & \CM   & \CM   & \CM   \\
     2492\hline
     2493auto-init               &               &               &               &               &               &               &               &               &               &               & \CM   & \CM   & \CM   \\
     2494\hline
     2495(un)scoped              &               &               &               &               &               &               &               &               &               &               & U             & U/S   & U/S   \\
     2496\hline
     2497overload                &               & \CM   &               &               &               &               &               &               &               &               &               & \CM   & \CM   \\
     2498\hline
     2499switch                  &               &               &               &               &               &               &               &               &               &               & \CM   & \CM   & \CM   \\
     2500\hline
     2501loop                    &               &               &               &               &               &               &               &               &               &               &               &               & \CM   \\
     2502\hline
     2503array                   &               &               &               &               &               &               &               &               &               &               & \CM   &               & \CM   \\
     2504\hline
     2505subtype                 &               &               &               &               &               &               &               &               &               &               &               &               & \CM   \\
     2506\hline
     2507inheritance             &               &               &               &               &               &               &               &               &               &               &               &               & \CM   \\
    25722508\end{tabular}
     2509\end{table}
  • doc/theses/mike_brooks_MMath/array.tex

    rdf78cce r486caad  
    510510
    511511\subsection{Retire pointer arithmetic}
     512
     513
     514\section{\CFA}
     515
     516XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX \\
     517moved from background chapter \\
     518XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX \\
     519
     520Traditionally, fixing C meant leaving the C-ism alone, while providing a better alternative beside it.
     521(For later:  That's what I offer with array.hfa, but in the future-work vision for arrays, the fix includes helping programmers stop accidentally using a broken C-ism.)
     522
     523\subsection{\CFA features interacting with arrays}
     524
     525Prior work on \CFA included making C arrays, as used in C code from the wild,
     526work, if this code is fed into @cfacc@.
     527The quality of this this treatment was fine, with no more or fewer bugs than is typical.
     528
     529More mixed results arose with feeding these ``C'' arrays into preexisting \CFA features.
     530
     531A notable success was with the \CFA @alloc@ function,
     532which type information associated with a polymorphic return type
     533replaces @malloc@'s use of programmer-supplied size information.
     534\begin{cfa}
     535// C, library
     536void * malloc( size_t );
     537// C, user
     538struct tm * el1 = malloc(      sizeof(struct tm) );
     539struct tm * ar1 = malloc( 10 * sizeof(struct tm) );
     540
     541// CFA, library
     542forall( T * ) T * alloc();
     543// CFA, user
     544tm * el2 = alloc();
     545tm (*ar2)[10] = alloc();
     546\end{cfa}
     547The alloc polymorphic return compiles into a hidden parameter, which receives a compiler-generated argument.
     548This compiler's argument generation uses type information from the left-hand side of the initialization to obtain the intended type.
     549Using a compiler-produced value eliminates an opportunity for user error.
     550
     551TODO: fix in following: even the alloc call gives bad code gen: verify it was always this way; walk back the wording about things just working here; assignment (rebind) seems to offer workaround, as in bkgd-cfa-arrayinteract.cfa
     552
     553Bringing in another \CFA feature, reference types, both resolves a sore spot of the last example, and gives a first example of an array-interaction bug.
     554In the last example, the choice of ``pointer to array'' @ar2@ breaks a parallel with @ar1@.
     555They are not subscripted in the same way.
     556\begin{cfa}
     557ar1[5];
     558(*ar2)[5];
     559\end{cfa}
     560Using ``reference to array'' works at resolving this issue.  TODO: discuss connection with Doug-Lea \CC proposal.
     561\begin{cfa}
     562tm (&ar3)[10] = *alloc();
     563ar3[5];
     564\end{cfa}
     565The implicit size communication to @alloc@ still works in the same ways as for @ar2@.
     566
     567Using proper array types (@ar2@ and @ar3@) addresses a concern about using raw element pointers (@ar1@), albeit a theoretical one.
     568TODO xref C standard does not claim that @ar1@ may be subscripted,
     569because no stage of interpreting the construction of @ar1@ has it be that ``there is an \emph{array object} here.''
     570But both @*ar2@ and the referent of @ar3@ are the results of \emph{typed} @alloc@ calls,
     571where the type requested is an array, making the result, much more obviously, an array object.
     572
     573The ``reference to array'' type has its sore spots too.
     574TODO see also @dimexpr-match-c/REFPARAM_CALL@ (under @TRY_BUG_1@)
     575
     576TODO: I fixed a bug associated with using an array as a T.  I think.  Did I really?  What was the bug?
  • doc/theses/mike_brooks_MMath/background.tex

    rdf78cce r486caad  
    11\chapter{Background}
    22
    3 This chapter states facts about the prior work, upon which my contributions build.
    4 Each receives a justification of the extent to which its statement is phrased to provoke controversy or surprise.
    5 
    6 \section{C}
    7 
    8 \subsection{Common knowledge}
    9 
    10 The reader is assumed to have used C or \CC for the coursework of at least four university-level courses, or have equivalent experience.
    11 The current discussion introduces facts, unaware of which, such a functioning novice may be operating.
    12 
    13 % TODO: decide if I'm also claiming this collection of facts, and test-oriented presentation is a contribution; if so, deal with (not) arguing for its originality
    14 
    15 \subsection{Convention: C is more touchable than its standard}
    16 
    17 When it comes to explaining how C works, I like illustrating definite program semantics.
    18 I prefer doing so, over a quoting manual's suggested programmer's intuition, or showing how some compiler writers chose to model their problem.
    19 To illustrate definite program semantics, I devise a program, whose behaviour exercises the point at issue, and I show its behaviour.
    20 
    21 This behaviour is typically one of
    22 \begin{itemize}
    23         \item my statement that the compiler accepts or rejects the program
    24         \item the program's printed output, which I show
    25         \item my implied assurance that its assertions do not fail when run
    26 \end{itemize}
    27 
    28 The compiler whose program semantics is shown is
    29 \begin{cfa}
    30 $ gcc --version
    31 gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
    32 \end{cfa}
    33 running on Architecture @x86_64@, with the same environment targeted.
    34 
    35 Unless explicit discussion ensues about differences among compilers or with (versions of) the standard, it is further implied that there exists a second version of GCC and some version of Clang, running on and for the same platform, that give substantially similar behaviour.
    36 In this case, I do not argue that my sample of major Linux compilers is doing the right thing with respect to the C standard.
    37 
    38 
    39 \subsection{C reports many ill-typed expressions as warnings}
    40 
    41 These attempts to assign @y@ to @x@ and vice-versa are obviously ill-typed.
    42 \lstinput{12-15}{bkgd-c-tyerr.c}
    43 with warnings:
    44 \begin{cfa}
    45 warning: assignment to 'float *' from incompatible pointer type 'void (*)(void)'
    46 warning: assignment to 'void (*)(void)' from incompatible pointer type 'float *'
    47 \end{cfa}
    48 Similarly,
    49 \lstinput{17-19}{bkgd-c-tyerr.c}
    50 with warning:
    51 \begin{cfa}
    52 warning: passing argument 1 of 'f' from incompatible pointer type
    53 note: expected 'void (*)(void)' but argument is of type 'float *'
    54 \end{cfa}
    55 with a segmentation fault at runtime.
    56 
    57 That @f@'s attempt to call @g@ fails is not due to 3.14 being a particularly unlucky choice of value to put in the variable @pi@.
    58 Rather, it is because obtaining a program that includes this essential fragment, yet exhibits a behaviour other than "doomed to crash," is a matter for an obfuscated coding competition.
    59 
    60 A "tractable syntactic method for proving the absence of certain program behaviours by classifying phrases according to the kinds of values they compute"*1 rejected the program.
    61 The behaviour (whose absence is unprovable) is neither minor nor unlikely.
    62 The rejection shows that the program is ill-typed.
    63 
    64 Yet, the rejection presents as a GCC warning.
    65 
    66 In the discussion following, ``ill-typed'' means giving a nonzero @gcc -Werror@ exit condition with a message that discusses typing.
    67 
    68 *1  TAPL-pg1 definition of a type system
    69 
    70 
    71 \section{C Arrays}
    72 
    73 \subsection{C has an array type (!)}
     3Since this work builds on C, it is necessary to explain the C mechanisms and their shortcomings for array, linked list, and string,
     4
     5
     6\section{Array}
    747
    758When a programmer works with an array, C semantics provide access to a type that is different in every way from ``pointer to its first element.''
     
    511444
    512445
    513 \section{\CFA}
    514 
    515 Traditionally, fixing C meant leaving the C-ism alone, while providing a better alternative beside it.
    516 (For later:  That's what I offer with array.hfa, but in the future-work vision for arrays, the fix includes helping programmers stop accidentally using a broken C-ism.)
    517 
    518 \subsection{\CFA features interacting with arrays}
    519 
    520 Prior work on \CFA included making C arrays, as used in C code from the wild,
    521 work, if this code is fed into @cfacc@.
    522 The quality of this this treatment was fine, with no more or fewer bugs than is typical.
    523 
    524 More mixed results arose with feeding these ``C'' arrays into preexisting \CFA features.
    525 
    526 A notable success was with the \CFA @alloc@ function,
    527 which type information associated with a polymorphic return type
    528 replaces @malloc@'s use of programmer-supplied size information.
    529 \begin{cfa}
    530 // C, library
    531 void * malloc( size_t );
    532 // C, user
    533 struct tm * el1 = malloc(      sizeof(struct tm) );
    534 struct tm * ar1 = malloc( 10 * sizeof(struct tm) );
    535 
    536 // CFA, library
    537 forall( T * ) T * alloc();
    538 // CFA, user
    539 tm * el2 = alloc();
    540 tm (*ar2)[10] = alloc();
    541 \end{cfa}
    542 The alloc polymorphic return compiles into a hidden parameter, which receives a compiler-generated argument.
    543 This compiler's argument generation uses type information from the left-hand side of the initialization to obtain the intended type.
    544 Using a compiler-produced value eliminates an opportunity for user error.
    545 
    546 TODO: fix in following: even the alloc call gives bad code gen: verify it was always this way; walk back the wording about things just working here; assignment (rebind) seems to offer workaround, as in bkgd-cfa-arrayinteract.cfa
    547 
    548 Bringing in another \CFA feature, reference types, both resolves a sore spot of the last example, and gives a first example of an array-interaction bug.
    549 In the last example, the choice of ``pointer to array'' @ar2@ breaks a parallel with @ar1@.
    550 They are not subscripted in the same way.
    551 \begin{cfa}
    552 ar1[5];
    553 (*ar2)[5];
    554 \end{cfa}
    555 Using ``reference to array'' works at resolving this issue.  TODO: discuss connection with Doug-Lea \CC proposal.
    556 \begin{cfa}
    557 tm (&ar3)[10] = *alloc();
    558 ar3[5];
    559 \end{cfa}
    560 The implicit size communication to @alloc@ still works in the same ways as for @ar2@.
    561 
    562 Using proper array types (@ar2@ and @ar3@) addresses a concern about using raw element pointers (@ar1@), albeit a theoretical one.
    563 TODO xref C standard does not claim that @ar1@ may be subscripted,
    564 because no stage of interpreting the construction of @ar1@ has it be that ``there is an \emph{array object} here.''
    565 But both @*ar2@ and the referent of @ar3@ are the results of \emph{typed} @alloc@ calls,
    566 where the type requested is an array, making the result, much more obviously, an array object.
    567 
    568 The ``reference to array'' type has its sore spots too.
    569 TODO see also @dimexpr-match-c/REFPARAM_CALL@ (under @TRY_BUG_1@)
    570 
    571 TODO: I fixed a bug associated with using an array as a T.  I think.  Did I really?  What was the bug?
     446\section{Linked List}
     447
     448
     449\section{String}
  • doc/theses/mike_brooks_MMath/intro.tex

    rdf78cce r486caad  
    11\chapter{Introduction}
     2
     3All modern programming languages provide three high-level containers (collection): array, linked-list, and string.
     4Often array is part of the programming language, while linked-list is built from pointer types, and string from a combination of array and linked-list.
    25
    36\cite{Blache19}
     
    58\cite{Ruef19}
    69
    7 \section{Arrays}
     10\section{Array}
    811
    9 \section{Strings}
     12Array provides a homogeneous container with $O(1)$ access to elements using subscripting.
     13The array size can be static, dynamic but fixed after creation, or dynamic and variable after creation.
     14For static and dynamic-fixed, an array can be stack allocated, while dynamic-variable requires the heap.
     15
     16
     17\section{Linked List}
     18
     19Linked-list provides a homogeneous container with $O(log N)$/$O(N)$ access to elements using successor and predecessor operations.
     20Subscripting by value is sometimes available, \eg hash table.
     21Linked types are normally dynamically sized by adding/removing nodes using link fields internal or external to the elements (nodes).
     22If a programming language allows pointer to stack storage, linked-list types can be allocated on the stack;
     23otherwise, elements are heap allocated and explicitly/implicitly managed.
     24
     25
     26\section{String}
     27
     28String provides a dynamic array of homogeneous elements, where the elements are often human-readable characters.
     29What differentiates string from other types in that string operations work on blocks of elements for scanning and changing the elements, rather than accessing individual elements.
     30Nevertheless, subscripting is often available.
     31The cost of string operations is less important than the power of the block operation to accomplish complex manipulation.
     32The dynamic nature of string means storage is normally heap allocated but often implicitly managed, even in unmanaged languages.
     33
     34
     35\section{Motivation}
     36
     37The goal of this work is to introduce safe and complex versions of array, link-lists, and string into the programming language \CFA~\cite{CFA}, which is based on C.
     38Unfortunately, to make C better, while retaining a high level of backwards compatibility, requires a significant knowledge of C's design.
     39Hence, it is assumed the reader has a medium knowledge of C or \CC, on which extensive new C knowledge is built.
     40
     41
     42\subsection{C?}
     43
     44Like many established programming languages, C has a standards committee and multiple ANSI/\-ISO language manuals~\cite{C99,C11,C18,C23}.
     45However, most programming languages are only partially explained by standard's manuals.
     46When it comes to explaining how C works, the definitive source is the @gcc@ compiler, which is mimicked by other C compilers, such as Clang~\cite{clang}.
     47Often other C compilers must \emph{ape} @gcc@ because a large part of the C library (runtime) system contains @gcc@ features.
     48While some key aspects of C need to be explained by quoting from the language reference manual, to illustrate definite program semantics, I devise a program, whose behaviour exercises the point at issue, and shows its behaviour.
     49These example programs show
     50\begin{itemize}
     51        \item the compiler accepts or rejects certain syntax,
     52        \item prints output to buttress a claim of behaviour,
     53        \item executes without triggering any embedded assertions testing pre/post-assertions or invariants.
     54\end{itemize}
     55This work has been tested across @gcc@ versions 8--12 and clang version 10 running on ARM, AMD, and Intel architectures.
     56Any discovered anomalies among compilers or versions is discussed.
     57In this case, I do not argue that my sample of major Linux compilers is doing the right thing with respect to the C standard.
     58
     59
     60\subsection{Ill-Typed Expressions}
     61
     62C reports many ill-typed expressions as warnings.
     63For example, these attempts to assign @y@ to @x@ and vice-versa are obviously ill-typed.
     64\lstinput{12-15}{bkgd-c-tyerr.c}
     65with warnings:
     66\begin{cfa}
     67warning: assignment to 'float *' from incompatible pointer type 'void (*)(void)'
     68warning: assignment to 'void (*)(void)' from incompatible pointer type 'float *'
     69\end{cfa}
     70Similarly,
     71\lstinput{17-19}{bkgd-c-tyerr.c}
     72with warning:
     73\begin{cfa}
     74warning: passing argument 1 of 'f' from incompatible pointer type
     75note: expected 'void (*)(void)' but argument is of type 'float *'
     76\end{cfa}
     77with a segmentation fault at runtime.
     78Clearly, @gcc@ understands these ill-typed case, and yet allows the program to compile, which seems like madness.
     79Compiling with flag @-Werror@, which turns warnings into errors, is often too strong, because some warnings are just warnings.
     80In the following discussion, ``ill-typed'' means giving a nonzero @gcc@ exit condition with a message that discusses typing.
     81Note, \CFA's type-system rejects all these ill-typed cases as type mismatch errors.
     82
     83% That @f@'s attempt to call @g@ fails is not due to 3.14 being a particularly unlucky choice of value to put in the variable @pi@.
     84% Rather, it is because obtaining a program that includes this essential fragment, yet exhibits a behaviour other than "doomed to crash," is a matter for an obfuscated coding competition.
     85
     86% A "tractable syntactic method for proving the absence of certain program behaviours by classifying phrases according to the kinds of values they compute"*1 rejected the program.
     87% The behaviour (whose absence is unprovable) is neither minor nor unlikely.
     88% The rejection shows that the program is ill-typed.
     89%
     90% Yet, the rejection presents as a GCC warning.
     91% *1  TAPL-pg1 definition of a type system
     92
    1093
    1194\section{Contributions}
     95
     96\subsection{Linked List}
     97
     98\subsection{Array}
     99
     100\subsection{String}
  • doc/theses/mike_brooks_MMath/uw-ethesis.tex

    rdf78cce r486caad  
    218218\input{intro}
    219219\input{background}
     220\input{array}
    220221\input{list}
    221 \input{array}
    222222\input{string}
    223223\input{conclusion}
Note: See TracChangeset for help on using the changeset viewer.