source: doc/proposals/enum.tex@ 47bd204

Last change on this file since 47bd204 was bd67442, checked in by Peter A. Buhr <pabuhr@…>, 20 months ago

more proofreading, related-work section still needs work

  • Property mode set to 100644
File size: 60.3 KB
Line 
1\documentclass[12pt]{article}
2\usepackage{fullpage,times}
3\usepackage{pslatex} % reduce size of san serif font
4\usepackage{xcolor}
5\usepackage{listings}
6%\usepackage{array}
7\usepackage{graphics}
8\usepackage{xspace}
9\usepackage{relsize} % must be after change to small or selects old size
10\usepackage{calc} % latex arithmetic
11
12\makeatletter
13\renewcommand\section{\@startsection{section}{1}{\z@}{-3.0ex \@plus -1ex \@minus -.2ex}{1.5ex \@plus .2ex}{\normalfont\large\bfseries}}
14\renewcommand\subsection{\@startsection{subsection}{2}{\z@}{-2.75ex \@plus -1ex \@minus -.2ex}{1.25ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
15\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex \@plus -1ex \@minus -.2ex}{1.0ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
16\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-2.0ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries}}
17\renewcommand\subparagraph{\@startsection{subparagraph}{4}{\z@}{-1.5ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries\itshape}}
18
19% Denote newterms in particular font and index them without particular font and in lowercase, e.g., \newterm{abc}.
20% The option parameter provides an index term different from the new term, e.g., \newterm[\texttt{abc}]{abc}
21% The star version does not lowercase the index information, e.g., \newterm*{IBM}.
22\newcommand{\newtermFontInline}{\emph}
23\newcommand{\newterm}{\protect\@ifstar\@snewterm\@newterm}
24\newcommand{\@newterm}[2][\@empty]{\lowercase{\def\temp{#2}}{\newtermFontInline{#2}}\ifx#1\@empty\index{\temp}\else\index{#1@{\protect#2}}\fi}
25\newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi}
26
27\newcommand{\LstBasicStyle}[1]{{\lst@basicstyle{#1}}}
28\newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}
29\newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}
30\newcommand{\LstStringStyle}[1]{{\lst@basicstyle{\lst@stringstyle{#1}}}}
31\newcommand{\LstNumberStyle}[1]{{\lst@basicstyle{\lst@numberstyle{#1}}}}
32
33\newlength{\gcolumnposn} % temporary hack because lstlisting does not handle tabs correctly
34\newlength{\columnposn}
35\setlength{\gcolumnposn}{3in}
36\setlength{\columnposn}{\gcolumnposn}
37\newcommand{\setgcolumn}[1]{\global\gcolumnposn=#1\global\columnposn=\gcolumnposn}
38\newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstCommentStyle{#2}}}
39\newcommand{\CD}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstBasicStyle{#2}}}
40\newcommand{\CRT}{\global\columnposn=\gcolumnposn}
41\makeatother
42
43\usepackage[ignoredisplayed]{enumitem} % do not affect trivlist
44\setlist{labelsep=1ex}% global
45\setlist[itemize]{topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent,leftmargin=\parindent}% global
46\setlist[itemize,1]{label=\textbullet}% local
47%\renewcommand{\labelitemi}{{\raisebox{0.25ex}{\footnotesize$\bullet$}}}
48\setlist[enumerate]{topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global
49\setlist[enumerate,2]{leftmargin=\parindent,labelsep=*,align=parleft,label=\alph*.}% local
50\setlist[description]{topsep=0.5ex,itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
51
52\newenvironment{cquote}{%
53 \list{}{\lstset{resetmargins=true,aboveskip=0pt,belowskip=0pt}\topsep=4pt\parsep=0pt\leftmargin=\parindent\rightmargin\leftmargin}%
54 \item\relax
55}{%
56 \endlist
57}% cquote
58
59\setlength{\topmargin}{-0.45in} % move running title into header
60\setlength{\headsep}{0.25in}
61\setlength{\textheight}{9.0in}
62
63\newcommand{\CFAIcon}{\textsf{C\raisebox{\depth}{\rotatebox{180}A}}} % Cforall icon
64\newcommand{\CFA}{\protect\CFAIcon\xspace} % CFA symbolic name
65\newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon
66\newcommand{\CC}[1][]{\protect\CCIcon{#1}\xspace} % C++ symbolic name
67\newcommand{\Csharp}{C\raisebox{-0.7ex}{\relsize{2}$^\sharp$}\xspace} % C# symbolic name
68\newcommand{\PAB}[1]{{\color{red}PAB: #1}}
69
70% \definecolor{mGreen}{rgb}{0,0.6,0}
71% \definecolor{mGray}{rgb}{0.5,0.5,0.5}
72% \definecolor{mPurple}{rgb}{0.58,0,0.82}
73% \definecolor{backgroundColour}{rgb}{0.95,0.95,0.92}
74
75\lstdefinestyle{CStyle}{
76% backgroundcolor=\color{backgroundColour},
77% commentstyle=\color{mGreen},
78% keywordstyle=\color{magenta},
79 stringstyle=\small\tt, % use typewriter font
80% stringstyle=\color{mPurple},
81 columns=fullflexible,
82 basicstyle=\small\linespread{0.9}\sf, % reduce line spacing and use sanserif font
83% basicstyle=\footnotesize,
84 breakatwhitespace=false,
85% breaklines=true,
86 captionpos=b,
87 keepspaces=true,
88 escapechar=\$, % LaTeX escape in CFA code
89% numbers=left,
90% numbersep=5pt,
91% numberstyle=\tiny\color{mGray},
92% showspaces=false,
93 showstringspaces=false,
94% showtabs=false,
95 showlines=true, % show blank lines at end of code
96 tabsize=5,
97 language=C,
98 aboveskip=4pt, % spacing above/below code block
99 belowskip=2pt,
100 xleftmargin=\parindent, % indent code to paragraph indentation
101}
102\lstset{style=CStyle,moredelim=**[is][\color{red}]{@}{@}}
103\lstMakeShortInline@ % single-character for \lstinline
104
105\begin{document}
106
107\title{\vspace*{-0.5in}Enumeration in \CFA}
108\author{Jiada Liang}
109
110\maketitle
111
112\begin{abstract}
113An enumeration is a type defining an ordered set of named constant values, where a name abstracts a value, e.g., @PI@ versus @3.14159@.
114C restricts an enumeration type to the integral type @signed int@, which \CC supports, meaning enumeration names bind to integer constants.
115\CFA extends C enumerations to allow all basic and custom types for the enumeration type, like other modern programming languages.
116Furthermore, \CFA adds other useful features for enumerations to support better software-engineering practices and simplify program development.
117\end{abstract}
118
119\section{Background}
120
121Naming values is a common practice in mathematics and engineering, e.g., $\pi$, $\tau$ (2$\pi$), $\phi$ (golden ratio), MHz (1E6), etc.
122Naming is also commonly used to represent many other numerical phenomena, such as days of the week, months of a year, floors of a building (basement), time (noon, New Years).
123Many programming languages capture this important software-engineering capability through a mechanism called an \newterm{enumeration}.
124An enumeration is similar to other programming-language types by providing a set of constrained values, but adds the ability to name \emph{all} the values in its set.
125Note, all enumeration names must be unique but different names can represent the same value (eighth note, quaver), which are synonyms.
126
127Specifically, an enumerated type restricts its values to a fixed set of named constants.
128Fundamentally, all types are restricted to a fixed set of values because of the underlying von Neumann architecture, and hence, to a corresponding set of constants, e.g., @3@, @3.5@, @3.5+2.1i@, @'c'@, @"abc"@, etc.
129However, the values for basic types are not named, other than the programming-language supplied constants.
130
131
132\section{C-Style Enum}
133
134The C-Style enumeration has the following syntax and semantics, and is representative of enumerations in many other programming languages (see Section~\ref{s:RelatedWork}).
135\begin{lstlisting}[label=lst:weekday]
136enum Weekday { Monday, Tuesday, Wednesday, Thursday@ = 10@, Friday, Saturday, Sunday };
137 $\(\uparrow\)$ $\(\uparrow\)$
138 ${\rm \newterm{enumeration name}}$ ${\rm \newterm{enumerator names}}$
139\end{lstlisting}
140Here, the enumeration type @Weekday@ defines the ordered \newterm{enumerator}s @Monday@, @Tuesday@, @Wednesday@, @Thursday@, @Friday@, @Saturday@ and @Sunday@.
141By convention, the successor of @Tuesday@ is @Wednesday@ and the predecessor of @Tuesday@ is @Monday@, independent of the associated enumerator constants.
142Because an enumerator is a constant, it cannot appear in a mutable context, e.g., @Monday = Sunday@ is meaningless, and it has no address, i.e., it is an rvalue\footnote{
143The term rvalue defines an expression that can only appear on the right-hand side of an assignment.}.
144Enumerators without explicitly designated constants are auto-initialized by the compiler: from left to right, starting at zero or the next explicitly initialized constant, incrementing by @1@.
145For example, @Monday@ to @Wednesday@ are implicitly assigned with constants 0--2, @Thursday@ is explicitly set to constant @10@, and @Friday@ to @Sunday@ are implicitly assigned with constants 11--13.
146Hence, there are 3 universal enumeration attributes: \newterm{position}, \newterm{label}, and \newterm{value}:
147\begin{cquote}
148\small\sf\setlength{\tabcolsep}{3pt}
149\begin{tabular}{rccccccccccc}
150@enum@ Weekday \{ & Monday, & Tuesday, & Wednesday, & Thursday = 10,& Friday, & Saturday, & Sunday \}; \\
151\it\color{red}position & 0 & 1 & 2 & 3 & 4 & 5 & 6 \\
152\it\color{red}label & Monday & Tuesday & Wednesday & Thursday & Friday & Saturday & Sunday \\
153\it\color{red}value & 0 & 1 & 2 & {\color{red}10}& 11 & 12 & 13
154\end{tabular}
155\end{cquote}
156Finally, C enumerators are \newterm{unscoped}, i.e., enumerators declared inside of an @enum@ are visible in the enclosing scope of the @enum@ type.
157
158In theory, a C enumeration \emph{variable} is an implementation-defined integral type large enough to hold all enumerated values.
159In practice, since integral constants in C have type @int@ (unless qualified with a size suffix), C uses @int@ as the underlying type for enumeration variables.
160Furthermore, there is an implicit bidirectional conversion between an enumeration and integral types.
161\begin{lstlisting}[label=lst:enum_scope]
162{
163 enum Weekday { ... }; $\C{// enumerators implicitly projected into local scope}$
164 Weekday weekday = Monday; $\C{// weekday == 0}$
165 weekday = Friday; $\C{// weekday == 11}$
166 int i = Sunday; $\C{// implicit conversion to int, i == 13}$
167 weekday = 10000; $\C{// UNDEFINED! implicit conversion to Weekday}$
168}
169int j = Wednesday; $\C{// ERROR! Wednesday is not declared in this scope}$
170\end{lstlisting}
171The implicit conversion from @int@ to an enumeration type is an unnecessary source of error.
172
173\section{\CFA-Style Enum}
174
175\CFA supports C-Style enumeration using the same syntax and semantics for backwards compatibility.
176\CFA also extends C-Style enumeration by adding a number of new features that bring enumerations in line with other modern programming languages.
177
178\subsection{Enumerator Typing}
179
180\CFA extends the enumeration by parameterizing the enumeration with a type for the enumerators, allowing enumerators to be assigned any values from the declared type.
181Figure~\ref{f:EumeratorTyping} shows a series of examples illustrating that all \CFA types can be used with an enumeration and each type's constants used to set the enumerators.
182
183Typed enumerations deal with the \emph{harmonizing} problem between an enumeration and its companion data.
184The following example is from the \CFA compiler, written in \CC.
185\begin{lstlisting}
186enum integral_types { chr, schar, uschar, sshort, ushort, sint, usint, ..., NO_OF_ITYPES };
187char * integral_names[NO_OF_ITYPES] = {
188 "char", "signed char", "unsigned char",
189 "signed short int", "unsigned short int",
190 "signed int", "unsigned int",
191 ...
192};
193\end{lstlisting}
194The \emph{harmonizing} problem occurs because the enumeration declaration is in one header file and the names are declared in another translation unit.
195It is up to the programmer to ensure changes made in one location are harmonized with the other location (by identifying this requirement within a comment).
196The typed enumeration largely solves this problem by combining and managing the two data types.
197\begin{lstlisting}
198enum( char * ) integral_types {
199 chr = "char", schar = "signed char", uschar = "unsigned char",
200 sshort = "signed short int", ushort = "unsigned short int",
201 sint = "signed int", usint = "unsigned int",
202 ...
203};
204\end{lstlisting}
205
206% \begin{lstlisting}[label=lst:color]
207% struct S { int i, j; };
208% enum( S ) s { A = { 3, 4 }, B = { 7, 8 } };
209% enum( @char@ ) Currency { Dollar = '$\textdollar$', Euro = '$\texteuro$', Pound = '$\textsterling$' };
210% enum( @double@ ) Planet { Venus = 4.87, Earth = 5.97, Mars = 0.642 }; // mass
211% enum( @char *@ ) Colour { Red = "red", Green = "green", Blue = "blue" };
212% enum( @Currency@ ) Europe { Euro = '$\texteuro$', Pound = '$\textsterling$' }; // intersection
213% \end{lstlisting}
214
215\begin{figure}
216\begin{lstlisting}
217// integral
218 enum( @char@ ) Currency { Dollar = '$\textdollar$', Euro = '$\texteuro$', Pound = '$\textsterling$' };
219 enum( @signed char@ ) srgb { Red = -1, Green = 0, Blue = 1 };
220 enum( @long long int@ ) BigNum { X = 123_456_789_012_345, Y = 345_012_789_456_123 };
221// non-integral
222 enum( @double@ ) Math { PI_2 = 1.570796, PI = 3.141593, E = 2.718282 };
223 enum( @_Complex@ ) Plane { X = 1.5+3.4i, Y = 7+3i, Z = 0+0.5i };
224// pointer
225 enum( @char *@ ) Names { Fred = "Fred", Mary = "Mary", Jane = "Jane" };
226 int i, j, k;
227 enum( @int *@ ) ptr { I = &i, J = &j, K = &k };
228 enum( @int &@ ) ref { I = i, J = j, K = k };
229// tuple
230 enum( @[int, int]@ ) { T = [ 1, 2 ] };
231// function
232 void f() {...} void g() {...}
233 enum( @void (*)()@ ) funs { F = f, G = g };
234// aggregate
235 struct Person { char * name; int age, height; };
236 enum( @Person@ ) friends { Liz = { "Elizabeth", 22, 170 }, Beth = Liz, Jon = { "Jonathan", 35, 190 } };
237\end{lstlisting}
238\caption{Enumerator Typing}
239\label{f:EumeratorTyping}
240\end{figure}
241
242\subsection{Pure Enumerators}
243
244An empty type, @enum()@, implies the enumerators are pure symbols without values;
245hence, there is no default conversion to @int@.
246
247\begin{lstlisting}
248enum() Mode { O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_APPEND };
249Mode iomode = O_RDONLY;
250int i = iomode; $\C{\color{red}// disallowed}$
251sout | O_TRUNC; $\C{\color{red}// disallowed}$
252\end{lstlisting}
253
254\subsection{Enumerator Subset}
255
256It follows from enumerator typing that the type of the enumerators can be another enumeration.
257\begin{lstlisting}
258enum( char ) Letter { A = 'A', B = 'B', C = 'C', ..., Z = 'Z' };
259enum( Letter ) Greek { Alph = A, Beta = B, ..., Zeta = Z }; // alphabet intersection
260Letter letter = A;
261Greek greek = Alph;
262letter = Alph; $\C{// allowed}$
263greek = A; $\C{\color{red}// disallowed}$
264\end{lstlisting}
265Enumeration @Greek@ may have more or fewer enumerators than @Letter@, but the enumerator values must be from @Letter@.
266Therefore, @Greek@ enumerators are a subset of type @Letter@ and are type compatible with enumeration @Letter@, but @Letter@ enumerators are not type compatible with enumeration @Greek@.
267
268\subsection{Enumeration Inheritance}
269
270\CFA Plan-9 inheritance may be used with enumerations.
271\begin{lstlisting}
272enum( char * ) Name2 { @inline Name@, Jack = "Jack", Jill = "Jill" };
273enum /* inferred */ Name3 { @inline Name2@, Sue = "Sue", Tom = "Tom" };
274\end{lstlisting}
275Enumeration @Name2@ inherits all the enumerators and their values from enumeration @Name@ by containment, and a @Name@ enumeration is a subtype of enumeration @Name2@.
276Note, enumerators must be unique in inheritance but enumerator values may be repeated.
277
278The enumeration type for the inheriting type must be the same as the inherited type;
279hence the enumeration type may be omitted for the inheriting enumeration and it is inferred from the inherited enumeration, as for @Name3@.
280When inheriting from integral types, automatic numbering may be used, so the inheritance placement left to right is important.
281
282Specifically, the inheritance relationship for Names is:
283\begin{lstlisting}
284Name $\(\subset\)$ Name2 $\(\subset\)$ Name3 $\(\subset\)$ const char * // enum type of Name
285\end{lstlisting}
286For the given function prototypes, the following calls are valid.
287\begin{cquote}
288\begin{tabular}{ll}
289\begin{lstlisting}
290void f( Name );
291void g( Name2 );
292void h( Name3 );
293void j( const char * );
294\end{lstlisting}
295&
296\begin{lstlisting}
297f( Fred );
298g( Fred ); g( Jill );
299h( Fred ); h( Jill ); h( Sue );
300j( Fred ); j( Jill ); j( Sue ); j( "Will" );
301\end{lstlisting}
302\end{tabular}
303\end{cquote}
304Note, the validity of calls is the same for call-by-reference as for call-by-value, and const restrictions are the same as for other types.
305
306\subsection{Enumerator Scoping}
307
308A \CFA-enum can be scoped, meaning the enumerator constants are not projected into the enclosing scope.
309\begin{lstlisting}
310enum Weekday @!@ { /* as above */ };
311enum( char * ) Colour @!@ { /* as above */ };
312\end{lstlisting}
313where the @'!'@ implies the enumerators are \emph{not} projected.
314The enumerators of a scoped enumeration are accessed using qualifications, like the fields of an aggregate.
315% The syntax of $qualified\_expression$ for \CFA-enum is the following:
316% $$<qualified\_expression> := <enum\_type>.<enumerator>$$
317\begin{lstlisting}
318Weekday weekday = @Weekday.Monday@; $\C{// qualification}$
319Colour colour = @Colour.@Red;
320colour = @Colour.@Blue;
321\end{lstlisting}
322
323\subsection{Enumeration Pseudo-functions}
324
325Pseudo-functions are function-like operators that do not result in any run-time computations, i.e., like @sizeof@.
326Often a call to a pseudo-function is substituted with information extracted from the symbol table at compilation time, like storage size or alignment associated with the underlying architecture.
327
328\subsubsection{Enumerator Attributes}
329The attributes of an enumerator are accessed by pseudo-functions @position@, @value@, and @label@.
330\begin{lstlisting}
331int green_pos = @position@( Colour.Green ); $\C{// 1}$
332char * green_value = @value@( Colour.Green ); $\C{// "G"}$
333char * green_label = @label@( Colour.Green ); $\C{// "Green"}$
334\end{lstlisting}
335
336Enumeration Greek may have more or less enumerators than Letter, but the enumerator values must be from Letter.
337Therefore, Greek enumerators are a subset of type Letter and are type compatible with enumeration Letter, but Letter enumerators are not type compatible with enumeration Greek.
338
339% An instance of \CFA-enum (denoted as @<enum_instance>@) is a label for the defined enum name.
340% The label can be retrieved by calling the function @label( <enum_instance> )@.
341% Similarly, the @value()@ function returns the value used to initialize the \CFA-enum.
342
343\subsubsection{\lstinline{enumerate()}}
344
345\begin{lstlisting}[label=lst:c_switch]
346enum(int) C_ENUM { First, Second, Third = First, Fourth };
347int v( C_ENUM e ) {
348 switch( e ) {
349 case First: return 0; break;
350 case Second: return 1; break;
351 // case Third: return 2; break;
352 // case Fourth: return 3; break;
353 };
354};
355\end{lstlisting}
356In the @C_ENUM@ example, @Third@ is an alias of @First@ and @Fourth@ is an alias of @Second@.
357Programmers cannot make case branches for @Third@ and @Fourth@ because the switch statement matches cases by the enumerator's value.
358Cases @First@ and @Third@, or @Second@ and @Fourth@, have duplicate case values.
359
360@enumerate()@ is a pseudo-function that makes the switch statement match by an enumerator instead.
361\begin{lstlisting}[label=lst:c_switch_enumerate]
362enum(double) C_ENUM { First, Second, Third = First, Fourth };
363C_ENUM variable_a = First, variable_b = Second, variable_c = Third, variable_d = Fourth;
364int v(C_ENUM e) {
365 switch( enumerate( e ) ) {
366 case First: return e; break;
367 case Second: return value( e ); break;
368 case Third: return label( e ); break;
369 case Fourth: return position( e ); break;
370 };
371};
372p(variable_a); // 0
373p(variable_b); // 1
374p(variable_c); // "Third"
375p(variable_d); // 3
376\end{lstlisting}
377
378
379\section{Enumeration Storage}
380
381
382\subsection{Enumeration Variable}
383
384Although \CFA enumeration captures three different attributes, an enumeration instance does not store all this information.
385The @sizeof@ a \CFA enumeration instance is always 4 bytes, the same size as a C enumeration instance (@sizeof( int )@).
386It comes from the fact that:
387\begin{enumerate}
388\item
389a \CFA enumeration is always statically typed;
390\item
391it is always resolved as one of its attributes regarding real usage.
392\end{enumerate}
393When creating an enumeration instance @colour@ and assigning it with the enumerator @Colour.Green@, the compiler allocates an integer variable and stores the position 1.
394The invocations of @position()@, @value()@, and @label()@ turn into calls to special functions defined in the prelude:
395\begin{lstlisting}[label=lst:companion_call]
396position( green );
397>>> position( Colour, 1 ) -> int
398value( green );
399>>> value( Colour, 1 ) -> T
400label( green );
401>>> label( Colour, 1) -> char *
402\end{lstlisting}
403@T@ represents the type declared in the \CFA enumeration definition, which is @char *@ in the example.
404These generated functions are \emph{companion functions}; they take a \emph{companion} object and the position as parameters.
405
406
407\subsection{Enumeration Data}
408
409\begin{lstlisting}[label=lst:enumeration_backing_data]
410enum(T) E { ... };
411// backing data
412T * E_values;
413char ** E_labels;
414\end{lstlisting}
415Storing values and labels as arrays can sometimes help support enumeration features.
416However, these data structures add overhead to programs. We want to reduce the memory usage for enumeration support by:
417\begin{itemize}
418 \item Only generating the data arrays if necessary.
419 \item Sharing the data structures among compilation units.
420 No extra overhead is incurred if the data structures are requested multiple times.
421\end{itemize}
422
423
424\section{Unification}
425
426\subsection{Enumeration as Value}
427\label{section:enumeration_as_value}
428A \CFA enumeration with base type @T@ can be used seamlessly as @T@, without explicitly calling the pseudo-function @value@.
429\begin{lstlisting}[label=lst:implicit_conversion]
430char * green_value = Colour.Green; // "G"
431// Is equivalent to
432// char * green_value = value( Colour.Green ); "G"
433\end{lstlisting}
434
435
436\subsection{Unification Distance}
437
438\begin{lstlisting}[label=lst:unification_distance_example]
439T2 Foo(T1);
440\end{lstlisting}
441The @Foo@ function expects a parameter with type @T1@. In C, only a value with the exact type T1 can be used as a parameter for @Foo@. In \CFA, @Foo@ accepts value with some type @T3@ as long as @distance(T1, T3)@ is not @Infinite@.
442
443@path(A, B)@ is a compiler concept that returns one of the following:
444\begin{itemize}
445 \item Zero or 0, if and only if $A == B$.
446 \item Safe, if B can be used as A without losing its precision, or B is a subtype of A.
447 \item Unsafe, if B loses its precision when used as A, or A is a subtype of B.
448 \item Infinite, if B cannot be used as A. A is not a subtype of B and B is not a subtype of A.
449\end{itemize}
450
451For example, @path(int, int)==Zero@, @path(int, char)==Safe@, @path(int, double)==Unsafe@, @path(int, struct S)@ is @Infinite@ for @struct S{}@.
452@distance(A, C)@ is the minimum sum of paths from A to C. For example, if @path(A, B)==i@, @path(B, C)==j@, and @path(A, C)==k@, then \[ \mathit{distance}(A,C) = \min(\mathit{path}(A,C),\; \mathit{path}(A,B) + \mathit{path}(B,C)) = \min(k,\; i+j). \]
453
454(Skip over the distance matrix here because it is mostly irrelevant for enumeration discussion. In the actual implementation, distance( E, T ) is 1.)
455
456The arithmetic of distance is the following:
457\begin{itemize}
458 \item $Zero + v= v$, for some value v.
459 \item $Safe * k < Unsafe$, for finite k.
460 \item $Unsafe * k < Infinite$, for finite k.
461 \item $Infinite + v = Infinite$, for some value v.
462\end{itemize}
463
464For @enum(T) E@, @path(T, E)==Safe@ and @path(E, T)==Infinite@. In other words, enumeration type @E@ can be \emph{safely} used as type @T@, but type @T@ cannot be used when the resolution context expects a variable with enumeration type @E@.
465
466
467\subsection{Variable Overloading and Parameter Unification}
468
469\CFA allows variable names to be overloaded. It is possible to overload a variable that has type T and an enumeration with type T.
470\begin{lstlisting}[label=lst:variable_overload]
471char * green = "Green";
472Colour green = Colour.Green; // "G"
473
474char * bar(char * s) { return s; }
475char * foo(Colour c) { return value( c ); }
476
477foo( green ); // "G"
478bar( green ); // "Green"
479\end{lstlisting}
480\CFA's conversion distance disambiguates this overloading. For the function @bar@, which expects the parameter @s@ to have type @char *@, @distance(char *, char *)@ is @Zero@ while @distance(char *, Colour)@ is @Safe@ (the path from @char *@ to an enumeration with base type @char *@), so \CFA chooses the @green@ with type @char *@ unambiguously. On the other hand, for the function @foo@, @distance(Colour, char *)@ is @Infinite@ while @distance(Colour, Colour)@ is @Zero@, so @foo@ picks the @green@ with type @Colour@.
481
482\subsection{Function Overloading}
483Similarly, functions can be overloaded with different signatures. \CFA picks the correct function entity based on the distance between parameter types and the arguments.
484\begin{lstlisting}[label=lst:function_overload]
485Colour green = Colour.Green;
486void foo(Colour c) { sout | "It is an enum"; } // First foo
487void foo(char * s) { sout | "It is a string"; } // Second foo
488foo( green ); // "It is an enum"
489\end{lstlisting}
490Because @distance(Colour, Colour)@ is @Zero@ and @distance(char *, Colour)@ is @Safe@, \CFA determines that @foo( green )@ is a call to the first @foo@.
491
492\subsection{Attributes Functions}
493The pseudo-function @value()@ ``unboxes'' the enumeration, and the type of the expression is the underlying type. Therefore, in Section~\ref{section:enumeration_as_value}, assigning @Colour.Green@ to a variable of type @char *@ has resolution distance @Safe@, while assigning @value( Colour.Green )@ to @char *@ has resolution distance @Zero@.
494
495\begin{lstlisting}[label=lst:declaration_code]
496int s1;
497\end{lstlisting}
498The generated code for an enumeration instance is simply an @int@, which holds the position of the enumerator. Any usage of variable @s1@ is converted to return one of its attributes: label, value, or position, according to the unification rules.
499
500% \subsection{Unification and Resolution (this implementation will probably not be used, safe as reference for now)}
501
502% \begin{lstlisting}
503% enum Colour( char * ) { Red = "R", Green = "G", Blue = "B" };
504% \end{lstlisting}
505% The @EnumInstType@ is convertible to other types.
506% A \CFA enumeration expression is implicitly \emph{overloaded} with its three different attributes: value, position, and label.
507% The \CFA compilers need to resolve an @EnumInstType@ as one of its attributes based on the current context.
508
509% \begin{lstlisting}[caption={Null Context}, label=lst:null_context]
510% {
511% Colour.Green;
512% }
513% \end{lstlisting}
514% In example~\ref{lst:null_context}, the environment gives no information to help with the resolution of @Colour.Green@.
515% In this case, any of the attributes is resolvable.
516% According to the \textit{precedence rule}, the expression with @EnumInstType@ resolves as @value( Colour.Green )@.
517% The @EnumInstType@ is converted to the type of the value, which is statically known to the compiler as @char *@.
518% When the compilation reaches the code generation, the compiler outputs code for type @char *@ with the value @"G"@.
519% \begin{lstlisting}[caption={Null Context Generated Code}, label=lst:null_context]
520% {
521% "G";
522% }
523% \end{lstlisting}
524% \begin{lstlisting}[caption={int Context}, label=lst:int_context]
525% {
526% int g = Colour.Green;
527% }
528% \end{lstlisting}
529% The assignment expression gives a context for the EnumInstType resolution.
530% The EnumInstType is used as an @int@, and \CFA needs to determine which of the attributes can be resolved as an @int@ type.
531% The functions $Unify( T1, T2 ): bool$ take two types as parameters and determine if one type can be used as another.
532% In example~\ref{lst:int_context}, the compiler is trying to unify @int@ and @EnumInstType@ of @Colour@.
533% $$Unification( int, EnumInstType<Colour> )$$ which turns into three Unification call
534% \begin{lstlisting}[label=lst:attr_resolution_1]
535% {
536% Unify( int, char * ); // unify with the type of value
537% Unify( int, int ); // unify with the type of position
538% Unify( int, char * ); // unify with the type of label
539% }
540% \end{lstlisting}
541% \begin{lstlisting}[label=lst:attr_resolution_precedence]
542% {
543% Unification( T1, EnumInstType<T2> ) {
544% if ( Unify( T1, T2 ) ) return T2;
545% if ( Unify( T1, int ) ) return int;
546% if ( Unify( T1, char * ) ) return char *;
547% Error: Cannot Unify T1 with EnumInstType<T2>;
548% }
549% }
550% \end{lstlisting}
551% After the unification, @EnumInstType@ is replaced by its attributes.
552
553% \begin{lstlisting}[caption={Unification Functions}, label=lst:unification_func_call]
554% {
555% T2 foo ( T1 ); // function take variable with T1 as a parameter
556% foo( EnumInstType<T3> ); // Call foo with a variable has type EnumInstType<T3>
557% >>>> Unification( T1, EnumInstType<T3> )
558% }
559% \end{lstlisting}
560% % The conversion can work backward: in restrictive cases, attributes of can be implicitly converted back to the EnumInstType.
561% Backward conversion:
562% \begin{lstlisting}[caption={Unification Functions}, label=lst:unification_func_call]
563% {
564% enum Colour colour = 1;
565% }
566% \end{lstlisting}
567
568% \begin{lstlisting}[caption={Unification Functions}, label=lst:unification_func_call]
569% {
570% Unification( EnumInstType<Colour>, int ) >>> label
571% }
572% \end{lstlisting}
573% @int@ can be unified with the label of Colour.
574% @5@ is a constant expression $\Rightarrow$ Compiler knows the value during the compilation $\Rightarrow$ turns it into
575% \begin{lstlisting}
576% {
577% enum Colour colour = Colour.Green;
578% }
579% \end{lstlisting}
580% Steps:
581% \begin{enumerate}
582% \item
583% identify @1@ as a constant expression with type @int@, and the value is statically known as @1@
584% \item
585% @unification( EnumInstType<Colour>, int )@: @position( EnumInstType< Colour > )@
586% \item
587% return the enumeration constant at position 1
588% \end{enumerate}
589% \begin{lstlisting}
590% {
591% enum T (int) { ... } // Declaration
592% enum T t = 1;
593% }
594% \end{lstlisting}
595% Steps:
596% \begin{enumerate}
597% \item
598% identify @1@ as a constant expression with type @int@, and the value is statically known as @1@
599% \item
600% @unification( EnumInstType<Colour>, int )@: @value( EnumInstType< Colour > )@
601% \item
602% return the FIRST enumeration constant that has the value 1, by searching through the values array
603% \end{enumerate}
604% The downside of the precedence rule: @EnumInstType@ $\Rightarrow$ @int ( value )@ $\Rightarrow$ @EnumInstType@ may return a different @EnumInstType@ because the value can be repeated and there is no way to know which one is expected $\Rightarrow$ want uniqueness
605
606% \subsection{Casting}
607% Casting an EnumInstType to some other type T works similarly to unify the EnumInstType with T. For example:
608% \begin{lstlisting}
609% enum( int ) Foo { A = 10, B = 100, C = 1000 };
610% (int) Foo.A;
611% \end{lstlisting}
612% The \CFA-compiler unifies @EnumInstType<int>@ with int, with returns @value( Foo.A )@, which has statically known value 10. In other words, \CFA-compiler is aware of a cast expression, and it forms the context for EnumInstType resolution. The expression with type @EnumInstType<int>@ can be replaced by the compile with a constant expression 10, and optionally discard the cast expression.
613
614% \subsection{Value Conversion}
615% As discussed in section~\ref{lst:var_declaration}, \CFA only saves @position@ as the necessary information. It is necessary for \CFA to generate intermediate code to retrieve other attributes.
616
617% \begin{lstlisting}
618% Foo a; // int a;
619% int j = a;
620% char * s = a;
621% \end{lstlisting}
622% Assume stores a value x, which cannot be statically determined. When assigning a to j in line 2, the compiler @Unify@ j with a, and returns @value( a )@. The generated code for the second line will be
623% \begin{lstlisting}
624% int j = value( Foo, a )
625% \end{lstlisting}
626% Similarly, the generated code for the third line is
627% \begin{lstlisting}
628% char * j = label( Foo, a )
629% \end{lstlisting}
630
631
632\section{Enumerator Initialization}
633An enumerator must have a deterministic immutable value, which is either explicitly initialized in the enumeration definition or implicitly initialized by rules.
634
635\subsection{C Enumeration Rule}
636A C enumeration has an integral type. If not initialized, the first enumerator implicitly has the integral value 0, and each other enumerator has a value equal to its $predecessor + 1$.
637
638\subsection{Auto Initializable}
639\label{s:AutoInitializable}
640
641
642\CFA enumerations have the same rule in enumeration constant initialization.
643However, only \CFA types that have defined traits for @zero_t@, @one_t@, and an addition operator can be automatically initialized by \CFA.
644
645Specifically, a type is auto-initializable only if it satisfies the trait @AutoInitializable@:
646\begin{lstlisting}
647forall(T)
648trait AutoInitializable {
649 void ?{}( T & t, zero_t );
650 T ?++( T & t );
651};
652\end{lstlisting}
653An example of a user-defined @AutoInitializable@ is:
654\begin{lstlisting}[label=lst:sample_auto_Initializable]
655struct Odd { int i; };
656void ?{}( Odd & t, zero_t ) { t.i = 1; };
657Odd ?++( Odd t1 ) { return Odd( t1.i + 2); };
658\end{lstlisting}
659When the type of an enumeration is @AutoInitializable@, implicit initialization is available.
660\begin{lstlisting}[label=lst:sample_auto_Initializable_usage]
661enum AutoInitUsage(Odd) {
662 A, B, C = 7, D
663};
664\end{lstlisting}
665In the example, no initializer is specified for the first enumeration constant @A@, so \CFA initializes it with the value of @zero_t@, which is 1.
666@B@ and @D@ have the values of their $predecessor++$, where @one_t@ has the value 2.
667Therefore, the enumeration is initialized as follows:
668\begin{lstlisting}[label=lst:sample_auto_Initializable_usage_gen]
669enum AutoInitUsage(Odd) {
670 A = 1, B = 3, C = 7, D = 9
671};
672\end{lstlisting}
673Note that there is no mechanism to prevent an even value for the direct initialization, such as @C = 6@.
674
675In \CFA, character, integral, float, and imaginary types are all @AutoInitializable@.
676\begin{lstlisting}[label=lst:letter]
677enum Alphabet( int ) {
678 A = 'A', B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z,
679 a = 'a', b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z
680};
681print( "%c, %c, %c", Alphabet.F, Alphabet.o, Alphabet.z );
682>>> F, o, z
683\end{lstlisting}
684\section{Enumeration Features}
685\subsection{Iteration and Range}
686
687It is convenient to iterate over a \CFA enumeration value, e.g.:
688\begin{lstlisting}[label=lst:range_functions]
689for ( Alphabet alph; Alphabet ) { sout | alph; }
690>>> A B C ... z
691\end{lstlisting}
692The for-loop uses the enumeration type @Alphabet@ as its range, and iterates through all enumerators in the order defined in the enumeration.
693@alph@ is the iterating enumeration object, which returns the value of an @Alphabet@ in this context according to the precedence rule.
694
695\textbullet\ \CFA offers a shorthand for iterating all enumeration constants:
696\begin{lstlisting}[label=lst:range_functions]
697for ( Alphabet alph ) { sout | alph; }
698>>> A B C ... z
699\end{lstlisting}
700
701The following are examples for constructing for-control using an enumeration. Note that the type declaration of the iterating variable is optional, because \CFA can infer the type as EnumInstType based on the range expression, and possibly convert it to one of its attribute types.
702
703\textbullet\ H is implicit up-to exclusive range [0, H).
704\begin{lstlisting}[label=lst:range_function_1]
705for ( alph; Alphabet.D ) { sout | alph; }
706>>> A B C
707\end{lstlisting}
708
709\textbullet\ ~= H is implicit up-to inclusive range [0,H].
710\begin{lstlisting}[label=lst:range_function_2]
711for ( alph; ~= Alphabet.D ) { sout | alph; }
712>>> A B C D
713\end{lstlisting}
714
715\textbullet\ L ~ H is explicit up-to exclusive range [L,H).
716\begin{lstlisting}[label=lst:range_function_3]
717for ( alph; Alphabet.B ~ Alphabet.D ) { sout | alph; }
718// for ( Alphabet alph = Alphabet.B; alph < Alphabet.D; alph += 1 ); 1 is one_t
719>>> B C
720\end{lstlisting}
721
722\textbullet\ L ~= H is explicit up-to inclusive range [L,H].
723\begin{lstlisting}[label=lst:range_function_4]
724for ( alph; Alphabet.B ~= Alphabet.D ) { sout | alph; }
725>>> B C D
726\end{lstlisting}
727
728\textbullet\ L -~ H is explicit down-to exclusive range [H,L), where L and H are implicitly interchanged to make the range down-to.
729\begin{lstlisting}[label=lst:range_function_5]
730for ( alph; Alphabet.D -~ Alphabet.B ) { sout | alph; }
731>>> D C
732\end{lstlisting}
733
734\textbullet\ L -~= H is explicit down-to inclusive range [H,L], where L and H are implicitly interchanged to make the range down-to.
735\begin{lstlisting}[label=lst:range_function_6]
736for ( alph; Alphabet.D -~= Alphabet.B ) { sout | alph; }
737>>> D C B
738\end{lstlisting}
739
740A user can specify the ``step size'' of an iteration. There are two different stepping schemes for an enumeration for-loop.
741\begin{lstlisting}[label=lst:range_function_stepping]
742enum(int) Sequence { A = 10, B = 12, C = 14, D = 16, E = 18 };
743for ( s; Sequence.A ~= Sequence.E ~ 1 ) { sout | s; }
744>>> 10 12 14 16 18
745for ( s; Sequence.A ~= Sequence.E; s+=1 ) { sout | s; }
746>>> 10 11 12 13 14 15 16 17 18
747\end{lstlisting}
748The first syntax steps to the next enumeration constant, which is the default stepping scheme if not explicitly specified. The second syntax, on the other hand, applies @operator+=@ with @one_t@ to @value( s )@. Therefore, the second syntax is equivalent to
749\begin{lstlisting}[label=lst:range_function_stepping_converted]
750for ( typeof( value(Sequence.A) ) s=value( Sequence.A ); s <= Sequence.E; s+=1 ) { sout | s; }
751>>> 10 11 12 13 14 15 16 17 18
752\end{lstlisting}
753
754% \PAB{Explain what each loop does.}
755
756It is also possible to iterate over an enumeration's labels, implicitly or explicitly:
757\begin{lstlisting}[label=lst:range_functions_label_implicit]
758for ( char * alph; Alphabet )
759\end{lstlisting}
760This for-loop implicitly iterates over every label of the enumeration, because a label is the only valid resolution to @alph@ with type @char *@ in this case.
761If the value can also be resolved as @char *@, the labels can be iterated explicitly using array iteration.
762\begin{lstlisting}[label=lst:range_functions_label_explicit]
763for ( char * ch; labels( Alphabet ) )
764\end{lstlisting}
765
766
767% \subsection{Non-uniform Type}
768% TODO: Working in Progress, might need to change other sections. Conflict with the resolution right now.
769
770% \begin{lstlisting}
771% enum T( int, char * ) {
772% a=42, b="Hello World"
773% };
774% \end{lstlisting}
775% The enum T declares two different types: int and char *. The enumerators of T hold values of one of the declared types.
776
777\subsection{Enumeration Inheritance}
778
779\begin{lstlisting}[label=lst:EnumInline]
780enum( char * ) Name { Jack = "Jack", Jill = "Jill" };
781enum /* inferred */ Name2 { inline Name, Sue = "Sue", Tom = "Tom" };
782\end{lstlisting}
783\lstinline{inline} allows enumeration Name2 to inherit enumerators from Name by containment, and a Name enumeration is a subtype of enumeration Name2. An enumeration instance of type Name can be used where an instance of Name2 is expected.
784\begin{lstlisting}[label=lst:EnumInline]
785Name Fred;
786void f( Name2 );
787f( Fred );
788\end{lstlisting}
789If enumeration A declares @inline B@ in its enumeration body, enumeration A is the ``inlining enum'' and enumeration B is the ``inlined enum''.
790
791An enumeration can inline at most one other enumeration. The inline declaration must be placed before the first enumerator of the inlining enum. The inlining enum has all the enumerators from the inlined enum, with the same labels, values, and positions.
792\begin{lstlisting}[label=lst:EnumInline]
793enum /* inferred */ Name2 { inline Name, Sue = "Sue", Tom = "Tom" };
794// is equivalent to enum Name2 { Jack = "Jack", Jill="Jill", Sue = "Sue", Tom = "Tom" };
795\end{lstlisting}
796Name.Jack is equivalent to Name2.Jack. Their attributes are all identical. Opening both Name and Name2 in the same scope will not introduce ambiguity.
797\begin{lstlisting}[label=lst:EnumInline]
798with( Name, Name2 ) { Jack; } // Name.Jack and Name2.Jack are equivalent. No ambiguity
799\end{lstlisting}
800
801\section{Implementation}
802
803\subsection{Static Attribute Expression}
804\begin{lstlisting}[label=lst:static_attr]
805enum( char * ) Colour {
806 Red = "red", Blue = "blue", Green = "green"
807};
808\end{lstlisting}
809An enumerator expression returns its enumerator value as a constant expression with no runtime cost. For example, @Colour.Red@ is equivalent to the constant expression @"red"@, and \CFA finishes the expression evaluation before generating the corresponding C code. Applying a pseudo-function to a constant enumerator expression results in a constant expression as well. @value( Colour.Red )@, @position( Colour.Red )@, and @label( Colour.Red )@ are equivalent to constant expressions with @char *@ value @"red"@, @int@ value 0, and @char *@ value @"Red"@, respectively.
810
811\subsection{Runtime Attribute Expression and Weak Referenced Data}
812\begin{lstlisting}[label=lst:dynamic_attr]
813Colour c;
814...
815value( c ); // or c
816\end{lstlisting}
817An enumeration variable @c@ is equivalent to an integer variable with the value of @position( c )@. In Example~\ref{lst:dynamic_attr}, the value of enumeration variable @c@ is unknown at compile time. In this case, the pseudo-function calls are reduced to expressions that return the enumerator values at runtime.
818
819\CFA stores the values and labels in @const@ arrays to provide runtime lookup for enumeration information.
820
821\begin{lstlisting}[label=lst:attr_array]
822const char * Colour_labels [3] = { "Red", "Blue", "Green" };
823const char * Colour_values [3] = { "red", "blue", "green" };
824\end{lstlisting}
825The \CFA compiler transforms the attribute expressions into array accesses.
826\begin{lstlisting}[label=lst:attr_array_access]
827position( c ) // c; an integer
828value( c ); // Colour_values[c]
829label( c ); // Colour_labels[c]
830\end{lstlisting}
831
832To avoid unnecessary memory usage, the label and value arrays are generated only as needed, and are generated only once across all compilation units. By default, \CFA defers the declaration of the label and value arrays until there is a call to an attribute function with a dynamic value. If an attribute function is never called on a dynamic value of an enumerator, the arrays are never allocated. Once the arrays are created, all compilation units share a weak reference to the allocated arrays.
833
834\subsection{Enum Prelude}
835
836\begin{lstlisting}[label=lst:enum_func_dec]
837forall( T ) {
838 unsigned position( unsigned );
839 T value( unsigned );
840 char * label( unsigned );
841}
842\end{lstlisting}
843\CFA loads the declarations of the enumeration functions from @enum.hfa@.
844
845\subsection{Internal Representation}
846
847The definition of an enumeration is represented by an internal type called @EnumDecl@. At the minimum, it stores all the information needed to construct the companion object. Therefore, an @EnumDecl@ can be represented as the following:
848\begin{lstlisting}[label=lst:EnumDecl]
849forall(T)
850class EnumDecl {
851 T* values;
852 char** label;
853};
854\end{lstlisting}
855
856The internal representation of an enumeration constant is @EnumInstType@.
857An @EnumInstType@ has a reference to the \CFA-enumeration declaration and the position of the enumeration constant.
858\begin{lstlisting}[label=lst:EnumInstType]
859class EnumInstType {
860 EnumDecl enumDecl;
861 int position;
862};
863\end{lstlisting}
864In the later discussion, we will use @EnumDecl<T>@ to symbolize an @EnumDecl@ parameterized by type T, and @EnumInstType<T>@ is a declared instance of @EnumDecl<T>@.
865
866\begin{lstlisting}[caption={Enum Type Functions}, label=lst:cforall_enum_data]
867const T * const values;
868const char * label;
869int length;
870\end{lstlisting}
871Companion data are the information necessary to represent an enumeration. They are stored as standalone pieces, rather than in a structure, and are loaded ``on demand''.
872Companion data are needed only if the corresponding pseudo-functions are called. For example, the values of the enumeration Workday are loaded only if at least one compilation unit has a call to @value( Workday )@. Once the values are loaded, all compilation units share the values array to reduce memory usage.
873
874
875% \subsection{(Rework) Companion Object and Companion Function}
876
877% \begin{lstlisting}[caption={Enum Type Functions}, label=lst:cforall_enum_functions]
878% forall( T )
879% struct Companion {
880% const T * const values;
881% const char * label;
882% int length;
883% };
884% \end{lstlisting}
885% \CFA generates companion objects, an instance of structure that encloses @necessary@ data to represent an enumeration. The size of the companion is unknown at the compilation time, and it "grows" in size to compensate for the @usage@.
886
887% The companion object is singleton across the compilation (investigation).
888
889% \CFA generates the definition of companion functions.
890% Because \CFA implicitly stores an enumeration instance as its position, the companion function @position@ does nothing but return the position it is passed.
891% Companions function @value@ and @label@ return the array item at the given position of @values@ and @labels@, respectively.
892% \begin{lstlisting}[label=lst:companion_definition]
893% int position( Companion o, int pos ) { return pos; }
894% T value( Companion o, int pos ) { return o.values[ pos ]; }
895% char * label( Companion o, int pos ) { return o.labels[ pos ]; }
896% \end{lstlisting}
897% Notably, the @Companion@ structure definition, and all companion objects, are visible to users.
898% A user can retrieve values and labels defined in an enumeration by accessing the values and labels directly, or indirectly by calling @Companion@ functions @values@ and @labels@
899% \begin{lstlisting}[label=lst:companion_definition_values_labels]
900% Colour.values; // read the Companion's values
901% values( Colour ); // same as Colour.values
902% \end{lstlisting}
903
904\subsection{Companion Traits (experimental)}
905The semantics of companion traits are not yet settled; they might replace the companion object.
906\begin{lstlisting}[label=lst:companion_trait]
907forall(T1) {
908 trait Companion(otype T2<otype T1>) {
909 T1 value(otype T2<otype T1> const &);
910 int position(otype T2<otype T1> const &);
911 char * label(otype T2<otype T1> const &);
912 }
913}
914\end{lstlisting}
915All enumerations implicitly implement the Companion trait, an interface to access attributes. The Companion can be a data type because it fulfills the requirements to have concrete instances, which are:
916
917\begin{enumerate}
918 \item The instance of enumeration has a single polymorphic type.
919 \item Each assertion should use the type once as a parameter.
920\end{enumerate}
921
922\begin{lstlisting}
923enum(int) Weekday {
924 Monday=10, Tuesday, ...
925};
926
927T value( enum Weekday<T> & this);
928int position( enum Weekday<T> & this )
929char * label( enum Weekday<T> & this )
930
931trait Companion obj = (enum(int)) Workday.Weekday;
932value(obj); // 10
933\end{lstlisting}
934The enumeration comes with default implementations of the Companion trait functions. Using a Companion function makes \CFA allocate and initialize the necessary companion arrays, and return the data at the position represented by the enumeration.
935(...)
936
937\subsection{User-Defined Enumeration Functions}
938
939Companion objects make extending features for \CFA enumeration easy.
940\begin{lstlisting}[label=lst:companion_user_definition]
941char * charactic_string( Companion o, int position ) {
942 return sprintf( "Label: %s; Value: %s", label( o, position ), value( o, position) );
943}
944printf( charactic_string ( Color, 1 ) );
945>>> Label: Green; Value: G
946\end{lstlisting}
947Defining a function that takes a Companion object effectively defines that function for all \CFA enumerations.
948
949The \CFA compiler turns a function call that takes an enumeration instance as a parameter into a function call with a companion object plus a position.
950Therefore, a user can use the syntax with a user-defined enumeration function call:
951\begin{lstlisting}[label=lst:companion_user_definition]
952charactic_string( Color.Green ); // equivalent to charactic_string( Color, 1 )
953>>> Label: Green; Value: G
954\end{lstlisting}
955Similarly, the user can work with the enumeration type itself: (see section ref...)
956\begin{lstlisting}[ label=lst:companion_user_definition]
957void print_enumerators ( Companion o ) {
958 for ( c : Companion o ) {
959 sout | label (c) | value( c ) ;
960 }
961}
962print_enumerators( Colour );
963\end{lstlisting}
964
965
966\subsection{Declaration}
967
968The qualified enumeration syntax is dedicated to \CFA enumeration.
969\begin{lstlisting}[label=lst:range_functions]
970enum (type_declaration) name { enumerator = const_expr, enumerator = const_expr, ... }
971\end{lstlisting}
972A compiler stores the name, the underlying type, and all enumerators in an @enumeration table@.
973During the $Validation$ pass, the compiler links the type declaration to the type's definition.
974It ensures that the name of each enumerator is unique within the enumeration body, and checks that the value of every enumerator has the declared type.
975If the declared type is not @AutoInitializable@, \CFA rejects the enumeration definition.
976Otherwise, it attempts to initialize enumerators with the enumeration initialization pattern. (a reference to a future initialization pattern section)
977
978\begin{lstlisting}[label=lst:init]
979struct T { ... };
980void ?{}( T & t, zero_t ) { ... };
981void ?{}( T & t, one_t ) { ... };
982T ?+?( T & lhs, T & rhs ) { ... };
983
984enum (T) Sample {
985 Zero: 0 /* zero_t */,
986 One: Zero + 1 /* ?+?( Zero, one_t ) */ , ...
987};
988\end{lstlisting}
989Challenge: \\
990The value of an enumerator, or the initializer, requires @const_expr@.
991Previously, this issue was avoided by pushing the check to the C compiler, but that might not work anymore because of user-defined types with user-defined @zero_t@, @one_t@, and addition operations.
992Hence, it might not be possible to implement a \emph{correct} static check.
993
994\CFA $autogens$ a Companion object for the declared enumeration.
995\begin{lstlisting}[label=lst:companion]
996Companion( T ) Sample {
997 .values: { 0, 0+1, 0+1+1, 0+1+1+1, ... }, /* 0: zero_t, 1: one_t, +: ?+?{} */
998 .labels: { "Zero", "One", "Two", "Three", ...},
999 .length: /* number of enumerators */
1000};
1001\end{lstlisting}
1002\CFA stores values as intermediate expressions because the result of the function call to the function @?+?{}(T&, T&)@ is statically unknown to \CFA.
1003But the result is computed at run time, and the compiler ensures the @values@ are not changed.
1004
1005\subsection{Qualified Expression}
1006
1007\CFA uses a qualified expression to address the scoping of a \CFA-enumeration.
1008\begin{lstlisting}[label=lst:qualified_expression]
1009aggregation_name.field;
1010\end{lstlisting}
1011The qualified expression is not dedicated to \CFA enumeration.
1012It is a feature that is supported by other aggregations in \CFA as well, including a C enumeration.
1013Although C enumerations are unscoped, the qualified expression syntax still helps to disambiguate names in the context.
1014\CFA recognizes if the expression references a \CFA aggregation by searching for @aggregation_name@ in the \CFA enumeration table.
1015If the @aggregation_name@ is identified as a \CFA enumeration, the compiler checks if @field@ is present in the declared \CFA enumeration.
1016
1017\subsection{\lstinline{with} Clause/Statement}
1018
1019Instead of qualifying an enumeration expression every time, the @with@ can be used to expose enumerators to the current scope, making them directly accessible.
1020\begin{lstlisting}[label=lst:declaration]
1021enum Color( char * ) { Red="R", Green="G", Blue="B" };
1022enum Animal( int ) { Cat=10, Dog=20 };
1023with ( Color, Animal ) {
1024 char * red_string = Red; // value( Color.Red )
1025 int cat = Cat; // value( Animal.Cat )
1026}
1027\end{lstlisting}
1028The \lstinline{with} might introduce ambiguity to a scope. Consider the example:
1029\begin{lstlisting}[label=lst:declaration]
1030enum Color( char * ) { Red="R", Green="G", Blue="B" };
1031enum RGB( int ) { Red=0, Green=1, Blue=2 };
1032with ( Color, RGB ) {
1033 // int red = Red;
1034}
1035\end{lstlisting}
1036\CFA does not try to resolve an ambiguous expression; instead, it reports an error. In this case, it is necessary to qualify @Red@ even inside of the \lstinline{with} clause.
1037
1038\subsection{Instance Declaration}
1039
1040
1041\begin{lstlisting}[label=lst:var_declaration]
1042enum Sample s1;
1043\end{lstlisting}
1044
1045The declaration of a \CFA-enumeration variable has the same syntax as a C-enumeration variable. Internally, such a variable is represented as an @EnumInstType@.
1046
1047\section{Related Work}
1048\label{s:RelatedWork}
1049
1050Enumerations exist in many popular programming languages, e.g., Pascal, Ada, \Csharp, \CC, Go, Java, Modula-3, Rust, Swift, Python, and the algebraic data-type in functional programming.
1051There is a large set of overlapping features among these languages, but each language has its own unique restrictions and extensions.
1052
1053\subsection{(Free) Pascal}
1054
1055Free Pascal is a modern object-oriented version of the classic Pascal programming language.
1056It allows a C-style enumeration type, where enumerators must be assigned in ascending numerical order with a constant expression and the range can be non-consecutive.
1057\begin{lstlisting}[language=pascal,{moredelim=**[is][\color{red}]{@}{@}}]
1058Type EnumType = ( one, two, three, forty @= 40@, fortyone );
1059\end{lstlisting}
1060Pseudo-functions @Pred@ and @Succ@ can only be used if the range is consecutive.
1061The underlying type is an implementation-defined integral type large enough to hold all enumerated values; it does not have to be the smallest possible type.
1062The size of the underlying integral type can be explicitly specified using compiler directive @$PACKENUM@~$N$, where $N$ is the number of bytes, e.g.:
1063\begin{lstlisting}[language=pascal,{moredelim=**[is][\color{red}]{@}{@}}]
1064Type @{$\color{red}\$$PACKENUM 1}@ SmallEnum = ( one, two, three );
1065 @{$\color{red}\$$PACKENUM 4}@ LargeEnum = ( BigOne, BigTwo, BigThree );
1066Var S : SmallEnum; { 1 byte }
1067 L : LargeEnum; { 4 bytes}
1068\end{lstlisting}
1069
1070
1071\subsection{Ada}
1072
1073An enumeration type is defined as a list of possible values:
1074\begin{lstlisting}[language=ada]
1075type RGB is (Red, Green, Blue);
1076\end{lstlisting}
1077Like for numeric types, where e.g., 1 is an integer literal, @Red@, @Green@ and @Blue@ are called the literals of this type.
1078There are no other values assignable to objects of this type.
1079
1080\paragraph{Operators and attributes} ~\newline
1081Apart from equality (@"="@), the only operators on enumeration types are the ordering operators: @"<"@, @"<="@, @"="@, @"/="@, @">="@, @">"@, where the order relation is given implicitly by the sequence of literals:
1082Each literal has a position, starting with 0 for the first, incremented by one for each successor.
1083This position can be queried via the @'Pos@ attribute; the inverse is @'Val@, which returns the corresponding literal. In our example:
1084\begin{lstlisting}[language=ada]
1085RGB'Pos (Red) = 0
1086RGB'Val (0) = Red
1087\end{lstlisting}
1088There are two other important attributes: @Image@ and @Value@.
1089@Image@ returns the string representation of the value (in capital letters), @Value@ is the inverse:
1090\begin{lstlisting}[language=ada]
1091RGB'Image ( Red ) = "RED"
1092RGB'Value ("Red") = Red
1093\end{lstlisting}
1094These attributes are important for simple IO (there are more elaborate IO facilities in @Ada.Text_IO@ for enumeration types).
1095Note that, since Ada is case-insensitive, the string given to @'Value@ can be in any case.
1096
1097\paragraph{Enumeration literals} ~\newline
1098Literals are overloadable, i.e. you can have another type with the same literals.
1099\begin{lstlisting}[language=ada]
1100type Traffic_Light is (Red, Yellow, Green);
1101\end{lstlisting}
1102Overload resolution within the context of use of a literal normally resolves which @Red@ is meant.
1103Only if you have an unresolvable overloading conflict, you can qualify with special syntax which @Red@ is meant:
1104\begin{lstlisting}[language=ada]
1105RGB'(Red)
1106\end{lstlisting}
1107Like many other declarative items, enumeration literals can be renamed.
1108In fact, such a literal is actually a function, so it has to be renamed as such:
1109\begin{lstlisting}[language=ada]
1110function Red return P.RGB renames P.Red;
1111\end{lstlisting}
1112Here, @RGB@ is assumed to be defined in package @P@, which is visible at the place of the renaming declaration.
1113Renaming makes @Red@ directly visible without the need to resort to a use-clause.
1114
1115Note that redeclaration as a function does not affect the staticness of the literal.
1116
1117\paragraph{Characters as enumeration literals} ~\newline
1118Rather unique to Ada is the use of character literals as enumeration literals:
1119\begin{lstlisting}[language=ada]
1120type ABC is ('A', 'B', 'C');
1121\end{lstlisting}
1122This literal @'A'@ has nothing in common with the literal @'A'@ of the predefined type @Character@ (or @Wide_Character@).
1123
1124Every type that has at least one character literal is a character type.
1125For every character type, string literals and the concatenation operator @"&"@ are also implicitly defined.
1126\begin{lstlisting}[language=ada]
1127type My_Character is (No_Character, 'a', Literal, 'z');
1128type My_String is array (Positive range <>) of My_Character;
1129
1130S: My_String := "aa" & Literal & "za" & 'z';
1131T: My_String := ('a', 'a', Literal, 'z', 'a', 'z');
1132\end{lstlisting}
1133In this example, @S@ and @T@ have the same value.
1134
1135Ada's @Character@ type is defined that way.
1136See Ada Programming/Libraries/Standard.
1137
1138\paragraph{Booleans as enumeration literals} ~\newline
1139Also Booleans are defined as enumeration types:
1140\begin{lstlisting}[language=ada]
1141type Boolean is (False, True);
1142\end{lstlisting}
1143There is special semantics implied with this declaration in that objects and expressions of this type can be used as conditions.
1144Note that the literals @False@ and @True@ are not Ada keywords.
1145
1146Thus it is not sufficient to declare a type with these literals and then hope objects of this type can be used like so:
1147\begin{lstlisting}[language=ada]
1148type My_Boolean is (False, True);
1149Condition: My_Boolean;
1150
1151if Condition then -- wrong, won't compile
1152\end{lstlisting}
1153
1154If you need your own Booleans (perhaps with special size requirements), you have to derive from the predefined Boolean:
1155\begin{lstlisting}[language=ada]
1156type My_Boolean is new Boolean;
1157Condition: My_Boolean;
1158
1159if Condition then -- OK
1160\end{lstlisting}
1161
1162\paragraph{Enumeration subtypes} ~\newline
1163You can use range to subtype an enumeration type:
1164\begin{lstlisting}[language=ada]
1165subtype Capital_Letter is Character range 'A' .. 'Z';
1166type Day_Of_Week is (Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday);
1167subtype Working_Day is Day_Of_Week range Monday .. Friday;
1168\end{lstlisting}
1169
1170\paragraph{Using enumerations} ~\newline
1171Enumeration types being scalar subtypes, type attributes such as @First@ and @Succ@ will allow stepping through a subsequence of the values.
1172\begin{lstlisting}[language=ada]
1173case Day_Of_Week'First is
1174 when Sunday =>
1175 ISO (False);
1176 when Day_Of_Week'Succ(Sunday) =>
1177 ISO (True);
1178 when Tuesday .. Saturday =>
1179 raise Program_Error;
1180end case;
1181\end{lstlisting}
1182A loop will automatically step through the values of the subtype's range.
1183Filtering week days to include only working days with an even position number:
1184\begin{lstlisting}[language=ada]
1185 for Day in Working_Day loop
1186 if Day_Of_Week'Pos(Day) mod 2 = 0 then
1187 Work_In_Backyard;
1188 end if;
1189 end loop;
1190\end{lstlisting}
1191Enumeration types can be used as array index subtypes, yielding a table feature:
1192\begin{lstlisting}[language=ada]
1193type Officer_ID is range 0 .. 50;
1194type Schedule is array (Working_Day) of Officer_ID;
1195\end{lstlisting}
1196
1197\begin{lstlisting}[language=ada]
1198type Subtype_Name is (Id1, Id2, Id3 ... );
1199\end{lstlisting}
1200where @Id1@, @Id2@, etc. are identifiers or character literals.
1201In either case, the legal values of the type are referred to as ``enumeration literals.''
1202Each of these values has a ``position number'' corresponding to its position in the list such that @Id1@ has position 0, @Id2@ has position 1, and the Nth value has position N-1.
1203
1204\paragraph{Attributes of Enumeration Types} ~\newline
1205An enumeration type, @T@, has the following attributes: @T'First@, @T'Last@, @T'Range@, @T'Pred@, @T'Succ@, @T'Min@, @T'Max@, @T'Image@, @T'Wide_Image@, @T'Value@, @T'Wide_Value@, @T'Pos@, and @T'Val@ (pronounced "T tick first", "T tick last", etc.).
1206Most of these are illustrated in the example program given below, and most of them produce what you would intuitively expect based on their names.
1207
1208@T'Image@ and @T'Value@ form a complementary pair of attributes.
1209The former takes a value in @T@ and returns a String representation of that value.
1210The latter takes a @String@ that is a representation of a value in @T@ and returns that value.
1211
1212@T'Pos@ and @T'Val@ form another complementary pair.
1213The former takes a value in @T@ and returns its position number.
1214The latter takes a position number and returns the corresponding value of type @T@.
1215
1216
1217\subsection{\Csharp}
1218
1219\subsection{\CC}
1220
1221\CC is backwards compatible with C, so it inherited C's enumerations, except there is no implicit conversion from an integral value to an enumeration;
1222hence, the values in a \CC enumeration can only be its enumerators (without a cast).
1223There is no mechanism to iterate through an enumeration.
1224
1225\CC{11} added a scoped enumeration, \lstinline[language=c++]{enum class} (or \lstinline[language=c++]{enum struct}), so the enumerators are local to the enumeration and must be accessed using type qualification, e.g., @Weekday::Monday@.
1226\CC{20} supports unscoped access with a \lstinline[language=c++]{using enum} declaration.
1227
1228For both unscoped and scoped enumerations, the underlying type is an implementation-defined integral type large enough to hold all enumerated values; it does not have to be the smallest possible type.
1229In \CC{11}, the underlying integral type can be explicitly specified:
1230\begin{lstlisting}[language=c++,{moredelim=**[is][\color{red}]{@}{@}}]
1231enum class RGB : @long@ { Red, Green, Blue };
1232enum class rgb : @char@ { Red = 'r', Green = 'g', Blue = 'b' };
1233enum class srgb : @signed char@ { Red = -1, Green = 0, Blue = 1 };
1234RGB colour1 = @RGB::@Red;
1235rgb colour2 = @rgb::@Red;
1236srgb colour3 = @srgb::@Red;
1237\end{lstlisting}
1238
1239\subsection{Go}
1240
1241\subsection{Java}
1242
1243\subsection{Modula-3}
1244
1245\subsection{Rust}
1246
1247\subsection{Swift}
1248
1249\subsection{Python}
1250
1251\subsection{Algebraic Data Type}
1252
1253\end{document}
1254
1255% Local Variables: %
1256% tab-width: 4 %
1257% compile-command: "pdflatex enum.tex" %
1258% End: %
Note: See TracBrowser for help on using the repository browser.