source: doc/proposals/enum.tex@ 566cc33

Last change on this file since 566cc33 was d63746f, checked in by Peter A. Buhr <pabuhr@…>, 21 months ago

more updates

  • Property mode set to 100644
File size: 66.2 KB
Line 
1\documentclass[12pt]{article}
2\usepackage{fullpage,times}
3\usepackage{pslatex} % reduce size of san serif font
4\usepackage{xcolor}
5\usepackage{listings}
6%\usepackage{array}
7\usepackage{graphics}
8\usepackage{xspace}
9\usepackage{relsize} % must be after change to small or selects old size
10\usepackage{calc} % latex arithmetic
11
12\makeatletter
13\renewcommand\section{\@startsection{section}{1}{\z@}{-3.0ex \@plus -1ex \@minus -.2ex}{1.5ex \@plus .2ex}{\normalfont\large\bfseries}}
14\renewcommand\subsection{\@startsection{subsection}{2}{\z@}{-2.75ex \@plus -1ex \@minus -.2ex}{1.25ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
15\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex \@plus -1ex \@minus -.2ex}{1.0ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
16\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-2.0ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries}}
17\renewcommand\subparagraph{\@startsection{subparagraph}{4}{\z@}{-1.5ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries\itshape}}
18
19% Denote newterms in particular font and index them without particular font and in lowercase, e.g., \newterm{abc}.
20% The option parameter provides an index term different from the new term, e.g., \newterm[\texttt{abc}]{abc}
21% The star version does not lowercase the index information, e.g., \newterm*{IBM}.
22\newcommand{\newtermFontInline}{\emph}
23\newcommand{\newterm}{\protect\@ifstar\@snewterm\@newterm}
24\newcommand{\@newterm}[2][\@empty]{\lowercase{\def\temp{#2}}{\newtermFontInline{#2}}\ifx#1\@empty\index{\temp}\else\index{#1@{\protect#2}}\fi}
25\newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi}
26
27\newcommand{\LstBasicStyle}[1]{{\lst@basicstyle{#1}}}
28\newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}
29\newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}
30\newcommand{\LstStringStyle}[1]{{\lst@basicstyle{\lst@stringstyle{#1}}}}
31\newcommand{\LstNumberStyle}[1]{{\lst@basicstyle{\lst@numberstyle{#1}}}}
32
33\newlength{\gcolumnposn} % temporary hack because lstlisting does not handle tabs correctly
34\newlength{\columnposn}
35\setlength{\gcolumnposn}{3in}
36\setlength{\columnposn}{\gcolumnposn}
37\newcommand{\setgcolumn}[1]{\global\gcolumnposn=#1\global\columnposn=\gcolumnposn}
38\newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstCommentStyle{#2}}}
39\newcommand{\CD}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstBasicStyle{#2}}}
40\newcommand{\CRT}{\global\columnposn=\gcolumnposn}
41\makeatother
42
43\usepackage[ignoredisplayed]{enumitem} % do not affect trivlist
44\setlist{labelsep=1ex}% global
45\setlist[itemize]{topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent,leftmargin=\parindent}% global
46\setlist[itemize,1]{label=\textbullet}% local
47%\renewcommand{\labelitemi}{{\raisebox{0.25ex}{\footnotesize$\bullet$}}}
48\setlist[enumerate]{topsep=0.5ex,parsep=0.25ex,itemsep=0.25ex,listparindent=\parindent}% global
49\setlist[enumerate,2]{leftmargin=\parindent,labelsep=*,align=parleft,label=\alph*.}% local
50\setlist[description]{topsep=0.5ex,itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
51
52\newenvironment{cquote}{%
53 \list{}{\lstset{resetmargins=true,aboveskip=0pt,belowskip=0pt}\topsep=4pt\parsep=0pt\leftmargin=\parindent\rightmargin\leftmargin}%
54 \item\relax
55}{%
56 \endlist
57}% cquote
58
59\setlength{\topmargin}{-0.45in} % move running title into header
60\setlength{\headsep}{0.25in}
61\setlength{\textheight}{9.0in}
62
63\newcommand{\CFAIcon}{\textsf{C\raisebox{\depth}{\rotatebox{180}A}}} % Cforall icon
64\newcommand{\CFA}{\protect\CFAIcon\xspace} % CFA symbolic name
65\newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon
66\newcommand{\CC}[1][]{\protect\CCIcon{#1}\xspace} % C++ symbolic name
67\newcommand{\Csharp}{C\raisebox{-0.7ex}{\relsize{2}$^\sharp$}\xspace} % C# symbolic name
68\newcommand{\PAB}[1]{{\color{red}PAB: #1}}
69
70% \definecolor{mGreen}{rgb}{0,0.6,0}
71% \definecolor{mGray}{rgb}{0.5,0.5,0.5}
72% \definecolor{mPurple}{rgb}{0.58,0,0.82}
73% \definecolor{backgroundColour}{rgb}{0.95,0.95,0.92}
74
75\lstdefinestyle{CStyle}{
76% backgroundcolor=\color{backgroundColour},
77% commentstyle=\color{mGreen},
78% keywordstyle=\color{magenta},
79 stringstyle=\small\tt, % use typewriter font
80% stringstyle=\color{mPurple},
81 columns=fullflexible,
82 basicstyle=\small\linespread{0.9}\sf, % reduce line spacing and use sanserif font
83% basicstyle=\footnotesize,
84 breakatwhitespace=false,
85% breaklines=true,
86 captionpos=b,
87 keepspaces=true,
88 escapechar=\$, % LaTeX escape in CFA code
89% numbers=left,
90% numbersep=5pt,
91% numberstyle=\tiny\color{mGray},
92% showspaces=false,
93 showstringspaces=false,
94% showtabs=false,
95 showlines=true, % show blank lines at end of code
96 tabsize=5,
97 language=C,
98 aboveskip=4pt, % spacing above/below code block
99 belowskip=2pt,
100 xleftmargin=\parindent, % indent code to paragraph indentation
101}
102\lstset{style=CStyle,moredelim=**[is][\color{red}]{@}{@}}
103\lstMakeShortInline@ % single-character for \lstinline
104
105\begin{document}
106
107\title{\vspace*{-0.5in}Enumeration in \CFA}
108\author{Jiada Liang}
109
110\maketitle
111
112\begin{abstract}
113An enumeration is a type defining an ordered set of named constant values, where a name abstracts a value, e.g., @PI@ versus @3.14159@.
114C restricts an enumeration type to the integral type @signed int@, which \CC supports, meaning enumeration names bind to integer constants.
115\CFA extends C enumerations to allow all basic and custom types for the enumeration type, like other modern programming languages.
116Furthermore, \CFA adds other useful features for enumerations to support better software-engineering practices and simplify program development.
117\end{abstract}
118
119\section{Introduction}
120
121Naming values is a common practice in mathematics and engineering, e.g., $\pi$, $\tau$ (2$\pi$), $\phi$ (golden ratio), MHz (1E6), etc.
122Naming is also commonly used to represent many other numerical phenomena, such as days of the week, months of a year, floors of a building (basement), specific times (noon, New Years).
123Many programming languages capture this important software-engineering capability through a mechanism called an \newterm{enumeration}.
124An enumeration is similar to other programming-language types by providing a set of constrained values, but adds the ability to name \emph{all} the values in its set.
125Note, all enumeration names must be unique but different names can represent the same value (eighth note, quaver), which are synonyms.
126
127Specifically, an enumerated type restricts its values to a fixed set of named constants.
128While all types are restricted to a fixed set of values because of the underlying von Neumann architecture, and hence, to a corresponding set of constants, e.g., @3@, @3.5@, @3.5+2.1i@, @'c'@, @"abc"@, etc., these values are not named, other than the programming-language supplied constant names.
129
130Fundamentally, all enumeration systems have an \newterm{enumeration} type with an associated set of \newterm{enumerator} names.
131An enumeration has three universal attributes, \newterm{position}, \newterm{label}, and \newterm{value}, as shown by this representative enumeration, where position and value can be different.
132\begin{cquote}
133\small\sf\setlength{\tabcolsep}{3pt}
134\begin{tabular}{rccccccccccc}
135\it\color{red}enumeration & \multicolumn{7}{c}{\it\color{red}enumerators} \\
136$\downarrow$\hspace*{25pt} & \multicolumn{7}{c}{$\downarrow$} \\
137@enum@ Weekday \{ & Monday, & Tuesday, & Wednesday, & Thursday,& Friday, & Saturday, & Sunday \}; \\
138\it\color{red}position & 0 & 1 & 2 & 3 & 4 & 5 & 6 \\
139\it\color{red}label & Monday & Tuesday & Wednesday & Thursday & Friday & Saturday & Sunday \\
140\it\color{red}value & 0 & 1 & 2 & 3 & 4 & 5 & 6
141\end{tabular}
142\end{cquote}
143Here, the \newterm{enumeration} @Weekday@ defines the ordered \newterm{enumerator}s @Monday@, @Tuesday@, @Wednesday@, @Thursday@, @Friday@, @Saturday@ and @Sunday@.
144By convention, the successor of @Tuesday@ is @Wednesday@ and the predecessor of @Tuesday@ is @Monday@, independent of the associated enumerator constant values.
145Because an enumerator is a constant, it cannot appear in a mutable context, e.g., @Monday = Sunday@ is meaningless, and because an enumerator has no address, it is an \newterm{rvalue}\footnote{
146The term rvalue defines an expression that can only appear on the right-hand side of an assignment.}.
147
148
149\section{C-Style Enum}
150
151The C-Style enumeration has the following syntax and semantics.
152\begin{lstlisting}
153enum Weekday { Monday, Tuesday, Wednesday, Thursday@ = 10@, Friday, Saturday, Sunday };
154\end{lstlisting}
155Enumerators without an explicitly designated constant value are \newterm{auto-initialized} by the compiler: from left to right, starting at zero or the next explicitly initialized constant, incrementing by @1@.
156For example, @Monday@ to @Wednesday@ are implicitly assigned with constants @0@--@2@, @Thursday@ is explicitly set to constant @10@, and @Friday@ to @Sunday@ are implicitly assigned with constants @11@--@13@.
157Initialization may occur in any order.
158\begin{lstlisting}
159enum Weekday { Thursday@ = 10@, Friday, Saturday, Sunday, Monday@ = 0@, Tuesday, Wednesday };
160\end{lstlisting}
161Note, the comma in the enumerator list can be a terminator or a separator, allowing the list to end with a dangling comma.
162\begin{lstlisting}
163enum Weekday {
164 Thursday = 10, Friday, Saturday, Sunday,
165 Monday = 0, Tuesday, Wednesday@,@ // terminating comma
166};
167\end{lstlisting}
168This feature allows enumerator lines to be interchanged without moving a comma.\footnote{
169A terminating comma appears in other C syntax, e.g., the initializer list.}
170Finally, C enumerators are \newterm{unscoped}, i.e., enumerators declared inside of an @enum@ are visible (projected) into the enclosing scope of the @enum@ type.
171
172In theory, a C enumeration \emph{variable} is an implementation-defined integral type large enough to hold all enumerated values.
173In practice, since integral constants are used, which have type @int@ (unless qualified with a size suffix), C uses @int@ as the underlying type for enumeration variables.
174Finally, there is an implicit bidirectional conversion between an enumeration and integral types.
175\begin{lstlisting}[label=lst:enum_scope]
176{
177 enum Weekday { /* as above */ }; $\C{// enumerators implicitly projected into local scope}$
178 Weekday weekday = Monday; $\C{// weekday == 0}$
179 weekday = Friday; $\C{// weekday == 11}$
180 int i = Sunday; $\C{// implicit conversion to int, i == 13}$
181 weekday = 10000; $\C{// UNDEFINED! implicit conversion to Weekday}$
182}
183int j = Wednesday; $\C{// ERROR! Wednesday is not declared in this scope}$
184\end{lstlisting}
185The implicit conversion from @int@ to an enumeration type is an unnecessary source of error.
186
187It is common for C programmers to ``believe'' there are 3 equivalent forms of constant enumeration.
188\begin{lstlisting}[label=lst:enum_forms]
189#define Monday 0
190static const int Monday = 0;
191enum { Monday };
192\end{lstlisting}
193For @#define@, the programmer has to play compiler and explicitly manage the enumeration values;
194furthermore, these are independent constants outside of any language type mechanism.
195The same explicit management is true for @const@ declarations, and the @const@ variable cannot appear in constant-expression locations, like @case@ labels, array dimensions,\footnote{
196C allows variable-length array-declarations (VLA), so this case does work, but it fails in \CC, which does not support VLAs, unless it is \lstinline{g++}.} and immediate operands of assembler instructions.
197Only the @enum@ form is managed by the compiler, is part of the language type-system, and works in all C constant-expression locations.
198
199
200\section{\CFA-Style Enum}
201
202\CFA supports C-Style enumeration using the same syntax and semantics for backwards compatibility.
203\CFA also extends C-Style enumeration by adding a number of new features that bring enumerations in line with other modern programming languages.
204
205
206\subsection{Enumerator Name Resolution}
207\label{s:EnumeratorNameResolution}
208
209In C, unscoping of enumerators presents a \newterm{naming problem} when multiple enumeration types appear in the same scope with duplicate enumerator names.
210There is no mechanism in C to resolve these naming conflicts other than renaming of one of the duplicates, which may be impossible.
211
212The \CFA type-system allows extensive overloading, including enumerators.
213Furthermore, \CFA uses the left-hand side of assignment in type resolution to pinpoint the best overloaded name.
214Finally, qualification is provided to disambiguate any ambiguous situations.
215\begin{lstlisting}
216enum C1 { First, Second, Third, Fourth };
217enum C2 { @Fourth@, @Third@, @Second@, @First@ };
218C1 p() { return Third; } $\C{// correctly resolved duplicate names}$
219C2 p() { return Fourth; }
220void foo() {
221 C1 e1 = First; C2 e2 = First;
222 e1 = Second; e2 = Second;
223 e1 = p(); e2 = p(); $\C{// correctly resolved function call}$
224 int i = @C1.@First + @C2.@First; $\C{// ambiguous without qualification}$
225}
226\end{lstlisting}
227\CFA overloading allows programmers to use the most meaningful names without fear of unresolvable clashes from included files, which are correctable with qualification.
228
229
230\subsection{Enumerator Scoping}
231
232An enumeration can be scoped, so the enumerator constants are not projected into the enclosing scope, using @'!'@.
233\begin{lstlisting}
234enum Weekday @!@ { /* as above */ };
235enum( char * ) Names @!@ { /* as above */ };
236\end{lstlisting}
237Now the enumerators \emph{must} be qualified with the associated enumeration.
238\begin{lstlisting}
239Weekday weekday = @Weekday@.Monday;
240Names names = @Names.@Fred;
241names = @Names.@Jane;
242\end{lstlisting}
243It is possible to toggle back to unscoping using the \CFA @with@ clause/statement (see also \CC \lstinline[language=c++]{using enum} in Section~\ref{s:C++RelatedWork}).
244\begin{lstlisting}
245Weekday weekday;
246with ( @Weekday@, @Names@ ) { $\C{// type names}$
247 Names names = @Fred@;
248 names = @Jane@;
249 weekday = Saturday;
250}
251\end{lstlisting}
252As in Section~\ref{s:EnumeratorNameResolution}, opening multiple unscoped enumerations can result in duplicate enumeration names, but \CFA type resolution and falling back to explicit qualification handles name resolution.
253
254\subsection{Enumerator Typing}
255
256\CFA extends the enumeration declaration by parameterizing with a type (like a generic type), allowing enumerators to be assigned any values from the declared type.
257Figure~\ref{f:EumeratorTyping} shows a series of examples illustrating that all \CFA types can be used with an enumeration and each type's constants used to set the enumerator constants.
258Note, the synonyms @Liz@ and @Beth@ in the last declaration.
259
260Because enumerators are constants, the enumeration type is implicitly @const@, so all the enumerator types in Figure~\ref{f:EumeratorTyping} are rewritten with @const@.
261A typed enumeration has an implicit (safe) conversion to its base type.
262\begin{lstlisting}
263char currency = Dollar;
264string fred = Fred; $\C{// implicit conversion from char * to \CFA string type}$
265Person student = Beth;
266\end{lstlisting}
267
268% \begin{lstlisting}[label=lst:color]
269% struct S { int i, j; };
270% enum( S ) s { A = { 3, 4 }, B = { 7, 8 } };
271% enum( @char@ ) Currency { Dollar = '$\textdollar$', Euro = '$\texteuro$', Pound = '$\textsterling$' };
272% enum( @double@ ) Planet { Venus = 4.87, Earth = 5.97, Mars = 0.642 }; // mass
273% enum( @char *@ ) Colour { Red = "red", Green = "green", Blue = "blue" };
274% enum( @Currency@ ) Europe { Euro = '$\texteuro$', Pound = '$\textsterling$' }; // intersection
275% \end{lstlisting}
276
277\begin{figure}
278\begin{lstlisting}
279// integral
280 enum( @char@ ) Currency { Dollar = '$\textdollar$', Euro = '$\texteuro$', Pound = '$\textsterling$' };
281 enum( @signed char@ ) srgb { Red = -1, Green = 0, Blue = 1 };
282 enum( @long long int@ ) BigNum { X = 123_456_789_012_345, Y = 345_012_789_456_123 };
283// non-integral
284 enum( @double@ ) Math { PI_2 = 1.570796, PI = 3.141593, E = 2.718282 };
285 enum( @_Complex@ ) Plane { X = 1.5+3.4i, Y = 7+3i, Z = 0+0.5i };
286// pointer
287 enum( @char *@ ) Names { Fred = "FRED", Mary = "MARY", Jane = "JANE" };
288 int i, j, k;
289 enum( @int *@ ) ptr { I = &i, J = &j, K = &k };
290 enum( @int &@ ) ref { I = i, J = j, K = k };
291// tuple
292 enum( @[int, int]@ ) { T = [ 1, 2 ] }; $\C{// new \CFA type}$
293// function
294 void f() {...} void g() {...}
295 enum( @void (*)()@ ) funs { F = f, G = g };
296// aggregate
297 struct Person { char * name; int age, height; };
298@***@enum( @Person@ ) friends { @Liz@ = { "ELIZABETH", 22, 170 }, @Beth@ = Liz, Jon = { "JONATHAN", 35, 190 } };
299\end{lstlisting}
300\caption{Enumerator Typing}
301\label{f:EumeratorTyping}
302\end{figure}
303
304Typed enumerations deal with the \emph{harmonizing} problem between an enumeration and any companion data.
305The following example is from the \CFA compiler, written in \CC.
306\begin{lstlisting}
307enum integral_types { chr, schar, uschar, sshort, ushort, sint, usint, ..., NO_OF_ITYPES };
308char * integral_names[NO_OF_ITYPES] = {
309 "char", "signed char", "unsigned char",
310 "signed short int", "unsigned short int",
311 "signed int", "unsigned int",
312 ...
313};
314\end{lstlisting}
315The \emph{harmonizing} problem occurs because the enumeration declaration is in one header file and the names are declared in another translation unit.
316It is up to the programmer to ensure changes made in one location are harmonized with the other location (by identifying this requirement within a comment).
317The typed enumeration largely solves this problem by combining and managing the two data types.
318\begin{lstlisting}
319enum( char * ) integral_types {
320 chr = "char", schar = "signed char", uschar = "unsigned char",
321 sshort = "signed short int", ushort = "unsigned short int",
322 sint = "signed int", usint = "unsigned int",
323 ...
324};
325\end{lstlisting}
326Note, the enumeration type can be a structure (see @Person@ in Figure~\ref{f:EumeratorTyping}), so it is possible to have the equivalent of multiple arrays of companion data using an array of structures.
327
328
329\subsection{Pure Enumerators}
330
331An empty enumerator type, @enum()@, implies the enumerators are pure symbols without values but set properties;
332hence, there is no default conversion to @int@.
333
334\begin{lstlisting}
335enum() Mode { O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_APPEND };
336@***@Mode iomode = O_RDONLY;
337bool b = iomode == O_RDONLY || iomode < O_APPEND;
338int i = iomode; $\C{\color{red}// disallowed}$
339\end{lstlisting}
340
341\subsection{Enumerator Subset}
342
343It follows from enumerator typing that the enumerator type can be another enumerator.
344\begin{lstlisting}
345enum( @char@ ) Currency { Dollar = '$\textdollar$', Euro = '$\texteuro$', Pound = '$\textsterling$' };
346enum( @Currency@ ) Europe { Euro = Currency.Euro, Pound = Currency.Pound }; // intersection
347enum( char ) Letter { A = 'A', B = 'B', C = 'C', ..., Z = 'Z' };
348enum( @Letter@ ) Greek { Alph = A, Beta = B, ..., Zeta = Z }; // intersection
349\end{lstlisting}
350Subset enumerations may have more or less enumerators than their typed enumeration, but the enumerator values must be from the typed enumeration.
351For example, @Greek@ enumerators are a subset of type @Letter@ and are type compatible with enumeration @Letter@, but @Letter@ enumerators are not type compatible with enumeration @Greek@.
352\begin{lstlisting}
353Letter letter = A;
354@***@Greek greek = Beta;
355letter = Beta; $\C{// allowed, letter == B}$
356greek = A; $\C{\color{red}// disallowed}$
357\end{lstlisting}
358
359
360\subsection{Enumeration Inheritance}
361
362\CFA Plan-9 inheritance may be used with enumerations, where Plan-9 inheritance is containment inheritance with implicit unscoping (like a nested unnamed @struct@/@union@ in C).
363\begin{lstlisting}
364enum( char * ) Names { /* as above */ };
365enum( char * ) Names2 { @inline Names@, Jack = "JACK", Jill = "JILL" };
366@***@enum /* inferred */ Names3 { @inline Names2@, Sue = "SUE", Tom = "TOM" };
367\end{lstlisting}
368Enumeration @Names2@ inherits all the enumerators and their values from enumeration @Names@ by containment, and a @Names@ enumeration is a subtype of enumeration @Names2@.
369Note, enumerators must be unique in inheritance but enumerator values may be repeated.
370
371The enumeration type for the inheriting type must be the same as the inherited type;
372hence the enumeration type may be omitted for the inheriting enumeration and it is inferred from the inherited enumeration, as for @Names3@.
373% When inheriting from integral types, automatic numbering may be used, so the inheritance placement left to right is important.
374Specifically, the inheritance relationship for @Names@ is:
375\begin{lstlisting}
376Names $\(\subset\)$ Names2 $\(\subset\)$ Names3 $\(\subset\)$ const char * $\C{// enum type of Names}$
377\end{lstlisting}
378For the given function prototypes, the following calls are valid.
379\begin{cquote}
380\begin{tabular}{ll}
381\begin{lstlisting}
382void f( Names );
383void g( Names2 );
384void h( Names3 );
385void j( const char * );
386\end{lstlisting}
387&
388\begin{lstlisting}
389f( Fred );
390g( Fred ); g( Jill );
391h( Fred ); h( Jill ); h( Sue );
392j( Fred ); j( Jill ); j( Sue ); j( "WILL" );
393\end{lstlisting}
394\end{tabular}
395\end{cquote}
396Note, the validity of calls is the same for call-by-reference as for call-by-value, and @const@ restrictions are the same as for other types.
397
398
399\subsection{Enumeration Pseudo-functions}
400
401Pseudo-functions are function-like operators that do not result in any run-time computations, i.e., like @sizeof@, @offsetof@, @typeof@.
402Often a call to a pseudo-function is substituted with information extracted from the symbol table at compilation time, like storage size or alignment associated with the underlying architecture.
403
404The attributes of an enumerator are accessed by pseudo-functions @position@, @value@, and @label@.
405\begin{lstlisting}
406@***@int jane_pos = @position@( Names.Jane ); $\C{// 2}$
407@***@char * jane_value = @value@( Names.Jane ); $\C{// "JANE"}$
408@***@char * jane_label = @label@( Names.Jane ); $\C{// "Jane"}$
409sout | @label@( Names.Jane ) | @value@( Names.Jane );
410\end{lstlisting}
411Note the ability to print both enumerator label and value.
412
413
414\subsection{Enumerator Position or Value}
415
416Enumerators can be used in multiple contexts.
417In most programming languages, an enumerator is implicitly converted to its value (like a typed macro substitution).
418However, enumerator synonyms and typed enumerations make this implicit conversion to value incorrect in some contexts.
419In these contexts, a programmer's intuition assumes an implicit conversion to position.
420
421For example, an intuitive use of enumerations is with the \CFA @switch@/@choose@ statement, where @choose@ performs an implicit @break@ rather than a fall-through at the end of a @case@ clause.
422\begin{cquote}
423\begin{lstlisting}
424enum Count { First, Second, Third, Fourth };
425Count e;
426\end{lstlisting}
427\begin{tabular}{ll}
428\begin{lstlisting}
429
430choose( e ) {
431 case @First@: ...;
432 case @Second@: ...;
433 case @Third@: ...;
434 case @Fourth@: ...;
435}
436\end{lstlisting}
437&
438\begin{lstlisting}
439// rewrite
440choose( @value@( e ) ) {
441 case @value@( First ): ...;
442 case @value@( Second ): ...;
443 case @value@( Third ): ...;
444 case @value@( Fourth ): ...;
445}
446\end{lstlisting}
447\end{tabular}
448\end{cquote}
449Here, the intuitive code on the left is implicitly transformed into the standard implementation on the right, using the value of the enumeration variable and enumerators.
450However, this implementation is fragile, e.g., if the enumeration is changed to:
451\begin{lstlisting}
452enum Count { First, Second, Third @= First@, Fourth };
453\end{lstlisting}
454which makes @Third == First@ and @Fourth == Second@, causing a compilation error because of duplicate @case@ clauses.
455To better match with programmer intuition, \CFA toggles between value and position semantics depending on the language context.
456For conditional clauses and switch statements, \CFA uses the robust position implementation.
457\begin{lstlisting}
458choose( @position@( e ) ) {
459 case @position@( First ): ...;
460 case @position@( Second ): ...;
461 case @position@( Third ): ...;
462 case @position@( Fourth ): ...;
463}
464\end{lstlisting}
465
466\begin{lstlisting}
467Count variable_a = First, variable_b = Second, variable_c = Third, variable_d = Fourth;
468p(variable_a); // 0
469p(variable_b); // 1
470p(variable_c); // "Third"
471p(variable_d); // 3
472\end{lstlisting}
473
474
475\section{Enumeration Storage}
476
477
478\subsection{Enumeration Variable}
479
480Although \CFA enumeration captures three different attributes, an enumeration instance does not store all this information.
481The @sizeof@ a \CFA enumeration instance is always 4 bytes, the same size as a C enumeration instance (@sizeof( int )@).
482It comes from the fact that:
483\begin{enumerate}
484\item
485a \CFA enumeration is always statically typed;
486\item
487it is always resolved as one of its attributes regarding real usage.
488\end{enumerate}
489When creating an enumeration instance @colour@ and assigning it with the enumerator @Colour.Green@, the compiler allocates an integer variable and stores the position 1.
490The invocations of @position()@, @value()@, and @label()@ turn into calls to special functions defined in the prelude:
491\begin{lstlisting}[label=lst:companion_call]
492position( green );
493>>> position( Colour, 1 ) -> int
494value( green );
495>>> value( Colour, 1 ) -> T
496label( green );
497>>> label( Colour, 1) -> char *
498\end{lstlisting}
499@T@ represents the base type declared in the \CFA enumeration definition, which is @char *@ in the example.
500These generated functions are \emph{companion functions}; they take a \emph{companion} object and the position as parameters.
501
502
503\subsection{Enumeration Data}
504
505\begin{lstlisting}[label=lst:enumeration_backing_data]
506enum(T) E { ... };
507// backing data
508T * E_values;
509char ** E_labels;
510\end{lstlisting}
511Storing values and labels as arrays can sometimes help support enumeration features.
512However, these data structures are overhead for a program. We want to reduce the memory usage for enumeration support by:
513\begin{itemize}
514 \item Generating the data arrays only if necessary.
515 \item The compilation units share the data structures.
516 No extra overhead if the data structures are requested multiple times.
517\end{itemize}
518
519
520\section{Unification}
521
522\subsection{Enumeration as Value}
523\label{section:enumeration_as_value}
524A \CFA enumeration with base type @T@ can be used seamlessly as @T@, without explicitly calling the pseudo-function @value@.
525\begin{lstlisting}[label=lst:implicit_conversion]
526char * green_value = Colour.Green; // "G"
527// Is equivalent to
528// char * green_value = value( Colour.Green ); "G"
529\end{lstlisting}
530
531
532\subsection{Unification Distance}
533
534\begin{lstlisting}[label=lst:unification_distance_example]
535T_2 Foo(T1);
536\end{lstlisting}
537The @Foo@ function expects a parameter with type @T1@. In C, only a value with the exact type T1 can be used as a parameter for @Foo@. In \CFA, @Foo@ accepts value with some type @T3@ as long as @distance(T1, T3)@ is not @Infinite@.
538
539@path(A, B)@ is a compiler concept that returns one of the following:
540\begin{itemize}
541 \item Zero or 0, if and only if $A == B$.
542 \item Safe, if B can be used as A without losing its precision, or B is a subtype of A.
543 \item Unsafe, if B loses its precision when used as A, or A is a subtype of B.
544 \item Infinite, if B cannot be used as A. A is not a subtype of B and B is not a subtype of A.
545\end{itemize}
546
547For example, @path(int, int)==Zero@, @path(int, char)==Safe@, @path(int, double)==Unsafe@, @path(int, struct S)@ is @Infinite@ for @struct S{}@.
548@distance(A, C)@ is the minimum sum of paths from A to C. For example, if @path(A, B)==i@, @path(B, C)==j@, and @path(A, C)==k@, then \[ \mathit{distance}(A,C) = \min(\mathit{path}(A,B) + \mathit{path}(B,C),\ \mathit{path}(A,C)) = \min(i+j,\ k). \]
549
550(Skip over the distance matrix here because it is mostly irrelevant for enumeration discussion. In the actual implementation, distance( E, T ) is 1.)
551
552The arithmetic of distance is the following:
553\begin{itemize}
554 \item $Zero + v= v$, for some value v.
555 \item $Safe * k < Unsafe$, for finite k.
556 \item $Unsafe * k < Infinite$, for finite k.
557 \item $Infinite + v = Infinite$, for some value v.
558\end{itemize}
559
560For @enum(T) E@, @path(T, E)==Safe@ and @path(E,T)==Infinite@. In other words, enumeration type E can be @safely@ used as type T, but type T cannot be used when the resolution context expects a variable with enumeration type @E@.
561
562
563\subsection{Variable Overloading and Parameter Unification}
564
565\CFA allows variable names to be overloaded. It is possible to overload a variable that has type T and an enumeration with type T.
566\begin{lstlisting}[label=lst:variable_overload]
567char * green = "Green";
568Colour green = Colour.Green; // "G"
569
570char * bar(char * s) { return s; }
571char * foo(Colour c) { return value( c ); }
572
573foo( green ); // "G"
574bar( green ); // "Green"
575\end{lstlisting}
576\CFA's conversion distance helps disambiguate this overloading. For the function @bar@, which expects the parameter @s@ to have type @char *@, @distance(char *, char *) == Zero@ while @distance(char *, Colour) == Safe@ (the path from @char *@ to the enumeration with base type @char *@), so \CFA chooses the @green@ with type @char *@ unambiguously. On the other hand, for the function @foo@, @distance(Colour, char *)@ is @Infinite@, so @foo@ picks the @green@ with type @Colour@.
577
578\subsection{Function Overloading}
579Similarly, functions can be overloaded with different signatures. \CFA picks the correct function entity based on the distance between parameter types and the arguments.
580\begin{lstlisting}[label=lst:function_overload]
581Colour green = Colour.Green;
582void foo(Colour c) { sout | "It is an enum"; } // First foo
583void foo(char * s) { sout | "It is a string"; } // Second foo
584foo( green ); // "It is an enum"
585\end{lstlisting}
586Because @distance(Colour, Colour)@ is @Zero@ and @distance(char *, Colour)@ is @Safe@, \CFA determines that @foo( green )@ is a call to the first @foo@.
587
588\subsection{Attributes Functions}
589The pseudo-function @value()@ ``unboxes'' the enumeration and the type of the expression is the underlying type. Therefore, in Section~\ref{section:enumeration_as_value}, when assigning @Colour.Green@ to a variable typed @char *@, the resolution distance is @Safe@, while assigning @value( Colour.Green )@ to @char *@ has resolution distance @Zero@.
590
591\begin{lstlisting}[label=lst:declaration_code]
592int s1;
593\end{lstlisting}
594The generated code for an enumeration instance is simply an @int@, which holds the position of the enumerator. Each usage of variable @s1@ is converted to return one of its attributes: label, value, or position, according to the @Unification@ rule.
595
596% \subsection{Unification and Resolution (this implementation will probably not be used, safe as reference for now)}
597
598% \begin{lstlisting}
599% enum Colour( char * ) { Red = "R", Green = "G", Blue = "B" };
600% \end{lstlisting}
601% The @EnumInstType@ is convertible to other types.
602% A \CFA enumeration expression is implicitly \emph{overloaded} with its three different attributes: value, position, and label.
603% The \CFA compilers need to resolve an @EnumInstType@ as one of its attributes based on the current context.
604
605% \begin{lstlisting}[caption={Null Context}, label=lst:null_context]
606% {
607% Colour.Green;
608% }
609% \end{lstlisting}
610% In example~\ref{lst:null_context}, the environment gives no information to help with the resolution of @Colour.Green@.
611% In this case, any of the attributes is resolvable.
612% According to the \textit{precedence rule}, the expression with @EnumInstType@ resolves as @value( Colour.Green )@.
613% The @EnumInstType@ is converted to the type of the value, which is statically known to the compiler as @char *@.
614% When the compilation reaches the code generation, the compiler outputs code for type @char *@ with the value @"G"@.
615% \begin{lstlisting}[caption={Null Context Generated Code}, label=lst:null_context]
616% {
617% "G";
618% }
619% \end{lstlisting}
620% \begin{lstlisting}[caption={int Context}, label=lst:int_context]
621% {
622% int g = Colour.Green;
623% }
624% \end{lstlisting}
625% The assignment expression gives a context for the EnumInstType resolution.
626% The EnumInstType is used as an @int@, and \CFA needs to determine which of the attributes can be resolved as an @int@ type.
627% The functions $Unify( T1, T2 ): bool$ take two types as parameters and determine if one type can be used as another.
628% In example~\ref{lst:int_context}, the compiler is trying to unify @int@ and @EnumInstType@ of @Colour@.
629% $$Unification( int, EnumInstType<Colour> )$$ which turns into three Unification call
630% \begin{lstlisting}[label=lst:attr_resolution_1]
631% {
632% Unify( int, char * ); // unify with the type of value
633% Unify( int, int ); // unify with the type of position
634% Unify( int, char * ); // unify with the type of label
635% }
636% \end{lstlisting}
637% \begin{lstlisting}[label=lst:attr_resolution_precedence]
638% {
639% Unification( T1, EnumInstType<T2> ) {
640% if ( Unify( T1, T2 ) ) return T2;
641% if ( Unify( T1, int ) ) return int;
642% if ( Unify( T1, char * ) ) return char *;
643% Error: Cannot Unify T1 with EnumInstType<T2>;
644% }
645% }
646% \end{lstlisting}
647% After the unification, @EnumInstType@ is replaced by its attributes.
648
649% \begin{lstlisting}[caption={Unification Functions}, label=lst:unification_func_call]
650% {
651% T2 foo ( T1 ); // function take variable with T1 as a parameter
652% foo( EnumInstType<T3> ); // Call foo with a variable has type EnumInstType<T3>
653% >>>> Unification( T1, EnumInstType<T3> )
654% }
655% \end{lstlisting}
656% % The conversion can work backward: in restrictive cases, attributes of can be implicitly converted back to the EnumInstType.
657% Backward conversion:
658% \begin{lstlisting}[caption={Unification Functions}, label=lst:unification_func_call]
659% {
660% enum Colour colour = 1;
661% }
662% \end{lstlisting}
663
664% \begin{lstlisting}[caption={Unification Functions}, label=lst:unification_func_call]
665% {
666% Unification( EnumInstType<Colour>, int ) >>> label
667% }
668% \end{lstlisting}
669% @int@ can be unified with the label of Colour.
670% @5@ is a constant expression $\Rightarrow$ Compiler knows the value during the compilation $\Rightarrow$ turns it into
671% \begin{lstlisting}
672% {
673% enum Colour colour = Colour.Green;
674% }
675% \end{lstlisting}
676% Steps:
677% \begin{enumerate}
678% \item
679% identify @1@ as a constant expression with type @int@, and the value is statically known as @1@
680% \item
681% @unification( EnumInstType<Colour>, int )@: @position( EnumInstType< Colour > )@
682% \item
683% return the enumeration constant at position 1
684% \end{enumerate}
685% \begin{lstlisting}
686% {
687% enum T (int) { ... } // Declaration
688% enum T t = 1;
689% }
690% \end{lstlisting}
691% Steps:
692% \begin{enumerate}
693% \item
694% identify @1@ as a constant expression with type @int@, and the value is statically known as @1@
695% \item
696% @unification( EnumInstType<Colour>, int )@: @value( EnumInstType< Colour > )@
697% \item
698% return the FIRST enumeration constant that has the value 1, by searching through the values array
699% \end{enumerate}
700% The downside of the precedence rule: @EnumInstType@ $\Rightarrow$ @int ( value )@ $\Rightarrow$ @EnumInstType@ may return a different @EnumInstType@ because the value can be repeated and there is no way to know which one is expected $\Rightarrow$ want uniqueness
701
702% \subsection{Casting}
703% Casting an EnumInstType to some other type T works similarly to unify the EnumInstType with T. For example:
704% \begin{lstlisting}
705% enum( int ) Foo { A = 10, B = 100, C = 1000 };
706% (int) Foo.A;
707% \end{lstlisting}
708% The \CFA-compiler unifies @EnumInstType<int>@ with int, with returns @value( Foo.A )@, which has statically known value 10. In other words, \CFA-compiler is aware of a cast expression, and it forms the context for EnumInstType resolution. The expression with type @EnumInstType<int>@ can be replaced by the compile with a constant expression 10, and optionally discard the cast expression.
709
710% \subsection{Value Conversion}
711% As discussed in section~\ref{lst:var_declaration}, \CFA only saves @position@ as the necessary information. It is necessary for \CFA to generate intermediate code to retrieve other attributes.
712
713% \begin{lstlisting}
714% Foo a; // int a;
715% int j = a;
716% char * s = a;
717% \end{lstlisting}
718% Assume stores a value x, which cannot be statically determined. When assigning a to j in line 2, the compiler @Unify@ j with a, and returns @value( a )@. The generated code for the second line will be
719% \begin{lstlisting}
720% int j = value( Foo, a )
721% \end{lstlisting}
722% Similarly, the generated code for the third line is
723% \begin{lstlisting}
724% char * j = label( Foo, a )
725% \end{lstlisting}
726
727
728\section{Enumerator Initialization}
729An enumerator must have a deterministic immutable value, which is either explicitly initialized in the enumeration definition or implicitly initialized by rules.
730
731\subsection{C Enumeration Rule}
732A C enumeration has an integral type. If not initialized, the first enumerator implicitly has the integral value 0, and each other enumerator has a value equal to its $\mathit{predecessor} + 1$.
733
734\subsection{Auto Initializable}
735\label{s:AutoInitializable}
736
737
738\CFA enumerations have the same rule in enumeration constant initialization.
739However, only \CFA types that have defined traits for @zero_t@, @one_t@, and an addition operator can be automatically initialized by \CFA.
740
741Specifically, a type is auto-initializable only if it satisfies the trait @AutoInitializable@:
742\begin{lstlisting}
743forall(T)
744trait AutoInitializable {
745 void ?{}( T & t, zero_t );
746 T ?++( T & t );
747};
748\end{lstlisting}
749An example of a user-defined @AutoInitializable@ is:
750\begin{lstlisting}[label=lst:sample_auto_Initializable]
751struct Odd { int i; };
752void ?{}( Odd & t, zero_t ) { t.i = 1; };
753Odd ?++( Odd t1 ) { return Odd( t1.i + 2); };
754\end{lstlisting}
755When the type of an enumeration is @AutoInitializable@, implicit initialization is available.
756\begin{lstlisting}[label=lst:sample_auto_Initializable_usage]
757enum AutoInitUsage(Odd) {
758 A, B, C = 7, D
759};
760\end{lstlisting}
761In the example, no initializer is specified for the first enumeration constant @A@, so \CFA initializes it with the value of @zero_t@, which is 1.
762@B@ and @D@ have the values of their $predecessor++$, where @one_t@ has the value 2.
763Therefore, the enumeration is initialized as follows:
764\begin{lstlisting}[label=lst:sample_auto_Initializable_usage_gen]
765enum AutoInitUsage(Odd) {
766 A = 1, B = 3, C = 7, D = 9
767};
768\end{lstlisting}
769Note that there is no mechanism to prevent an even value for the direct initialization, such as @C = 6@.
770
771In \CFA, character, integral, float, and imaginary types are all @AutoInitializable@.
772\begin{lstlisting}[label=lst:letter]
773enum Alphabet( int ) {
774 A = 'A', B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z,
775 a = 'a', b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z
776};
777print( "%c, %c, %c", Alphabet.F, Alphabet.o, Alphabet.z );
778>>> F, o, z
779\end{lstlisting}
780\section{Enumeration Features}
781\subsection{Iteration and Range}
782
783It is convenient to iterate over a \CFA enumeration value, e.g.:
784\begin{lstlisting}[label=lst:range_functions]
785for ( Alphabet alph; Alphabet ) { sout | alph; }
786>>> A B C ... z
787\end{lstlisting}
788The for-loop uses the enumeration type @Alphabet@ as its range, and iterates through all enumerators in the order defined in the enumeration.
789@alph@ is the iterating enumeration object, which returns the value of an @Alphabet@ in this context according to the precedence rule.
790
791\textbullet\ \CFA offers a shorthand for iterating all enumeration constants:
792\begin{lstlisting}[label=lst:range_functions]
793for ( Alphabet alph ) { sout | alph; }
794>>> A B C ... z
795\end{lstlisting}
796
797The following are examples for constructing for-control using an enumeration. Note that the type declaration of the iterating variable is optional, because \CFA can infer the type as EnumInstType based on the range expression, and possibly convert it to one of its attribute types.
798
799\textbullet\ H is implicit up-to exclusive range [0, H).
800\begin{lstlisting}[label=lst:range_function_1]
801for ( alph; Alphabet.D ) { sout | alph; }
802>>> A B C
803\end{lstlisting}
804
805\textbullet\ @~= H@ is implicit up-to inclusive range [0,H].
806\begin{lstlisting}[label=lst:range_function_2]
807for ( alph; ~= Alphabet.D ) { sout | alph; }
808>>> A B C D
809\end{lstlisting}
810
811\textbullet\ @L ~ H@ is explicit up-to exclusive range [L,H).
812\begin{lstlisting}[label=lst:range_function_3]
813for ( alph; Alphabet.B ~ Alphabet.D ) { sout | alph; }
814// for ( Alphabet alph = Alphabet.B; alph < Alphabet.D; alph += 1 ); 1 is one_t
815>>> B C
816\end{lstlisting}
817
818\textbullet\ @L ~= H@ is explicit up-to inclusive range [L,H].
819\begin{lstlisting}[label=lst:range_function_4]
820for ( alph; Alphabet.B ~= Alphabet.D ) { sout | alph; }
821>>> B C D
822\end{lstlisting}
823
824\textbullet\ @L -~ H@ is explicit down-to exclusive range [H,L), where L and H are implicitly interchanged to make the range down-to.
825\begin{lstlisting}[label=lst:range_function_5]
826for ( alph; Alphabet.D -~ Alphabet.B ) { sout | alph; }
827>>> D C
828\end{lstlisting}
829
830\textbullet\ @L -~= H@ is explicit down-to inclusive range [H,L], where L and H are implicitly interchanged to make the range down-to.
831\begin{lstlisting}[label=lst:range_function_6]
832for ( alph; Alphabet.D -~= Alphabet.B ) { sout | alph; }
833>>> D C B
834\end{lstlisting}
835
836A user can specify the ``step size'' of an iteration. There are two different stepping schemes for an enumeration for-loop.
837\begin{lstlisting}[label=lst:range_function_stepping]
838enum(int) Sequence { A = 10, B = 12, C = 14, D = 16, E = 18 };
839for ( s; Sequence.A ~= Sequence.E ~ 1 ) { sout | s; }
840>>> 10 12 14 16 18
841for ( s; Sequence.A ~= Sequence.E; s+=1 ) { sout | s; }
842>>> 10 11 12 13 14 15 16 17 18
843\end{lstlisting}
844The first syntax steps to the next enumeration constant, which is the default stepping scheme if not explicitly specified. The second syntax, on the other hand, applies @operator+=@ with @one_t@ to @value( s )@. Therefore, the second syntax is equivalent to
845\begin{lstlisting}[label=lst:range_function_stepping_converted]
846for ( typeof( value(Sequence.A) ) s=value( Sequence.A ); s <= Sequence.E; s+=1 ) { sout | s; }
847>>> 10 11 12 13 14 15 16 17 18
848\end{lstlisting}
849
850% \PAB{Explain what each loop does.}
851
852It is also possible to iterate over an enumeration's labels, implicitly or explicitly:
853\begin{lstlisting}[label=lst:range_functions_label_implicit]
854for ( char * alph; Alphabet )
855\end{lstlisting}
856This for-loop implicitly iterates every label of the enumeration, because a label is the only valid resolution to @alph@ with type @char *@ in this case.
857If the value can also be resolved as the @char *@, you might iterate the labels explicitly with the array iteration.
858\begin{lstlisting}[label=lst:range_functions_label_implicit]
859for ( char * ch; labels( Alphabet ) )
860\end{lstlisting}
861
862
863% \subsection{Non-uniform Type}
864% TODO: Working in Progress, might need to change other sections. Conflict with the resolution right now.
865
866% \begin{lstlisting}
867% enum T( int, char * ) {
868% a=42, b="Hello World"
869% };
870% \end{lstlisting}
871% The enum T declares two different types: int and char *. The enumerators of T hold values of one of the declared types.
872
873\subsection{Enumeration Inheritance}
874
875\begin{lstlisting}[label=lst:EnumInline]
876enum( char * ) Name { Jack = "Jack", Jill = "Jill" };
877enum /* inferred */ Name2 { inline Name, Sue = "Sue", Tom = "Tom" };
878\end{lstlisting}
879\lstinline{inline} allows enumeration Name2 to inherit enumerators from Name by containment, and a Name enumeration is a subtype of enumeration Name2. An enumeration instance of type Name can be used where an instance of Name2 is expected.
880\begin{lstlisting}[label=lst:EnumInline]
881Name Fred;
882void f( Name2 );
883f( Fred );
884\end{lstlisting}
885If enumeration A declares @inline B@ in its enumeration body, enumeration A is the ``inlining enum'' and enumeration B is the ``inlined enum''.
886
887An enumeration can inline at most one other enumeration. The inline declaration must be placed before the first enumerator of the inlining enum. The inlining enum has all the enumerators from the inlined enum, with the same labels, values, and position.
888\begin{lstlisting}[label=lst:EnumInline]
889enum /* inferred */ Name2 { inline Name, Sue = "Sue", Tom = "Tom" };
890// is equivalent to enum Name2 { Jack = "Jack", Jill="Jill", Sue = "Sue", Tom = "Tom" };
891\end{lstlisting}
892Name.Jack is equivalent to Name2.Jack. Their attributes are all identical. Opening both Name and Name2 in the same scope will not introduce ambiguity.
893\begin{lstlisting}[label=lst:EnumInline]
894with( Name, Name2 ) { Jack; } // Name.Jack and Name2.Jack are equivalent. No ambiguity
895\end{lstlisting}
896
897\section{Implementation}
898
899\subsection{Static Attribute Expression}
900\begin{lstlisting}[label=lst:static_attr]
901enum( char * ) Colour {
902 Red = "red", Blue = "blue", Green = "green"
903};
904\end{lstlisting}
905An enumerator expression returns its enumerator value as a constant expression with no runtime cost. For example, @Colour.Red@ is equivalent to the constant expression @"red"@, and \CFA finishes the expression evaluation before generating the corresponding C code. Applying a pseudo-function to a constant enumerator expression results in a constant expression as well. @value( Colour.Red )@, @position( Colour.Red )@, and @label( Colour.Red )@ are equivalent to constant expressions with @char *@ value @"red"@, @int@ value 0, and @char *@ value @"Red"@, respectively.
906
907\subsection{Runtime Attribute Expression and Weak Referenced Data}
908\begin{lstlisting}[label=lst:dynamic_attr]
909Colour c;
910...
911value( c ); // or c
912\end{lstlisting}
913An enumeration variable c is equivalent to an integer variable with the value of @position( c )@. In Example~\ref{lst:dynamic_attr}, the value of enumeration variable c is unknown at compile time. In this case, the pseudo-function calls are reduced to expressions that return the enumerator values at runtime.
914
915\CFA stores the values and labels in @const@ arrays to provide runtime lookup for enumeration information.
916
917\begin{lstlisting}[label=lst:attr_array]
918const char * Colour_labels [3] = { "Red", "Blue", "Green" };
919const char * Colour_values [3] = { "red", "blue", "green" };
920\end{lstlisting}
921The \CFA compiler transforms the attribute expressions into array accesses.
922\begin{lstlisting}[label=lst:attr_array_access]
923position( c ) // c; an integer
924value( c ); // Colour_values[c]
925label( c ); // Colour_labels[c]
926\end{lstlisting}
927
928To avoid unnecessary memory usage, the label and value arrays are only generated as needed, and are generated only once across all compilation units. By default, \CFA defers the declaration of the label and value arrays until a call to an attribute function with a dynamic value. If an attribute function is never called on a dynamic value of an enumerator, the arrays are never allocated. Once the arrays are created, all compilation units share a weak reference to the allocated arrays.
929
930\subsection{Enum Prelude}
931
932\begin{lstlisting}[label=lst:enum_func_dec]
933forall( T ) {
934 unsigned position( unsigned );
935 T value( unsigned );
936 char * label( unsigned );
937}
938\end{lstlisting}
939\CFA loads the declarations of the enumeration functions from @enum.hfa@.
940
941\subsection{Internal Representation}
942
943The definition of an enumeration is represented by an internal type called @EnumDecl@. At the minimum, it stores all the information needed to construct the companion object. Therefore, an @EnumDecl@ can be represented as the following:
944\begin{lstlisting}[label=lst:EnumDecl]
945forall(T)
946class EnumDecl {
947 T* values;
948 char** label;
949};
950\end{lstlisting}
951
952The internal representation of an enumeration constant is @EnumInstType@.
953An @EnumInstType@ has a reference to the \CFA-enumeration declaration and the position of the enumeration constant.
954\begin{lstlisting}[label=lst:EnumInstType]
955class EnumInstType {
956 EnumDecl enumDecl;
957 int position;
958};
959\end{lstlisting}
960In the later discussion, we use @EnumDecl<T>@ to symbolize an @EnumDecl@ parameterized by type T, and @EnumInstType<T>@ is a declared instance of @EnumDecl<T>@.
961
962\begin{lstlisting}[caption={Enum Type Functions}, label=lst:cforall_enum_data]
963const T * const values;
964const char * label;
965int length;
966\end{lstlisting}
967Companion data are the necessary information to represent an enumeration. They are stored as standalone pieces, rather than a structure. These data are loaded ``on demand''.
968Companion data are needed only if the corresponding pseudo-functions are called. For example, the values of the enumeration Workday are loaded only if at least one compilation unit calls @value( Workday )@. Once the values are loaded, all compilation units share the values array to reduce memory usage.
969
970
971% \subsection{(Rework) Companion Object and Companion Function}
972
973% \begin{lstlisting}[caption={Enum Type Functions}, label=lst:cforall_enum_functions]
974% forall( T )
975% struct Companion {
976% const T * const values;
977% const char * label;
978% int length;
979% };
980% \end{lstlisting}
981% \CFA generates companion objects, an instance of structure that encloses @necessary@ data to represent an enumeration. The size of the companion is unknown at the compilation time, and it "grows" in size to compensate for the @usage@.
982
983% The companion object is singleton across the compilation (investigation).
984
985% \CFA generates the definition of companion functions.
986% Because \CFA implicitly stores an enumeration instance as its position, the companion function @position@ does nothing but return the position it is passed.
987% Companions function @value@ and @label@ return the array item at the given position of @values@ and @labels@, respectively.
988% \begin{lstlisting}[label=lst:companion_definition]
989% int position( Companion o, int pos ) { return pos; }
990% T value( Companion o, int pos ) { return o.values[ pos ]; }
991% char * label( Companion o, int pos ) { return o.labels[ pos ]; }
992% \end{lstlisting}
993% Notably, the @Companion@ structure definition, and all companion objects, are visible to users.
994% A user can retrieve values and labels defined in an enumeration by accessing the values and labels directly, or indirectly by calling @Companion@ functions @values@ and @labels@
995% \begin{lstlisting}[label=lst:companion_definition_values_labels]
996% Colour.values; // read the Companion's values
997% values( Colour ); // same as Colour.values
998% \end{lstlisting}
999
1000\subsection{Companion Traits (experimental)}
1001The semantics are not yet settled, and it might replace a companion object.
1002\begin{lstlisting}[label=lst:companion_trait]
1003forall(T1) {
1004 trait Companion(otype T2<otype T1>) {
1005 T1 value(otype T2<otype T1> const &);
1006 int position(otype T2<otype T1> const &);
1007 char * label(otype T2<otype T1> const &);
1008 }
1009}
1010\end{lstlisting}
1011All enumerations implicitly implement the Companion trait, an interface to access attributes. The Companion can be a data type because it fulfills the requirements to have concrete instances, which are:
1012
1013\begin{enumerate}
1014 \item The instance of enumeration has a single polymorphic type.
1015 \item Each assertion should use the type once as a parameter.
1016\end{enumerate}
1017
1018\begin{lstlisting}
1019enum(int) Weekday {
1020 Monday=10, Tuesday, ...
1021};
1022
1023T value( enum Weekday<T> & this);
1024int position( enum Weekday<T> & this )
1025char * label( enum Weekday<T> & this )
1026
1027trait Companion obj = (enum(int)) Workday.Weekday;
1028value(obj); // 10
1029\end{lstlisting}
1030The enumeration comes with a default implementation of the Companion trait's functions. Using the Companion functions causes \CFA to allocate and initialize the necessary companion arrays, and to return the data at the position represented by the enumeration.
1031(...)
1032
1033\subsection{User-Defined Enumeration Functions}
1034
1035Companion objects make extending features for \CFA enumeration easy.
1036\begin{lstlisting}[label=lst:companion_user_definition]
1037char * charactic_string( Companion o, int position ) {
1038 return sprintf( "Label: %s; Value: %s", label( o, position ), value( o, position) );
1039}
1040printf( charactic_string ( Color, 1 ) );
1041>>> Label: Green; Value: G
1042\end{lstlisting}
1043Defining a function that takes a Companion object effectively defines that function for all \CFA enumerations.
1044
1045The \CFA compiler turns a function call that takes an enumeration instance as a parameter into a function call with a companion object plus a position.
1046Therefore, a user can use the syntax with a user-defined enumeration function call:
1047\begin{lstlisting}[label=lst:companion_user_definition]
1048charactic_string( Color.Green ); // equivalent to charactic_string( Color, 1 )
1049>>> Label: Green; Value: G
1050\end{lstlisting}
1051Similarly, the user can work with the enumeration type itself: (see section ref...)
1052\begin{lstlisting}[ label=lst:companion_user_definition]
1053void print_enumerators ( Companion o ) {
1054 for ( c : Companion o ) {
1055 sout | label (c) | value( c ) ;
1056 }
1057}
1058print_enumerators( Colour );
1059\end{lstlisting}
1060
1061
1062\subsection{Declaration}
1063
1064The qualified enumeration syntax is dedicated to \CFA enumeration.
1065\begin{lstlisting}[label=lst:range_functions]
1066enum (type_declaration) name { enumerator = const_expr, enumerator = const_expr, ... }
1067\end{lstlisting}
1068A compiler stores the name, the underlying type, and all enumerators in an @enumeration table@.
1069During the $Validation$ pass, the compiler links the type declaration to the type's definition.
1070It ensures that the name of an enumerator is unique within the enumeration body, and checks if all values of the enumerator have the declaration type.
1071If the declared type is not @AutoInitializable@, \CFA rejects the enumeration definition.
1072Otherwise, it attempts to initialize enumerators with the enumeration initialization pattern. (a reference to a future initialization pattern section)
1073
1074\begin{lstlisting}[label=lst:init]
1075struct T { ... };
1076void ?{}( T & t, zero_t ) { ... };
1077void ?{}( T & t, one_t ) { ... };
1078T ?+?( T & lhs, T & rhs ) { ... };
1079
1080enum (T) Sample {
1081 Zero: 0 /* zero_t */,
1082 One: Zero + 1 /* ?+?( Zero, one_t ) */ , ...
1083};
1084\end{lstlisting}
1085Challenge: \\
1086The value of an enumerator, or the initializer, requires @const_expr@.
1087While previously getting around the issue by pushing it to the C compiler, it might not work anymore because of the user-defined types, user-defined @zero_t@, @one_t@, and addition operation.
1088Might not be able to implement a \emph{correct} static check.
1089
1090\CFA auto-generates a Companion object for the declared enumeration.
1091\begin{lstlisting}[label=lst:companion]
1092Companion( T ) Sample {
1093 .values: { 0, 0+1, 0+1+1, 0+1+1+1, ... }, /* 0: zero_t, 1: one_t, +: ?+?{} */
1094 .labels: { "Zero", "One", "Two", "Three", ...},
1095 .length: /* number of enumerators */
1096};
1097\end{lstlisting}
1098\CFA stores values as intermediate expressions because the result of the function call to the function @?+?(T&, T&)@ is statically unknown to \CFA.
1099But the result is computed at run time, and the compiler ensures the @values@ are not changed.
1100
1101\subsection{Qualified Expression}
1102
1103\CFA uses qualified expression to address the scoping of \CFA-enumeration.
1104\begin{lstlisting}[label=lst:qualified_expression]
1105aggregation_name.field;
1106\end{lstlisting}
1107The qualified expression is not dedicated to \CFA enumeration.
1108It is a feature that is supported by other aggregations in \CFA as well, including a C enumeration.
1109Although C enumerations are unscoped, the qualified expression syntax still helps to disambiguate names in the context.
1110\CFA recognizes if the expression references a \CFA aggregation by searching for @aggregation_name@ in the \CFA enumeration table.
1111If the @aggregation_name@ is identified as a \CFA enumeration, the compiler checks if @field@ is present in the declared \CFA enumeration.
1112
1113\subsection{Instance Declaration}
1114
1115
1116\begin{lstlisting}[label=lst:var_declaration]
1117enum Sample s1;
1118\end{lstlisting}
1119
1120The declaration of a \CFA-enumeration variable has the same syntax as the C-enumeration. Internally, such a variable will be represented as an EnumInstType.
1121
1122\section{Related Work}
1123\label{s:RelatedWork}
1124
1125Enumerations exist in many popular programming languages, e.g., Pascal, Ada, \Csharp, \CC, Go, Java, Modula-3, Rust, Swift, Python, and the algebraic data-type in functional programming.
1126There is a large set of overlapping features among these languages, but each language has its own unique restrictions and extensions.
1127
1128\subsection{(Free) Pascal}
1129
1130Free Pascal is a modern object-oriented version of the classic Pascal programming language.
1131It allows a C-style enumeration type, where enumerators must be assigned in ascending numerical order with a constant expression and the range can be non-consecutive.
1132\begin{lstlisting}[language=pascal,{moredelim=**[is][\color{red}]{@}{@}}]
1133Type EnumType = ( one, two, three, forty @= 40@, fortyone );
1134\end{lstlisting}
1135Pseudo-functions @Pred@ and @Succ@ can only be used if the range is consecutive.
1136The underlying type is an implementation-defined integral type large enough to hold all enumerated values; it does not have to be the smallest possible type.
1137The size of the underlying integral type can be explicitly specified using compiler directive @$PACKENUM@~$N$, where $N$ is the number of bytes, e.g.:
1138\begin{lstlisting}[language=pascal,{moredelim=**[is][\color{red}]{@}{@}}]
1139Type @{$\color{red}\$$PACKENUM 1}@ SmallEnum = ( one, two, three );
1140 @{$\color{red}\$$PACKENUM 4}@ LargeEnum = ( BigOne, BigTwo, BigThree );
1141Var S : SmallEnum; { 1 byte }
1142 L : LargeEnum; { 4 bytes}
1143\end{lstlisting}
1144
1145
1146\subsection{Ada}
1147
1148An enumeration type is defined as a list of possible values:
1149\begin{lstlisting}[language=ada]
1150type RGB is (Red, Green, Blue);
1151\end{lstlisting}
1152Like for numeric types, where e.g., 1 is an integer literal, @Red@, @Green@ and @Blue@ are called the literals of this type.
1153There are no other values assignable to objects of this type.
1154
1155\paragraph{Operators and attributes} ~\newline
1156Apart from equality (@"="@), the only operators on enumeration types are the ordering operators: @"<"@, @"<="@, @"="@, @"/="@, @">="@, @">"@, where the order relation is given implicitly by the sequence of literals:
1157Each literal has a position, starting with 0 for the first, incremented by one for each successor.
1158This position can be queried via the @'Pos@ attribute; the inverse is @'Val@, which returns the corresponding literal. In our example:
1159\begin{lstlisting}[language=ada]
1160RGB'Pos (Red) = 0
1161RGB'Val (0) = Red
1162\end{lstlisting}
1163There are two other important attributes: @Image@ and @Value@.
1164@Image@ returns the string representation of the value (in capital letters), @Value@ is the inverse:
1165\begin{lstlisting}[language=ada]
1166RGB'Image ( Red ) = "RED"
1167RGB'Value ("Red") = Red
1168\end{lstlisting}
1169These attributes are important for simple IO (there are more elaborate IO facilities in @Ada.Text_IO@ for enumeration types).
1170Note that, since Ada is case-insensitive, the string given to @'Value@ can be in any case.
1171
1172\paragraph{Enumeration literals} ~\newline
1173Literals are overloadable, i.e. you can have another type with the same literals.
1174\begin{lstlisting}[language=ada]
1175type Traffic_Light is (Red, Yellow, Green);
1176\end{lstlisting}
1177Overload resolution within the context of use of a literal normally resolves which @Red@ is meant.
1178Only if you have an unresolvable overloading conflict, you can qualify with special syntax which @Red@ is meant:
1179\begin{lstlisting}[language=ada]
1180RGB'(Red)
1181\end{lstlisting}
1182Like many other declarative items, enumeration literals can be renamed.
1183In fact, such a literal is actually a function, so it has to be renamed as such:
1184\begin{lstlisting}[language=ada]
1185function Red return P.RGB renames P.Red;
1186\end{lstlisting}
1187Here, @RGB@ is assumed to be defined in package @P@, which is visible at the place of the renaming declaration.
1188Renaming makes @Red@ directly visible without necessity to resort to the use-clause.
1189
1190Note that redeclaration as a function does not affect the staticness of the literal.
1191
1192\paragraph{Characters as enumeration literals} ~\newline
1193Rather unique to Ada is the use of character literals as enumeration literals:
1194\begin{lstlisting}[language=ada]
1195type ABC is ('A', 'B', 'C');
1196\end{lstlisting}
1197This literal @'A'@ has nothing in common with the literal @'A'@ of the predefined type @Character@ (or @Wide_Character@).
1198
1199Every type that has at least one character literal is a character type.
1200For every character type, string literals and the concatenation operator @"&"@ are also implicitly defined.
1201\begin{lstlisting}[language=ada]
1202type My_Character is (No_Character, 'a', Literal, 'z');
1203type My_String is array (Positive range <>) of My_Character;
1204
1205S: My_String := "aa" & Literal & "za" & 'z';
1206T: My_String := ('a', 'a', Literal, 'z', 'a', 'z');
1207\end{lstlisting}
1208In this example, @S@ and @T@ have the same value.
1209
1210Ada's @Character@ type is defined that way.
1211See Ada Programming/Libraries/Standard.
1212
1213\paragraph{Booleans as enumeration literals} ~\newline
1214Booleans are also defined as an enumeration type:
1215\begin{lstlisting}[language=ada]
1216type Boolean is (False, True);
1217\end{lstlisting}
1218Special semantics are implied by this declaration, in that objects and expressions of this type can be used as conditions.
1219Note that the literals @False@ and @True@ are not Ada keywords.
1220
1221Thus it is not sufficient to declare a type with these literals and then hope objects of this type can be used like so:
1222\begin{lstlisting}[language=ada]
1223type My_Boolean is (False, True);
1224Condition: My_Boolean;
1225
1226if Condition then -- wrong, won't compile
1227\end{lstlisting}
1228
1229If you need your own Booleans (perhaps with special size requirements), you have to derive from the predefined Boolean:
1230\begin{lstlisting}[language=ada]
1231type My_Boolean is new Boolean;
1232Condition: My_Boolean;
1233
1234if Condition then -- OK
1235\end{lstlisting}
1236
1237\paragraph{Enumeration subtypes} ~\newline
1238You can use range to subtype an enumeration type:
1239\begin{lstlisting}[language=ada]
1240subtype Capital_Letter is Character range 'A' .. 'Z';
1241type Day_Of_Week is (Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday);
1242subtype Working_Day is Day_Of_Week range Monday .. Friday;
1243\end{lstlisting}
1244
1245\paragraph{Using enumerations} ~\newline
1246Enumeration types being scalar subtypes, type attributes such as @First@ and @Succ@ will allow stepping through a subsequence of the values.
1247\begin{lstlisting}[language=ada]
1248case Day_Of_Week'First is
1249 when Sunday =>
1250 ISO (False);
1251 when Day_Of_Week'Succ(Sunday) =>
1252 ISO (True);
1253 when Tuesday .. Saturday =>
1254 raise Program_Error;
1255end case;
1256\end{lstlisting}
1257A loop automatically steps through the values of the subtype's range.
1258The following loop filters week days to include only working days with an even position number:
1259\begin{lstlisting}[language=ada]
1260 for Day in Working_Day loop
1261 if Day_Of_Week'Pos(Day) mod 2 = 0 then
1262 Work_In_Backyard;
1263 end if;
1264 end loop;
1265\end{lstlisting}
1266Enumeration types can be used as array index subtypes, yielding a table feature:
1267\begin{lstlisting}[language=ada]
1268type Officer_ID is range 0 .. 50;
1269type Schedule is array (Working_Day) of Officer_ID;
1270\end{lstlisting}
1271
1272\begin{lstlisting}[language=ada]
1273type Subtype_Name is (Id1, Id2, Id3 ... );
1274\end{lstlisting}
1275In the general declaration form above, @Id1@, @Id2@, etc.\ are identifiers or character literals.
1276In either case, the legal values of the type are referred to as ``enumeration literals.''
1277Each of these values has a ``position number'' corresponding to its position in the list such that @Id1@ has position 0, @Id2@ has position 1, and the $N$th value has position $N-1$.
1278
1279\paragraph{Attributes of Enumeration Types} ~\newline
1280An enumeration type, @T@, has the following attributes: @T'First@, @T'Last@, @T'Range@, @T'Pred@, @T'Succ@, @T'Min@, @T'Max@, @T'Image@, @T'Wide_Image@, @T'Value@, @T'Wide_Value@, @T'Pos@, and @T'Val@ (pronounced ``T tick first'', ``T tick last'', etc.).
1281Most of these are illustrated in the example program given below, and most of them produce what you would intuitively expect based on their names.
1282
1283@T'Image@ and @T'Value@ form a complementary pair of attributes.
1284The former takes a value in @T@ and returns a @String@ representation of that value.
1285The latter takes a @String@ that is a representation of a value in @T@ and returns that value.
1286
1287@T'Pos@ and @T'Val@ form another complementary pair.
1288The former takes a value in @T@ and returns its position number.
1289The latter takes a position number and returns the corresponding value of type @T@.
1290
1291
1292\subsection{\Csharp}
1293
1294\subsection{\CC}
1295\label{s:C++RelatedWork}
1296
1297\CC is backwards compatible with C, so it inherited C's enumerations.
1298However, the following non-backwards compatible changes have been made.
1299\begin{quote}
13007.2 Change: \CC objects of enumeration type can only be assigned values of the same enumeration type.
1301In C, objects of enumeration type can be assigned values of any integral type. \\
1302Example:
1303\begin{lstlisting}
1304enum color { red, blue, green };
1305color c = 1; $\C{// valid C, invalid C++}$
1306\end{lstlisting}
1307\textbf{Rationale}: The type-safe nature of \CC. \\
1308\textbf{Effect on original feature}: Deletion of semantically well-defined feature. \\
1309\textbf{Difficulty of converting}: Syntactic transformation. (The type error produced by the assignment can be automatically corrected by applying an explicit cast.) \\
1310\textbf{How widely used}: Common.
1311\end{quote}
1312\begin{quote}
13137.2 Change: In \CC, the type of an enumerator is its enumeration.
1314In C, the type of an enumerator is @int@. \\
1315Example:
1316\begin{lstlisting}
1317enum e { A };
1318sizeof(A) == sizeof(int) $\C{// in C}$
1319sizeof(A) == sizeof(e) $\C{// in C++}$
1320/* and sizeof(int) is not necessarily equal to sizeof(e) */
1321\end{lstlisting}
1322\textbf{Rationale}: In \CC, an enumeration is a distinct type. \\
1323\textbf{Effect on original feature}: Change to semantics of well-defined feature. \\
1324\textbf{Difficulty of converting}: Semantic transformation. \\
1325\textbf{How widely used}: Seldom. The only time this affects existing C code is when the size of an enumerator is taken.
1326Taking the size of an enumerator is not a common C coding practice.
1327\end{quote}
1328Hence, the values in a \CC enumeration can only be its enumerators (without a cast).
1329While the storage size of an enumerator is up to the compiler, there is still an implicit conversion to @int@.
1330\begin{lstlisting}
1331enum E { A, B, C };
1332E e = A;
1333int i = A; i = e; $\C{// implicit conversions to int}$
1334\end{lstlisting}
1335\CC{11} added a scoped enumeration, \lstinline[language=c++]{enum class} (or \lstinline[language=c++]{enum struct}), so the enumerators are local to the enumeration and must be accessed using type qualification.
1336\begin{lstlisting}[language=c++,{moredelim=**[is][\color{red}]{@}{@}}]
1337enum class E { A, B, C };
1338E e = @E::@A; $\C{// qualified enumerator}$
1339e = B; $\C{// B not in scope}$
1340\end{lstlisting}
1341\CC{20} supports unscoped access with a \lstinline[language=c++]{using enum} declaration.
1342\begin{lstlisting}[language=c++,{moredelim=**[is][\color{red}]{@}{@}}]
1343enum class E { A, B, C };
1344@using enum E;@
1345E e = A; $\C{// direct access}$
1346e = B; $\C{// direct access}$
1347\end{lstlisting}
1348\CC{11} added the ability to explicitly declare the underlying integral type for \lstinline[language=c++]{enum class}.
1349\begin{lstlisting}[language=c++,{moredelim=**[is][\color{red}]{@}{@}}]
1350enum class RGB @: long@ { Red, Green, Blue };
1351enum class rgb @: char@ { Red = 'r', Green = 'g', Blue = 'b' };
1352enum class srgb @: signed char@ { Red = -1, Green = 0, Blue = 1 };
1353\end{lstlisting}
1354There is no implicit conversion between an \lstinline[language=c++]{enum class} type and its underlying type.
1355\begin{lstlisting}[language=c++,{moredelim=**[is][\color{red}]{@}{@}}]
1356rgb crgb = rgb::Red;
1357char ch = rgb::Red; ch = crgb; $\C{// disallowed}$
1358\end{lstlisting}
1359Finally, there is no mechanism to iterate through an enumeration, nor to use the enumeration type to declare an array dimension.
1360
1361
1362\subsection{Go}
1363
1364\subsection{Java}
1365
1366\subsection{Modula-3}
1367
1368\subsection{Rust}
1369
1370\subsection{Swift}
1371
1372\subsection{Python}
1373
1374\subsection{Algebraic Data Type}
1375
1376\end{document}
1377
1378% Local Variables: %
1379% tab-width: 4 %
1380% compile-command: "pdflatex enum.tex" %
1381% End: %
Note: See TracBrowser for help on using the repository browser.