source: doc/user/user.tex @ 6b6597c

aaron-thesisarm-ehcleanup-dtorsctordeferred_resndemanglerenumforall-pointer-decaygc_noraiijacob/cs343-translationjenkins-sandboxmemorynew-astnew-ast-unique-exprnew-envno_listpersistent-indexerpthread-emulationqualifiedEnumresolv-newstringwith_gc
Last change on this file since 6b6597c was 6b6597c, checked in by Peter A. Buhr <pabuhr@…>, 6 years ago

user manual updates, extend I/O test, fix memset in stdlib, workaround for fstream

  • Property mode set to 100644
File size: 172.1 KB
Line 
1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -*- Mode: Latex -*- %%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%%
3%% Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
4%%
5%% The contents of this file are covered under the licence agreement in the
6%% file "LICENCE" distributed with Cforall.
7%%
8%% user.tex --
9%%
10%% Author           : Peter A. Buhr
11%% Created On       : Wed Apr  6 14:53:29 2016
12%% Last Modified By : Peter A. Buhr
13%% Last Modified On : Thu Apr 14 21:30:07 2016
14%% Update Count     : 130
15%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
17% requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
18
19\documentclass[openright,twoside]{article}
20%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
21
22% Latex packages used in the document.
23\usepackage[T1]{fontenc}                                % allow Latin1 (extended ASCII) characters
24\usepackage{textcomp}
25\usepackage[latin1]{inputenc}
26\usepackage{upquote}
27\usepackage{fullpage,times}
28\usepackage{epic,eepic}
29\usepackage{xspace}
30\usepackage{varioref}
31\usepackage{listings}
32\usepackage{footmisc}
33\usepackage{comment}
34\usepackage{latexsym}                                   % \Box
35\usepackage{mathptmx}                                   % better math font with "times"
36\usepackage[pagewise]{lineno}
37\renewcommand{\linenumberfont}{\scriptsize\sffamily}
38\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
39\usepackage{breakurl}
40\renewcommand{\UrlFont}{\small\sf}
41
42%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
43
44% Bespoke macros used in the document.
45\input{common}
46
47%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
48
49% Names used in the document.
50
51\newcommand{\Version}{1.0.0}
52\newcommand{\CS}{C\raisebox{-0.9ex}{\large$^\sharp$}\xspace}
53
54%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
55
56\setcounter{secnumdepth}{3}                             % number subsubsections
57\setcounter{tocdepth}{3}                                % subsubsections in table of contents
58\makeindex
59
60%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
61
62\begin{document}
63\pagestyle{headings}
64\linenumbers                                            % comment out to turn off line numbering
65
66\title{\Huge
67\vspace*{1in}
68\CFA (\CFL) User Manual                         \\
69Version 1.0                                                     \\
70\vspace*{0.25in}
71\huge``describe not prescribe''         \\
72\vspace*{1in}
73}% title
74\author{\huge
75Peter A. Buhr and ...                           \\
76}% author
77\date{
78DRAFT \\
79\today
80}% date
81
82\pagenumbering{roman}
83\pagestyle{plain}
84
85\maketitle
86
87\vspace*{\fill}
88\thispagestyle{empty}
89\noindent
90\copyright\,2016 \CFA Project \\ \\
91\noindent
92This work is licensed under the Creative Commons Attribution 4.0 International License.
93To view a copy of this license, visit {\small\url{http://creativecommons.org/licenses/by/4.0}}.
94\vspace*{1in}
95
96\clearpage
97\pdfbookmark[1]{Contents}{section}
98\tableofcontents
99
100\clearpage
101\pagenumbering{arabic}
102
103
104\section{Introduction}
105
106\CFA\footnote{Pronounced ``C-for-all'', and written \CFA, CFA, or \CFL.} is a modern general-purpose programming-language, designed an as evolutionary step forward from the C programming language.
107The syntax of the \CFA language builds from C, and should look immediately familiar to C programmers.
108% Any language feature that is not described here can be assumed to be using the standard C11 syntax.
109\CFA adds many modern programming-language features, which directly leads to increased safety and productivity, while maintaining interoperability with existing C programs and achieving C performance.
110Like C, \CFA is a statically typed, procedural language with a low-overhead runtime, meaning there is no global garbage-collection.
111The primary new features include parametric-polymorphism routines and types, exceptions, concurrency, and modules.
112
113One of the main design philosophies of \CFA is to ``describe not prescribe'', which means \CFA tries to provide a pathway from low-level C programming to high-level \CFA programming, but it does not force programmers to ``do the right thing''.
114Programmers can cautiously add \CFA extensions to their C programs in any order and at any time to incrementally move towards safer, higher-level programming features.
115A programmer is always free to reach back to C from \CFA for any reason, and in many cases, new \CFA features have a fallback to a C mechanism.
116There is no notion or requirement for rewriting a legacy C program in \CFA;
117instead, a programmer evolves an existing C program into \CFA by incrementally incorporating \CFA features.
118New programs can be written in \CFA using a combination of C and \CFA features.
119\CC had a similar goal 30 years ago, but has struggled over the intervening time to incorporate modern programming-language features because of early design choices.
120\CFA has 30 years of hindsight and clean starting point.
121
122Like \CC, there may be both an old and new ways to achieve the same effect.
123For example, the following programs compare the \CFA and C I/O mechanisms.
124\begin{quote2}
125\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
126\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
127\begin{lstlisting}
128#include <fstream>
129int main( void ) {
130        int x = 0, y = 1, z = 2;
131        sout | x | y | z | endl;
132}
133\end{lstlisting}
134&
135\begin{lstlisting}
136#include <stdio.h>
137int main( void ) {
138        int x = 0, y = 1, z = 2;
139        printf( "%d %d %d\n", x, y, z );
140}
141\end{lstlisting}
142\end{tabular}
143\end{quote2}
144Both programs output the same result.
145While the \CFA I/O looks similar to the \CC output style, there are several important differences, such as automatic spacing between variables as in Python (see also~\VRef{s:IOLibrary}).
146
147This document is a reference manual for the \CFA programming language, targeted at \CFA programmers.
148Implementers may refer to the \CFA Programming Language Specification for details about the language syntax and semantics.
149In its current state, this document covers the intended core features of the language.
150Changes to the syntax and additional features are expected to be included in later revisions.
151% For additional information, see \url{http://wiki.do-lang.org}.
152
153
154\section{History}
155
156The \CFA project started with K-W C~\cite{Till89,Buhr94a}, which extended C with new declaration syntax, multiple return values from routines, and extended assignment capabilities using the notion of tuples.
157(See~\cite{Werther96} for some similar work, but for \CC.)
158The original \CFA project~\cite{Ditchfield92} extended the C type system with parametric polymorphism and overloading, as opposed to the \CC approach of object-oriented extensions to the C type-system.
159A first implementation of the core Cforall language was created~\cite{Bilson03,Esteves04}, but at the time there was little interesting in extending C, so work did not continue.
160As the saying goes, ``What goes around, comes around'', and there is now renewed interest in the C programming language because of legacy code-bases, so the \CFA project has been restarted.
161
162
163\section{Why fix C?}
164
165Even with all its problems, C is a very popular programming language because it allows writing software at virtually any level in a computer system without restriction.
166For systems programming, where direct access to hardware and dealing with real-time issues is a requirement, C is usually the language of choice.
167As well, there are millions of lines of C legacy code, forming the base for many software development projects (especially on UNIX systems).
168The TIOBE index (\url{http://www.tiobe.com/tiobe_index}) for March 2016 shows programming-language popularity, with Java 20.5\%, C 14.5\%, \CC 6.7\%, \CS 4.3\%, Python 4.3\%, and all other programming languages below 3\%.
169Hence, C is still an extremely important programming language, with double the usage of \CC, where \CC itself is largely C code.
170Finally, love it or hate it, C has been an important and influential part of computer science for 40 years and it appears it will continue to be for many more years.
171Unfortunately, C has too many problems and omissions to make it an acceptable programming language for modern needs.
172
173The goal of this project is to engineer modern language features into C in an evolutionary rather than revolutionary way.
174\CC~\cite{c++,ANSI14:C++} is an example of a similar project;
175however, it largely extended the language, and did not address existing problems.\footnote{%
176Two important existing problems addressed were changing the type of character literals from \lstinline@int@ to \lstinline@char@ and enumerator from \lstinline@int@ to the type of its enumerators.}
177Fortran~\cite{Fortran08}, Ada~\cite{Ada12}, and Cobol~\cite{Cobol14} are examples of programming languages that took an evolutionary approach, where modern language features are added and problems fixed within the framework of the existing language.
178Java~\cite{Java8}, Go~\cite{Go}, Rust~\cite{Rust} and D~\cite{D} are examples of the revolutionary approach for modernizing C/\CC, resulting in a new language rather than an extension of the descendent.
179These languages have different syntax and semantics from C, and do not interoperate directly with C, largely because of garbage collection.
180As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
181These costs can be prohibitive for many companies with a large software base in C/\CC, and many programmers that require retraining to a new programming language.
182
183The result of this project is a language that is largely backwards compatible with C11~\cite{C11}, but containing many modern language features and fixing some of the well known C problems.
184Without significant extension to the C programming language, C will be unable to cope with the needs of modern programming problems and programmers;
185as a result, it will fade into disuse.
186Considering the large body of existing C code and programmers, there is significant impetus to ensure C is transformed into a modern programming language.
187While C11 made a few simple extensions to the language, nothing was added to address existing problems in the language or to augment the language with modern language features.
188While some may argue that modern language features may make C complex and inefficient, it is clear a language without modern capabilities is insufficient for the advanced programming problems existing today.
189
190
191\section{Interoperability}
192
193\CFA is designed to integrate well with existing C programs and libraries.
194The most important feature of interoperability is to use the same calling conventions, so there is no overhead to call existing C routines.
195This feature allows users of \CFA to take advantage of the existing panoply of C libraries from inside their \CFA code.
196In fact, one of the biggest issues for any new programming language is establishing a minimum level of library code to support a large body of activities.
197Programming-language developers often state that adequate library support takes more work than designing and implementing the language itself.
198Like \CC, \CFA starts with immediate access to all exiting C libraries, and in many cases, can easily wrap library routines with simpler and safer interfaces, at very low cost.
199
200However, it is necessary to differentiate between C and \CFA code because of name overloading, as for \CC.
201For example, the C math-library provides the following routines for computing the absolute value of the basic type: \lstinline@abs@, \lstinline@labs@, \lstinline@llabs@, \lstinline@fabs@, \lstinline@fabsf@, \lstinline@fabsl@, \lstinline@cabsf@, \lstinline@cabs@, and \lstinline@cabsl@.
202Whereas, \CFA wraps each of these routines into one with the common name \lstinline@abs@.
203\begin{lstlisting}
204char abs( char );
205extern "C" {
206int abs( int );                         // use default C routine for int
207} // extern "C"
208long int abs( long int );
209long long int abs( long long int );
210float abs( float );
211double abs( double );
212long double abs( long double );
213float _Complex abs( float _Complex );
214double _Complex abs( double _Complex );
215long double _Complex abs( long double _Complex );
216\end{lstlisting}
217The problem is the name clash between the library routine \lstinline@abs@ and the \CFA names \lstinline@abs@.
218Hence, names appearing in an \lstinline@extern "C"@ block have \newterm{C linkage}.
219Then overloading polymorphism uses a mechanism called \newterm{name mangling} to create unique names that are different from C names, which are not mangled.
220Hence, there is the same need as in \CC, to know if a name is a C or \CFA name, so it can be correctly formed.
221There is no way around this problem, other than C's approach of creating unique names for each pairing of operation and type.
222This example strongly illustrates a core idea in \CFA: \emph{the power of a name}.
223The name ``\lstinline@abs@'' evokes the notion of absolute value, and many mathematical types provide the notion of absolute value.
224Hence, knowing the name \lstinline@abs@ should be sufficient to apply it to any type where it is applicable.
225The time savings and safety of using one name uniformly versus $N$ unique names should not be underestimated.
226
227
228\section{Compiling \CFA Program}
229
230The command \lstinline@cfa@ is used to compile \CFA program(s).
231This command works like the GNU \lstinline@gcc@\index{gcc} command, e.g.:
232\begin{lstlisting}
233cfa [ gcc-options ] C/@{\CFA}@-files [ assembler/loader-files ]
234\end{lstlisting}
235\index{cfa@\lstinline$cfa$}\index{compilation!cfa@\lstinline$cfa$}
236By default, \CFA programs having the following \lstinline@gcc@ flags turned on:
237\begin{description}
238\item
239\hspace*{-4pt}\lstinline@-std=gnu99@
240\index{-std=gnu99@{\lstinline$-std=gnu99$}}\index{compilation option!-std=gnu99@{\lstinline$-std=gnu99$}}
241The 1999 C standard plus GNU extensions.
242\end{description}
243The following new \CFA option is available:
244\begin{description}
245\item
246\hspace*{-4pt}\lstinline@-CFA@
247\index{-CFA@{\lstinline$-CFA$}}\index{compilation option!-CFA@{\lstinline$-CFA$}}
248Only the C preprocessor and the \CFA translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \CFA translator.
249\end{description}
250
251The following preprocessor variables are available:
252\begin{description}
253\item
254\hspace*{-4pt}\lstinline$__CFA__$
255\index{__CFA__@{\lstinline$__CFA__$}}\index{preprocessor variables!__CFA__@{\lstinline$__CFA__$}}
256is always available during preprocessing and its value is the current major \Index{version number} of \CFA.\footnote{
257The C preprocessor allows only integer values in a preprocessor variable so a value like ``\Version'' is not allowed.
258Hence, the need to have three variables for the major, minor and patch version number.}
259
260\item
261\hspace*{-4pt}\lstinline$__CFA_MINOR__$
262\index{__CFA_MINOR__@{\lstinline$__CFA_MINOR__$}}\index{preprocessor variables!__CFA_MINOR__@{\lstinline$__CFA_MINOR__$}}
263is always available during preprocessing and its value is the current minor \Index{version number} of \CFA.
264
265\item
266\hspace*{-4pt}\lstinline$__CFA_PATCH__$
267\index{__CFA_PATCH__@%(__CFA_PATCH__%)}\index{preprocessor variables!__CFA_PATCH__@%(__CFA_PATCH__%)}
268is always available during preprocessing and its value is the current patch \Index{version number} of \CFA.
269
270\item
271\hspace*{-4pt}\lstinline$__CFORALL__$
272\index{__CFORALL__@%(__CFORALL__%)}\index{preprocessor variables!__CFORALL__@%(__CFORALL__%)}
273is always available during preprocessing and it has no value.
274\end{description}
275
276These preprocessor variables allow conditional compilation of programs that must work differently in these situations.
277For example, to toggle between C and \CFA extensions, using the following:
278\begin{lstlisting}
279#ifndef __CFORALL__
280#include <stdio.h>              // C header file
281#else
282#include <fstream>              // @\CFA{}@ header file
283#endif
284\end{lstlisting}
285which conditionally includes the correct header file, if the program is compiled using \lstinline@gcc@ or \lstinline@cfa@.
286
287
288\section{Underscores in Constants}
289
290Numeric constants are extended to allow \Index{underscore}s within constants\index{constant!underscore}, e.g.:
291\begin{lstlisting}
2922`_`147`_`483`_`648;                            // decimal constant
29356_ul;                                          // decimal unsigned long constant
2940_377;                                          // octal constant
2950x_ff_ff;                                       // hexadecimal constant
2960x_ef3d_aa5c;                           // hexadecimal constant
2973.141_592_654;                          // floating point constant
29810_e_+1_00;                                     // floating point constant
2990x_ff_ff_p_3;                           // hexadecimal floating point
3000x_1.ffff_ffff_p_128_l;         // hexadecimal floating point long constant
301L_"\x_ff_ee";                           // wide character constant
302\end{lstlisting}
303The rules for placement of underscores is as follows:
304\begin{enumerate}
305\item
306A sequence of underscores is disallowed, e.g., \lstinline@12__34@ is invalid.
307\item
308Underscores may only appear within a sequence of digits (regardless of the digit radix).
309In other words, an underscore cannot start or end a sequence of digits, e.g., \lstinline@_1@, \lstinline@1_@ and \lstinline@_1_@ are invalid (actually, the 1st and 3rd examples are identifier names).
310\item
311A numeric prefix may end with an underscore;
312a numeric infix may begin and/or end with an underscore;
313a numeric suffix may begin with an underscore.
314For example, the octal \lstinline@0@ or hexadecimal \lstinline@0x@ prefix may end with an underscore \lstinline@0_377@ or \lstinline@0x_ff@;
315the exponent infix \lstinline@E@ may start or end with an underscore \lstinline@1.0_E10@, \lstinline@1.0E_10@ or \lstinline@1.0_E_10@;
316the type suffixes \lstinline@U@, \lstinline@L@, etc. may start with an underscore \lstinline@1_U@, \lstinline@1_ll@ or \lstinline@1.0E10_f@.
317\end{enumerate}
318It is significantly easier to read and enter long constants when they are broken up into smaller groupings (most cultures use comma or period among digits for the same purpose).
319This extension is backwards compatible, matches with the use of underscore in variable names, and appears in Ada and Java.
320
321
322\section{Declarations}
323\label{s:Declarations}
324
325C declaration syntax is notoriously confusing and error prone.
326For example, many C programmers are confused by a declaration as simple as:
327\begin{quote2}
328\begin{tabular}{@{}ll@{}}
329\begin{lstlisting}[aboveskip=0pt,belowskip=0pt]
330int *x[ 5 ]
331\end{lstlisting}
332&
333\raisebox{-0.75\totalheight}{\input{Cdecl}}
334\end{tabular}
335\end{quote2}
336Is this an array of 5 pointers to integers or a pointer to an array of 5 integers?
337Another example of confusion results from the fact that a routine name and its parameters are embedded within the return type, mimicking the way the return value is used at the routine's call site.
338For example, a routine returning a pointer to an array of integers is defined and used in the following way:
339\begin{lstlisting}
340int (*f())[ 5 ] {...};  // definition mimics usage
341... (*f())[ 3 ] += 1;
342\end{lstlisting}
343Essentially, the return type is wrapped around the routine name in successive layers (like an onion).
344While attempting to make the two contexts consistent was a laudable goal, it has not worked out in practice.
345
346\CFA provides its own type, variable and routine declarations, using a slightly different syntax.
347The new declarations place modifiers to the left of the base type, while C declarations place modifiers to the right of the base type.
348The only exception is bit field specification, which always appear to the right of the base type.
349C and the new \CFA declarations may appear together in the same program block, but cannot be mixed within a specific declaration.
350
351In \CFA declarations, the same tokens are used as in C: the character \lstinline@*@ is used to indicate a pointer, square brackets \lstinline@[@\,\lstinline@]@ are used to represent an array, and parentheses \lstinline@()@ are used to indicate a routine parameter.
352However, unlike C, \CFA type declaration tokens are specified from left to right and the entire type specification is distributed across all variables in the declaration list.
353For instance, variables \lstinline@x@ and \lstinline@y@ of type pointer to integer are defined in \CFA as follows:
354\begin{quote2}
355\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
356\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
357\begin{lstlisting}
358`* int x, y;`
359\end{lstlisting}
360&
361\begin{lstlisting}
362int *x, *y;
363\end{lstlisting}
364\end{tabular}
365\end{quote2}
366Other examples are:
367\begin{quote2}
368\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
369\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
370\begin{lstlisting}
371[ 5 ] int z;
372[ 5 ] * char w;
373* [ 5 ] double v;
374struct s {
375        int f0:3;
376        * int f1;
377        [ 5 ] * int f2;
378};
379\end{lstlisting}
380&
381\begin{lstlisting}
382int z[ 5 ];
383char *w[ 5 ];
384double (*v)[ 5 ];
385struct s {
386        int f0:3;
387        int *f1;
388        int *f2[ 5 ]
389};
390\end{lstlisting}
391&
392\begin{lstlisting}
393// array of 5 integers
394// array of 5 pointers to char
395// pointer to array of 5 doubles
396
397// common bit field syntax
398
399
400
401\end{lstlisting}
402\end{tabular}
403\end{quote2}
404
405All type qualifiers, i.e., \lstinline@const@ and \lstinline@volatile@, are used in the normal way with the new declarations but appear left to right, e.g.:
406\begin{quote2}
407\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
408\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
409\begin{lstlisting}
410const * const int x;
411const * [ 5 ] const int y;
412\end{lstlisting}
413&
414\begin{lstlisting}
415int const * const x;
416const int (* const y)[ 5 ]
417\end{lstlisting}
418&
419\begin{lstlisting}
420// const pointer to const integer
421// const pointer to array of 5 const integers
422\end{lstlisting}
423\end{tabular}
424\end{quote2}
425All declaration qualifiers, i.e., \lstinline@extern@, \lstinline@static@, etc., are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}
426The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature.~\cite[\S~6.11.5(1)]{C11}} e.g.:
427\begin{quote2}
428\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
429\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
430\begin{lstlisting}
431extern [ 5 ] int x;
432static * const int y;
433\end{lstlisting}
434&
435\begin{lstlisting}
436int extern x[ 5 ];
437const int static *y;
438\end{lstlisting}
439&
440\begin{lstlisting}
441// externally visible array of 5 integers
442// internally visible pointer to constant int
443\end{lstlisting}
444\end{tabular}
445\end{quote2}
446
447Unsupported are K\&R C declarations where the base type defaults to \lstinline@int@, if no type is specified\footnote{
448At least one type specifier shall be given in the declaration specifiers in each declaration, and in the specifier-qualifier list in each structure declaration and type name~\cite[\S~6.7.2(2)]{C11}},
449e.g.:
450\begin{lstlisting}
451x;                                              // int x
452*y;                                             // int *y
453f( p1, p2 );                    // int f( int p1, int p2 );
454f( p1, p2 ) {}                  // int f( int p1, int p2 ) {}
455\end{lstlisting}
456
457As stated above, the two styles of declaration may appear together in the same block.
458Therefore, a programmer has the option of either continuing to use traditional C declarations or take advantage of the new style.
459Clearly, both styles need to be supported for some time due to existing C-style header-files, particularly for UNIX systems.
460
461
462\section{Type Operators}
463
464The new declaration syntax can be used in other contexts where types are required, e.g., casts and the pseudo-routine \lstinline@sizeof@:
465\begin{quote2}
466\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
467\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
468\begin{lstlisting}
469y = (* int)x;
470i = sizeof([ 5 ] * int);
471\end{lstlisting}
472&
473\begin{lstlisting}
474y = (int *)x;
475i = sizeof(int *[ 5 ]);
476\end{lstlisting}
477\end{tabular}
478\end{quote2}
479
480
481\section{Routine Definition}
482
483\CFA also supports a new syntax for routine definition, as well as ISO C and K\&R routine syntax.
484The point of the new syntax is to allow returning multiple values from a routine~\cite{CLU,Galletly96}, e.g.:
485\begin{lstlisting}
486`[ int o1, int o2, char o3 ]` f( int i1, char i2, char i3 ) {
487        @\emph{routine body}@
488}
489\end{lstlisting}
490where routine \lstinline@f@ has three output (return values) and three input parameters.
491Existing C syntax cannot be extended with multiple return types because it is impossible to embed a single routine name within multiple return type specifications.
492
493In detail, the brackets, \lstinline@[]@, enclose the result type, where each return value is named and that name is a local variable of the particular return type.\footnote{
494Michael Tiemann, with help from Doug Lea, provided named return values in g++, circa 1989.}
495The value of each local return variable is automatically returned at routine termination.
496Declaration qualifiers can only appear at the start of a routine definition, e.g.:
497\begin{lstlisting}
498extern [ int x ] g( int y ) {@\,@}
499\end{lstlisting}
500Lastly, if there are no output parameters or input parameters, the brackets and/or parentheses must still be specified;
501in both cases the type is assumed to be void as opposed to old style C defaults of int return type and unknown parameter types, respectively, as in:
502\begin{lstlisting}
503[@\,@] g(@\,@);                         // no input or output parameters
504[ void ] g( void );                     // no input or output parameters
505\end{lstlisting}
506
507Routine f is called as follows:
508\begin{lstlisting}
509[ i, j, ch ] = f( 3, 'a', ch );
510\end{lstlisting}
511The list of return values from f and the grouping on the left-hand side of the assignment is called a tuple and discussed in Section 12.
512
513\CFA style declarations cannot be used to declare parameters for K\&R style routine definitions because of the following ambiguity:
514\begin{lstlisting}
515int (*f(x))[ 5 ] int x; {}
516\end{lstlisting}
517The string ``\lstinline@int (*f(x))[ 5 ]@'' declares a K\&R style routine of type returning a pointer to an array of 5 integers, while the string ``\lstinline@[ 5 ] int x@'' declares a \CFA style parameter x of type array of 5 integers.
518Since the strings overlap starting with the open bracket, \lstinline@[@, there is an ambiguous interpretation for the string.
519As well, \CFA-style declarations cannot be used to declare parameters for C-style routine-definitions because of the following ambiguity:
520\begin{lstlisting}
521typedef int foo;
522int f( int (* foo) );           // foo is redefined as a parameter name
523\end{lstlisting}
524The string ``\lstinline@int (* foo)@'' declares a C-style named-parameter of type pointer to an integer (the parenthesis are superfluous), while the same string declares a \CFA style unnamed parameter of type routine returning integer with unnamed parameter of type pointer to foo.
525The redefinition of a type name in a parameter list is the only context in C where the character \lstinline@*@ can appear to the left of a type name, and \CFA relies on all type modifier characters appearing to the right of the type name.
526The inability to use \CFA declarations in these two contexts is probably a blessing because it precludes programmers from arbitrarily switching between declarations forms within a declaration contexts.
527
528C-style declarations can be used to declare parameters for \CFA style routine definitions, e.g.:
529\begin{lstlisting}
530[ int ] f( * int, int * );      // returns an integer, accepts 2 pointers to integers
531[ * int, int * ] f( int );      // returns 2 pointers to integers, accepts an integer
532\end{lstlisting}
533The reason for allowing both declaration styles in the new context is for backwards compatibility with existing preprocessor macros that generate C-style declaration-syntax, as in:
534\begin{lstlisting}
535#define ptoa( n, d ) int (*n)[ d ]
536int f( ptoa(p,5) ) ...          // expands to int f( int (*p)[ 5 ] )
537[ int ] f( ptoa(p,5) ) ...      // expands to [ int ] f( int (*p)[ 5 ] )
538\end{lstlisting}
539Again, programmers are highly encouraged to use one declaration form or the other, rather than mixing the forms.
540
541
542\subsection{Returning Values}
543
544Named return values handle the case where it is necessary to define a local variable whose value is then returned in a \lstinline@return@ statement, as in:
545\begin{lstlisting}
546int f() {
547        int x;
548        ... x = 0; ... x = y; ...
549        return x;
550}
551\end{lstlisting}
552Because the value in the return variable is automatically returned when a \CFA routine terminates, the \lstinline@return@ statement \emph{does not} contain an expression, as in:
553\begin{lstlisting}
554`[ int x ]` f() {
555        ... x = 0; ... x = y; ...
556        `return;` // implicitly return x
557}
558\end{lstlisting}
559When the return is encountered, the current value of \lstinline@x@ is returned to the calling routine.
560As well, ``falling off the end'' of a routine without a \lstinline@return@ statement is permitted, as in:
561\begin{lstlisting}
562[ int x ] f() {
563        ... x = 0; ... x = y; ...
564} // implicitly return x
565\end{lstlisting}
566In this case, the current value of \lstinline@x@ is returned to the calling routine just as if a \lstinline@return@ had been encountered.
567
568
569\subsection{Routine Prototype}
570
571The syntax of the new routine prototype declaration follows directly from the new routine definition syntax;
572as well, parameter names are optional, e.g.:
573\begin{lstlisting}
574[ int x ] f ();                         // returning int with no parameters
575[ * int ] g (int y);            // returning pointer to int with int parameter
576[ ] h (int,char);                       // returning no result with int and char parameters
577[ * int,int ] j (int);          // returning pointer to int and int, with int parameter
578\end{lstlisting}
579This syntax allows a prototype declaration to be created by cutting and pasting source text from the routine definition header (or vice versa).
580It is possible to declare multiple routine-prototypes in a single declaration, but the entire type specification is distributed across \emph{all} routine names in the declaration list (see~\VRef{s:Declarations}), e.g.:
581\begin{quote2}
582\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
583\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
584\begin{lstlisting}
585[ int ] f(int), g;
586\end{lstlisting}
587&
588\begin{lstlisting}
589int f(int), g(int);
590\end{lstlisting}
591\end{tabular}
592\end{quote2}
593Declaration qualifiers can only appear at the start of a \CFA routine declaration,\footref{StorageClassSpecifier} e.g.:
594\begin{lstlisting}
595extern [ int ] f (int);
596static [ int ] g (int);
597\end{lstlisting}
598
599
600\section{Routine Pointers}
601
602The syntax for pointers to \CFA routines specifies the pointer name on the right, e.g.:
603\begin{lstlisting}
604* [ int x ] () fp;                      // pointer to routine returning int with no parameters
605* [ * int ] (int y) gp;         // pointer to routine returning pointer to int with int parameter
606* [ ] (int,char) hp;            // pointer to routine returning no result with int and char parameters
607* [ * int,int ] (int) jp;       // pointer to routine returning pointer to int and int, with int parameter
608\end{lstlisting}
609While parameter names are optional, \emph{a routine name cannot be specified};
610for example, the following is incorrect:
611\begin{lstlisting}
612* [ int x ] f () fp;            // routine name "f" is not allowed
613\end{lstlisting}
614
615
616\section{Named and Default Arguments}
617
618Named and default arguments~\cite{Hardgrave76}\footnote{
619Francez~\cite{Francez77} proposed a further extension to the named-parameter passing style, which specifies what type of communication (by value, by reference, by name) the argument is passed to the routine.}
620are two mechanisms to simplify routine call.
621Both mechanisms are discussed with respect to \CFA.
622\begin{description}
623\item[Named (or Keyword) Arguments:]
624provide the ability to specify an argument to a routine call using the parameter name rather than the position of the parameter.
625For example, given the routine:
626\begin{lstlisting}
627void p( int x, int y, int z ) {...}
628\end{lstlisting}
629a positional call is:
630\begin{lstlisting}
631p( 4, 7, 3 );
632\end{lstlisting}
633whereas a named (keyword) call may be:
634\begin{lstlisting}
635p( z : 3, x : 4, y : 7 ); // rewrite $\Rightarrow$ p( 4, 7, 3 )
636\end{lstlisting}
637Here the order of the arguments is unimportant, and the names of the parameters are used to associate argument values with the corresponding parameters.
638The compiler rewrites a named call into a positional call.
639The advantages of named parameters are:
640\begin{itemize}
641\item
642Remembering the names of the parameters may be easier than the order in the routine definition.
643\item
644Parameter names provide documentation at the call site (assuming the names are descriptive).
645\item
646Changes can be made to the order or number of parameters without affecting the call (although the call must still be recompiled).
647\end{itemize}
648
649Unfortunately, named arguments do not work in C-style programming-languages because a routine prototype is not required to specify parameter names, nor do the names in the prototype have to match with the actual definition.
650For example, the following routine prototypes and definition are all valid.
651\begin{lstlisting}
652void p( int, int, int );                        // equivalent prototypes
653void p( int x, int y, int z );
654void p( int y, int x, int z );
655void p( int z, int y, int x );
656void p( int q, int r, int s ) {}        // match with this definition
657\end{lstlisting}
658Forcing matching parameter names in routine prototypes with corresponding routine definitions is possible, but goes against a strong tradition in C programming.
659Alternatively, prototype definitions can be eliminated by using a two-pass compilation, and implicitly creating header files for exports.
660The former is easy to do, while the latter is more complex.
661Currently, \CFA does \emph{not} attempt to support named arguments.
662
663\item[Default Arguments]
664provide the ability to associate a default value with a parameter so it can be optionally specified in the argument list.
665For example, given the routine:
666\begin{lstlisting}
667void p( int x = 1, int y = 2, int z = 3 ) {...}
668\end{lstlisting}
669the allowable positional calls are:
670\begin{lstlisting}
671p();                            // rewrite $\Rightarrow$ p( 1, 2, 3 )
672p( 4 );                         // rewrite $\Rightarrow$ p( 4, 2, 3 )
673p( 4, 4 );                      // rewrite $\Rightarrow$ p( 4, 4, 3 )
674p( 4, 4, 4 );           // rewrite $\Rightarrow$ p( 4, 4, 4 )
675// empty arguments
676p(  , 4, 4 );           // rewrite $\Rightarrow$ p( 1, 4, 4 )
677p( 4,  , 4 );           // rewrite $\Rightarrow$ p( 4, 2, 4 )
678p( 4, 4,   );           // rewrite $\Rightarrow$ p( 4, 4, 3 )
679p( 4,  ,   );           // rewrite $\Rightarrow$ p( 4, 2, 3 )
680p(  , 4,   );           // rewrite $\Rightarrow$ p( 1, 4, 3 )
681p(  ,  , 4 );           // rewrite $\Rightarrow$ p( 1, 2, 4 )
682p(  ,  ,   );           // rewrite $\Rightarrow$ p( 1, 2, 3 )
683\end{lstlisting}
684Here the missing arguments are inserted from the default values in the parameter list.
685The compiler rewrites missing default values into explicit positional arguments.
686The advantages of default values are:
687\begin{itemize}
688\item
689Routines with a large number of parameters are often very generalized, giving a programmer a number of different options on how a computation is performed.
690For many of these kinds of routines, there are standard or default settings that work for the majority of computations.
691Without default values for parameters, a programmer is forced to specify these common values all the time, resulting in long argument lists that are error prone.
692\item
693When a routine's interface is augmented with new parameters, it extends the interface providing generalizability\footnote{
694``It should be possible for the implementor of an abstraction to increase its generality.
695So long as the modified abstraction is a generalization of the original, existing uses of the abstraction will not require change.
696It might be possible to modify an abstraction in a manner which is not a generalization without affecting existing uses, but, without inspecting the modules in which the uses occur, this possibility cannot be determined.
697This criterion precludes the addition of parameters, unless these parameters have default or inferred values that are valid for all possible existing applications.''~\cite[p.~128]{Cormack90}}
698(somewhat like the generalization provided by inheritance for classes).
699That is, all existing calls are still valid, although the call must still be recompiled.
700\end{itemize}
701The only disadvantage of default arguments is that unintentional omission of an argument may not result in a compiler-time error.
702Instead, a default value is used, which may not be the programmer's intent.
703
704Default values may only appear in a prototype versus definition context:
705\begin{lstlisting}
706void p( int x, int y = 2, int z = 3 );          // prototype: allowed
707void p( int, int = 2, int = 3 );                        // prototype: allowed
708void p( int x, int y = 2, int z = 3 ) {}        // definition: not allowed
709\end{lstlisting}
710The reason for this restriction is to allow separate compilation.
711Multiple prototypes with different default values is an error.
712\end{description}
713
714Ellipse (``...'') arguments present problems when used with default arguments.
715The conflict occurs because both named and ellipse arguments must appear after positional arguments, giving two possibilities:
716\begin{lstlisting}
717p( /* positional */, . . ., /* named */ );
718p( /* positional */, /* named */, . . . );
719\end{lstlisting}
720While it is possible to implement both approaches, the first possibly is more complex than the second, e.g.:
721\begin{lstlisting}
722p( int x, int y, int z, . . . );
723p( 1, 4, 5, 6, z : 3, y : 2 ); // assume p( /* positional */, . . ., /* named */ );
724p( 1, z : 3, y : 2, 4, 5, 6 ); // assume p( /* positional */, /* named */, . . . );
725\end{lstlisting}
726In the first call, it is necessary for the programmer to conceptually rewrite the call, changing named arguments into positional, before knowing where the ellipse arguments begin.
727Hence, this approach seems significantly more difficult, and hence, confusing and error prone.
728In the second call, the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
729
730The problem is exacerbated with default arguments, e.g.:
731\begin{lstlisting}
732void p( int x, int y = 2, int z = 3. . . );
733p( 1, 4, 5, 6, z : 3 );         // assume p( /* positional */, . . ., /* named */ );
734p( 1, z : 3, 4, 5, 6 );         // assume p( /* positional */, /* named */, . . . );
735\end{lstlisting}
736The first call is an error because arguments 4 and 5 are actually positional not ellipse arguments;
737therefore, argument 5 subsequently conflicts with the named argument z : 3.
738In the second call, the default value for y is implicitly inserted after argument 1 and the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
739For these reasons, \CFA requires named arguments before ellipse arguments.
740Finally, while ellipse arguments are needed for a small set of existing C routines, like printf, the extended \CFA type system largely eliminates the need for ellipse arguments (see Section 24), making much of this discussion moot.
741
742Default arguments and overloading (see Section 24) are complementary.
743While in theory default arguments can be simulated with overloading, as in:
744\begin{quote2}
745\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
746\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{default arguments}}  & \multicolumn{1}{c}{\textbf{overloading}}      \\
747\begin{lstlisting}
748void p( int x, int y = 2, int z = 3 ) {...}
749
750
751\end{lstlisting}
752&
753\begin{lstlisting}
754void p( int x, int y, int z ) {...}
755void p( int x ) { p( x, 2, 3 ); }
756void p( int x, int y ) { p( x, y, 3 ); }
757\end{lstlisting}
758\end{tabular}
759\end{quote2}
760the number of required overloaded routines is linear in the number of default values, which is unacceptable growth.
761In general, overloading should only be used over default arguments if the body of the routine is significantly different.
762Furthermore, overloading cannot handle accessing default arguments in the middle of a positional list, via a missing argument, such as:
763\begin{lstlisting}
764p( 1, /* default */, 5 );               // rewrite $\Rightarrow$ p( 1, 2, 5 )
765\end{lstlisting}
766
767Given the \CFA restrictions above, both named and default arguments are backwards compatible.
768\CC only supports default arguments;
769Ada supports both named and default arguments.
770
771
772\section{Type/Routine Nesting}
773
774Nesting of types and routines is useful for controlling name visibility (\newterm{name hiding}).
775
776
777\subsection{Type Nesting}
778
779\CFA allows \Index{type nesting}, and type qualification of the nested types, where as C hoists\index{type!hoisting} (refactors) nested types into the enclosing scope and has no type qualification.
780\begin{quote2}
781\begin{tabular}{@{}l@{\hspace{30pt}}l|l@{}}
782\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{C Type Nesting}}     & \multicolumn{1}{c}{\textbf{C Implicit Hoisting}}      & \multicolumn{1}{|c}{\textbf{\CFA}}    \\
783\hline
784\begin{lstlisting}
785struct S {
786        enum C { R, G, B };
787        struct T {
788                union U { int i, j; };
789                enum C c;
790                short int i, j;
791        };
792        struct T t;
793} s;
794
795int fred() {
796        s.t.c = R;
797        struct T t = { R, 1, 2 };
798        enum C c;
799        union U u;
800}
801\end{lstlisting}
802&
803\begin{lstlisting}
804enum C { R, G, B };
805union U { int i, j; };
806struct T {
807        enum C c;
808        short int i, j;
809};
810struct S {
811        struct T t;
812} s;
813       
814
815
816
817
818
819
820\end{lstlisting}
821&
822\begin{lstlisting}
823struct S {
824        enum C { R, G, B };
825        struct T {
826                union U { int i, j; };
827                enum C c;
828                short int i, j;
829        };
830        struct T t;
831} s;
832
833int fred() {
834        s.t.c = `S.`R;  // type qualification
835        struct `S.`T t = { `S.`R, 1, 2 };
836        enum `S.`C c;
837        union `S.T.`U u;
838}
839\end{lstlisting}
840\end{tabular}
841\end{quote2}
842In the left example in C, types \lstinline@C@, \lstinline@U@ and \lstinline@T@ are implicitly hoisted outside of type \lstinline@S@ into the containing block scope.
843In the right example in \CFA, the types are not hoisted and accessed using the field-selection operator ``\lstinline@.@'' for type qualification, as does Java, rather than the \CC type-selection operator ``\lstinline@::@''.
844
845
846\subsection{Routine Nesting}
847
848While \CFA does not provide object programming by putting routines into structures, it does rely heavily on locally nested routines to redefine operations at or close to a call site.
849For example, the C quick-sort is wrapped into the following polymorphic \CFA routine:
850\begin{lstlisting}
851forall( otype T | { int ?<?( T, T ); } )
852void qsort( const T * arr, size_t dimension );
853\end{lstlisting}
854which can be used to sort in ascending and descending order by locally redefining the less-than operator into greater-than.
855\begin{lstlisting}
856const unsigned int size = 5;
857int ia[size];
858...                                             // assign values to array ia
859qsort( ia, size );              // sort ascending order using builtin ?<?
860{
861        `int ?<?( int x, int y ) { return x > y; }` // nested routine
862        qsort( ia, size );      // sort descending order by local redefinition
863}
864\end{lstlisting}
865
866Nested routines are not first-class, meaning a nested routine cannot be returned if it has references to variables in its enclosing blocks;
867the only exception is references to the external block of the translation unit, as these variables persist for the duration of the program.
868The following program in undefined in \CFA (and \lstinline@gcc@\index{gcc})
869\begin{lstlisting}
870[* [int]( int )] foo() {                // int (*foo())( int )
871        int `i` = 7;
872        int bar( int p ) {
873                `i` += 1;                                       // dependent on local variable
874                sout | `i` | endl;
875        }
876        return bar;                                     // undefined because of local dependence
877}
878int main() {
879        * [int](int) fp = foo();        // int (*fp)(int)
880    sout | fp( 3 ) | endl;
881}
882\end{lstlisting}
883because
884
885Currently, there are no \Index{lambda} expressions, i.e., unnamed routines because routine names are very important to properly select the correct routine.
886
887
888\section{Tuples}
889
890In C and \CFA, lists of elements appear in several contexts, such as the parameter list for a routine call.
891(More contexts are added shortly.)
892A list of such elements is called a tuple.
893The general syntax of a tuple is:
894\begin{lstlisting}
895[ $\emph{exprlist}$ ]
896\end{lstlisting}
897where \lstinline@$\emph{exprlist}$@ is a list of one or more expressions separated by commas.
898The brackets, \lstinline$[]$, allow differentiating between tuples and expressions containing the C comma operator.
899The following are examples of tuples:
900\begin{lstlisting}
901[ x, y, z ]
902[ 2 ]
903[ v+w, x*y, 3.14159, f() ]
904\end{lstlisting}
905Tuples are permitted to contain sub-tuples (i.e., nesting), such as \lstinline@[ [ 14, 21 ], 9 ]@, which is a 2-element tuple whose first element is itself a tuple.
906Note, a tuple is not a record (structure);
907a record denotes a single value with substructure, whereas a tuple is multiple values with no substructure (see flattening coercion in Section 12.1).
908In essence, tuples are largely a compile time phenomenon, having little or no runtime presence.
909
910Tuples can be organized into compile-time tuple variables;
911these variables are of \newterm{tuple type}.
912Tuple variables and types can be used anywhere lists of conventional variables and types can be used.
913The general syntax of a tuple type is:
914\begin{lstlisting}
915[ @\emph{typelist}@ ]
916\end{lstlisting}
917where \lstinline@$\emph{typelist}$@ is a list of one or more legal \CFA or C type specifications separated by commas, which may include other tuple type specifications.
918Examples of tuple types include:
919\begin{lstlisting}
920[ unsigned int, char ]
921[ double, double, double ]
922[ * int, int * ]                // mix of CFA and ANSI
923[ * [ 5 ] int, * * char, * [ [ int, int ] ] (int, int) ]
924\end{lstlisting}
925Like tuples, tuple types may be nested, such as \lstinline@[ [ int, int ], int ]@, which is a 2-element tuple type whose first element is itself a tuple type.
926
927Examples of declarations using tuple types are:
928\begin{lstlisting}
929[ int, int ] x;                 // 2 element tuple, each element of type int
930* [ char, char ] y;             // pointer to a 2 element tuple
931[ [ int, int ] ] z ([ int, int ]);
932\end{lstlisting}
933The last example declares an external routine that expects a 2 element tuple as an input parameter and returns a 2 element tuple as its result.
934
935As mentioned, tuples can appear in contexts requiring a list of value, such as an argument list of a routine call.
936In unambiguous situations, the tuple brackets may be omitted, e.g., a tuple that appears as an argument may have its
937square brackets omitted for convenience; therefore, the following routine invocations are equivalent:
938\begin{lstlisting}
939f( [ 1, x+2, fred() ] );
940f( 1, x+2, fred() );
941\end{lstlisting}
942Also, a tuple or a tuple variable may be used to supply all or part of an argument list for a routine expecting multiple input parameters or for a routine expecting a tuple as an input parameter.
943For example, the following are all legal:
944\begin{lstlisting}
945[ int, int ] w1;
946[ int, int, int ] w2;
947[ void ] f (int, int, int); /* three input parameters of type int */
948[ void ] g ([ int, int, int ]); /* 3 element tuple as input */
949f( [ 1, 2, 3 ] );
950f( w1, 3 );
951f( 1, w1 );
952f( w2 );
953g( [ 1, 2, 3 ] );
954g( w1, 3 );
955g( 1, w1 );
956g( w2 );
957\end{lstlisting}
958Note, in all cases 3 arguments are supplied even though the syntax may appear to supply less than 3. As mentioned, a
959tuple does not have structure like a record; a tuple is simply converted into a list of components.
960\begin{rationale}
961The present implementation of \CFA does not support nested routine calls when the inner routine returns multiple values; i.e., a statement such as \lstinline@g( f() )@ is not supported.
962Using a temporary variable to store the  results of the inner routine and then passing this variable to the outer routine works, however.
963\end{rationale}
964
965A tuple can contain a C comma expression, provided the expression containing the comma operator is enclosed in parentheses.
966For instance, the following tuples are equivalent:
967\begin{lstlisting}
968[ 1, 3, 5 ]
969[ 1, (2, 3), 5 ]
970\end{lstlisting}
971The second element of the second tuple is the expression (2, 3), which yields the result 3.
972This requirement is the same as for comma expressions in argument lists.
973
974Type qualifiers, i.e., const and volatile, may modify a tuple type.
975The meaning is the same as for a type qualifier modifying an aggregate type [Int99, x 6.5.2.3(7),x 6.7.3(11)], i.e., the qualifier is distributed across all of the types in the tuple, e.g.:
976\begin{lstlisting}
977const volatile [ int, float, const int ] x;
978\end{lstlisting}
979is equivalent to:
980\begin{lstlisting}
981[ const volatile int, const volatile float, const volatile int ] x;
982\end{lstlisting}
983Declaration qualifiers can only appear at the start of a \CFA tuple declaration4, e.g.:
984\begin{lstlisting}
985extern [ int, int ] w1;
986static [ int, int, int ] w2;
987\end{lstlisting}
988\begin{rationale}
989Unfortunately, C's syntax for subscripts precluded treating them as tuples.
990The C subscript list has the form \lstinline@[i][j]...@ and not \lstinline@i, j, ...]@.
991Therefore, there is no syntactic way for a routine returning multiple values to specify the different subscript values, e.g., \lstinline@f[g()]@ always means a single subscript value because there is only one set of brackets.
992Fixing this requires a major change to C because the syntactic form \lstinline@M[i, j, k]@ already has a particular meaning: \lstinline@i, j, k@ is a comma expression.
993\end{rationale}
994
995
996\subsection{Tuple Coercions}
997
998There are four coercions that can be performed on tuples and tuple variables: closing, opening, flattening and structuring.
999In addition, the coercion of dereferencing can be performed on a tuple variable to yield its value(s), as for other variables.
1000A \newterm{closing coercion} takes a set of values and converts it into a tuple value, which is a contiguous set of values, as in:
1001\begin{lstlisting}
1002[ int, int, int, int ] w;
1003w = [ 1, 2, 3, 4 ];
1004\end{lstlisting}
1005First the right-hand tuple is closed into a tuple value and then the tuple value is assigned.
1006
1007An \newterm{opening coercion} is the opposite of closing; a tuple value is converted into a tuple of values, as in:
1008\begin{lstlisting}
1009[ a, b, c, d ] = w
1010\end{lstlisting}
1011\lstinline@w@ is implicitly opened to yield a tuple of four values, which are then assigned individually.
1012
1013A \newterm{flattening coercion} coerces a nested tuple, i.e., a tuple with one or more components, which are themselves tuples, into a flattened tuple, which is a tuple whose components are not tuples, as in:
1014\begin{lstlisting}
1015[ a, b, c, d ] = [ 1, [ 2, 3 ], 4 ];
1016\end{lstlisting}
1017First the right-hand tuple is flattened and then the values are assigned individually.
1018Flattening is also performed on tuple types.
1019For example, the type \lstinline@[ int, [ int, int ], int ]@ can be coerced, using flattening, into the type \lstinline@[ int, int, int, int ]@.
1020
1021A \newterm{structuring coercion} is the opposite of flattening;
1022a tuple is structured into a more complex nested tuple.
1023For example, structuring the tuple \lstinline@[ 1, 2, 3, 4 ]@ into the tuple \lstinline@[ 1, [ 2, 3 ], 4 ]@ or the tuple type \lstinline@[ int, int, int, int ]@ into the tuple type \lstinline@[ int, [ int, int ], int ]@.
1024In the following example, the last assignment illustrates all the tuple coercions:
1025\begin{lstlisting}
1026[ int, int, int, int ] w = [ 1, 2, 3, 4 ];
1027int x = 5;
1028[ x, w ] = [ w, x ];            // all four tuple coercions
1029\end{lstlisting}
1030Starting on the right-hand tuple in the last assignment statement, w is opened, producing a tuple of four values;
1031therefore, the right-hand tuple is now the tuple \lstinline@[ [ 1, 2, 3, 4 ], 5 ]@.
1032This tuple is then flattened, yielding \lstinline@[ 1, 2, 3, 4, 5 ]@, which is structured into \lstinline@[ 1, [ 2, 3, 4, 5 ] ]@ to match the tuple type of the left-hand side.
1033The tuple \lstinline@[ 2, 3, 4, 5 ]@ is then closed to create a tuple value.
1034Finally, \lstinline@x@ is assigned \lstinline@1@ and \lstinline@w@ is assigned the tuple value using multiple assignment (see Section 14).
1035\begin{rationale}
1036A possible additional language extension is to use the structuring coercion for tuples to initialize a complex record with a tuple.
1037\end{rationale}
1038
1039
1040\section{Mass Assignment}
1041
1042\CFA permits assignment to several variables at once using mass assignment~\cite{CLU}.
1043Mass assignment has the following form:
1044\begin{lstlisting}
1045[ @\emph{lvalue}@, ..., @\emph{lvalue}@ ] = @\emph{expr}@;
1046\end{lstlisting}
1047The left-hand side is a tuple of \lstinline@$\emph{lvalues}$@, which is a list of expressions each yielding an address, i.e., any data object that can appear on the left-hand side of a conventional assignment statement.
1048\lstinline@$\emph{expr}$@ is any standard arithmetic expression.
1049Clearly, the types of the entities being assigned must be type compatible with the value of the expression.
1050
1051Mass assignment has parallel semantics, e.g., the statement:
1052\begin{lstlisting}
1053[ x, y, z ] = 1.5;
1054\end{lstlisting}
1055is equivalent to:
1056\begin{lstlisting}
1057x = 1.5; y = 1.5; z = 1.5;
1058\end{lstlisting}
1059This semantics is not the same as the following in C:
1060\begin{lstlisting}
1061x = y = z = 1.5;
1062\end{lstlisting}
1063as conversions between intermediate assignments may lose information.
1064A more complex example is:
1065\begin{lstlisting}
1066[ i, y[i], z ] = a + b;
1067\end{lstlisting}
1068which is equivalent to:
1069\begin{lstlisting}
1070t = a + b;
1071a1 = &i; a2 = &y[i]; a3 = &z;
1072*a1 = t; *a2 = t; *a3 = t;
1073\end{lstlisting}
1074The temporary \lstinline@t@ is necessary to store the value of the expression to eliminate conversion issues.
1075The temporaries for the addresses are needed so that locations on the left-hand side do not change as the values are assigned.
1076In this case, \lstinline@y[i]@ uses the previous value of \lstinline@i@ and not the new value set at the beginning of the mass assignment.
1077
1078
1079\section{Multiple Assignment}
1080
1081\CFA also supports the assignment of several values at once, known as multiple assignment~\cite{CLU,Galletly96}.
1082Multiple assignment has the following form:
1083\begin{lstlisting}
1084[ @\emph{lvalue}@, . . ., @\emph{lvalue}@ ] = [ @\emph{expr}@, . . ., @\emph{expr}@ ];
1085\end{lstlisting}
1086The left-hand side is a tuple of \lstinline@$\emph{lvalues}$@, and the right-hand side is a tuple of \lstinline@$\emph{expr}$@s.
1087Each \lstinline@$\emph{expr}$@ appearing on the righthand side of a multiple assignment statement is assigned to the corresponding \lstinline@$\emph{lvalues}$@ on the left-hand side of the statement using parallel semantics for each assignment.
1088An example of multiple assignment is:
1089\begin{lstlisting}
1090[ x, y, z ] = [ 1, 2, 3 ];
1091\end{lstlisting}
1092Here, the values \lstinline@1@, \lstinline@2@ and \lstinline@3@ are assigned, respectively, to the variables \lstinline@x@, \lstinline@y@ and \lstinline@z@.
1093 A more complex example is:
1094\begin{lstlisting}
1095[ i, y[ i ], z ] = [ 1, i, a + b ];
1096\end{lstlisting}
1097Here, the values \lstinline@1@, \lstinline@i@ and \lstinline@a + b@ are assigned to the variables \lstinline@i@, \lstinline@y[i]@ and \lstinline@z@, respectively.
1098 Note, the parallel semantics of
1099multiple assignment ensures:
1100\begin{lstlisting}
1101[ x, y ] = [ y, x ];
1102\end{lstlisting}
1103correctly interchanges (swaps) the values stored in \lstinline@x@ and \lstinline@y@.
1104The following cases are errors:
1105\begin{lstlisting}
1106[ a, b, c ] = [ 1, 2, 3, 4 ];
1107[ a, b, c ] = [ 1, 2 ];
1108\end{lstlisting}
1109because the number of entities in the left-hand tuple is unequal with the right-hand tuple.
1110
1111As for all tuple contexts in C, side effects should not be used because C does not define an ordering for the evaluation of the elements of a tuple;
1112both these examples produce indeterminate results:
1113\begin{lstlisting}
1114f( x++, x++ );                          // C routine call with side effects in arguments
1115[ v1, v2 ] = [ x++, x++ ];      // side effects in righthand side of multiple assignment
1116\end{lstlisting}
1117
1118
1119\section{Cascade Assignment}
1120
1121As in C, \CFA mass and multiple assignments can be cascaded, producing cascade assignment.
1122Cascade assignment has the following form:
1123\begin{lstlisting}
1124@\emph{tuple}@ = @\emph{tuple}@ = ... = @\emph{tuple}@;
1125\end{lstlisting}
1126and it has the same parallel semantics as for mass and multiple assignment.
1127Some examples of cascade assignment are:
1128\begin{lstlisting}
1129x1 = y1 = x2 = y2 = 0;
1130[ x1, y1 ] = [ x2, y2 ] = [ x3, y3 ];
1131[ x1, y1 ] = [ x2, y2 ] = 0;
1132[ x1, y1 ] = z = 0;
1133\end{lstlisting}
1134As in C, the rightmost assignment is performed first, i.e., assignment parses right to left.
1135
1136
1137\section{Field Tuples}
1138
1139Tuples may be used to select multiple fields of a record by field name.
1140Its general form is:
1141\begin{lstlisting}
1142@\emph{expr}@ . [ @\emph{fieldlist}@ ]
1143@\emph{expr}@ -> [ @\emph{fieldlist}@ ]
1144\end{lstlisting}
1145\lstinline@$\emph{expr}$@ is any expression yielding a value of type record, e.g., \lstinline@struct@, \lstinline@union@.
1146Each element of \lstinline@$\emph{ fieldlist}$@ is an element of the record specified by \lstinline@$\emph{expr}$@.
1147A record-field tuple may be used anywhere a tuple can be used. An example of the use of a record-field tuple is
1148the following:
1149\begin{lstlisting}
1150struct s {
1151        int f1, f2;
1152        char f3;
1153        double f4;
1154} v;
1155v.[ f3, f1, f2 ] = ['x', 11, 17 ];      // equivalent to v.f3 = 'x', v.f1 = 11, v.f2 = 17
1156f( v.[ f3, f1, f2 ] );                          // equivalent to f( v.f3, v.f1, v.f2 )
1157\end{lstlisting}
1158Note, the fields appearing in a record-field tuple may be specified in any order;
1159also, it is unnecessary to specify all the fields of a struct in a multiple record-field tuple.
1160
1161If a field of a \lstinline@struct@ is itself another \lstinline@struct@, multiple fields of this subrecord can be specified using a nested record-field tuple, as in the following example:
1162\begin{lstlisting}
1163struct inner {
1164        int f2, f3;
1165};
1166struct outer {
1167        int f1;
1168        struct inner i;
1169        double f4;
1170} o;
1171
1172o.[ f1, i.[ f2, f3 ], f4 ] = [ 11, 12, 13, 3.14159 ];
1173\end{lstlisting}
1174
1175
1176\section{Labelled Break/Continue}
1177
1178While C provides \lstinline@break@ and \lstinline@continue@ statements for altering control flow, both are restricted to one level of nesting for a particular control structure.
1179Unfortunately, this restriction forces programmers to use \lstinline@goto@ to achieve the equivalent for more than one level of nesting.
1180To prevent having to make this switch, the \lstinline@break@ and \lstinline@continue@ are extended with a target label to support static multi-level exit~\cite{Buhr85,Java}.
1181For the labelled \lstinline@break@, it is possible to specify which control structure is the target for exit, as in:
1182\begin{quote2}
1183\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
1184\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
1185\begin{lstlisting}
1186`L1:` for ( ... ) {
1187        `L2:` for ( ... ) {
1188                `L3:` for ( ... ) {
1189                        ... break `L1`; ...
1190                        ... break `L2`; ...
1191                        ... break `L3`; // or break
1192                }
1193        }
1194}
1195\end{lstlisting}
1196&
1197\begin{lstlisting}
1198for ( ... ) {
1199        for ( ... ) {
1200                for ( ... ) {
1201                        ... goto L1; ...
1202                        ... goto L2; ...
1203                        ... goto L3; // or break
1204                } L3: ;
1205        } L2: ;
1206} L1: ;
1207\end{lstlisting}
1208\end{tabular}
1209\end{quote2}
1210The inner most loop has three exit points, which cause termination of one or more of the three nested loops, respectively.
1211For the labelled \lstinline@continue@, it is possible to specify which control structure is the target for the next loop iteration, as in:
1212\begin{quote2}
1213\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
1214\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
1215\begin{lstlisting}
1216`L1`: for ( ... ) {
1217        `L2`: for ( ... ) {
1218                `L3`: for ( ... ) {
1219                        ... continue `L1`; ...
1220                        ... continue `L2`; ...
1221                        ... continue `L3`; ...
1222
1223                }
1224
1225        }
1226
1227}
1228\end{lstlisting}
1229&
1230\begin{lstlisting}
1231for ( ... ) {
1232        for ( ... ) {
1233                for ( ... ) {
1234                        ... goto L1; ...
1235                        ... goto L2; ...
1236                        ... goto L3; ...
1237                  L3: ;
1238                }
1239          L2: ;
1240        }
1241  L1: ;
1242}
1243\end{lstlisting}
1244\end{tabular}
1245\end{quote2}
1246The inner most loop has three restart points, which cause the next loop iteration to begin, respectively.
1247For both \lstinline@break@ and \lstinline@continue@, the target label must be directly associated with a \lstinline@for@, \lstinline@while@ or \lstinline@do@ statement;
1248for \lstinline@break@, the target label can also be associated with a \lstinline@switch@ statement.
1249Both \lstinline@break@ and \lstinline@continue@ with target labels are simply a \lstinline@goto@ restricted in the following ways:
1250\begin{itemize}
1251\item
1252They cannot be used to create a loop.
1253This means that only the looping construct can be used to create a loop.
1254This restriction is important since all situations that can result in repeated execution of statements in a program are clearly delineated.
1255\item
1256Since they always transfers out of containing control structures, they cannot be used to branch into a control structure.
1257\end{itemize}
1258The advantage of the labelled \lstinline@break@/\lstinline@continue@ is that it allows static multi-level exits without having to use the \lstinline@goto@ statement and ties control flow to the target control structure rather than an arbitrary point in a program.
1259Furthermore, the location of the label at the beginning of the target control structure informs the reader that complex control-flow is occurring in the body of the control structure.
1260With \lstinline@goto@, the label at the end of the control structure fails to convey this important clue early enough to the reader.
1261Finally, using an explicit target for the transfer instead of an implicit target allows new nested loop or switch constructs to be added or removed without affecting other constructs.
1262The implicit targets of the current \lstinline@break@ and \lstinline@continue@, i.e., the closest enclosing loop or \lstinline@switch@, change as certain constructs are added or removed.
1263
1264
1265\section{Switch Statement}
1266
1267C allows a number of questionable forms for the \lstinline@switch@ statement:
1268\begin{enumerate}
1269\item
1270By default, the end of a \lstinline@case@ clause\footnote{
1271In this section, the term \emph{case clause} refers to either a \lstinline@case@ or \lstinline@default@ clause.}
1272\emph{falls through} to the next \lstinline@case@ clause in the \lstinline@switch@ statement;
1273to exit a \lstinline@switch@ statement from a \lstinline@case@ clause requires explicitly terminating the clause with a transfer statement, most commonly \lstinline@break@, as in:
1274\begin{lstlisting}
1275switch ( i ) {
1276  case 1:
1277        ...
1278        // fall-through
1279  case 2:
1280        ...
1281        break;  // exit switch statement
1282}
1283\end{lstlisting}
1284The ability to fall-through to the next clause is a useful form of control flow, specifically when a sequence of case actions compound, as in:
1285\begin{lstlisting}
1286switch ( argc ) {
1287  case 3:
1288        // open output file
1289        // fall-through
1290  case 2:
1291        // open input file
1292        break;  // exit switch statement
1293  default:
1294        // usage message
1295}
1296\end{lstlisting}
1297In this example, case 2 is always done if case 3 is done.
1298This control flow is difficult to simulate with if statements or a \lstinline@switch@ statement without fall-through as code must be duplicated or placed in a separate routine.
1299C also uses fall-through to handle multiple case-values resulting in the same action, as in:
1300\begin{lstlisting}
1301switch ( i ) {
1302  case 1: case 3: case 5:       // odd values
1303        // same action
1304        break;
1305  case 2: case 4: case 6:       // even values
1306        // same action
1307        break;
1308}
1309\end{lstlisting}
1310However, this situation is handled in other languages without fall-through by allowing a list of case values.
1311While fall-through itself is not a problem, the problem occurs when fall-through is the \lstinline@default@, as this semantics is not intuitive to most programmers and is different from virtually all other programming languages with a \lstinline@switch@ statement.
1312Hence, \lstinline@default@ fall-through semantics results in a large number of programming errors as programmers often forget the \lstinline@break@ statement at the end of a \lstinline@case@ clause, resulting in inadvertent fall-through.
1313
1314\item
1315It is possible to place \lstinline@case@ clauses on statements nested \emph{within} the body of the \lstinline@switch@ statement, as in:
1316\begin{lstlisting}
1317switch ( i ) {
1318  case 0:
1319        if ( j < k ) {
1320                ...
1321          case 1:               // transfer into "if" statement
1322                if ( j < m ) {
1323                        ...
1324                  case 2:       // transfer into "if" statement
1325                        ...
1326                }
1327        }
1328  case 3:
1329        while ( j < 5 ) {
1330                ...
1331          case 4:               // transfer into "while" statement
1332                ...
1333        }
1334}
1335\end{lstlisting}
1336The problem with this usage is branching into control structures, which is known to cause both comprehension and technical difficulties.
1337The comprehension problem occurs from the inability to determine how control reaches a particular point due to the number of branches leading to it.
1338The technical problem results from the inability to ensure allocation and initialization of variables when blocks are not entered at the beginning.
1339Often transferring into a block can bypass variable declaration and/or its initialization, which results in subsequent errors.
1340There are virtually no positive arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
1341Nevertheless, C does have an idiom where this capability is used, known as ``Duff's device''~\cite{Duff83}:
1342\begin{lstlisting}
1343register int n = (count + 7) / 8;
1344switch ( count % 8 ) {
1345case 0: do{ *to = *from++;
1346case 7:         *to = *from++;
1347case 6:         *to = *from++;
1348case 5:         *to = *from++;
1349case 4:         *to = *from++;
1350case 3:         *to = *from++;
1351case 2:         *to = *from++;
1352case 1:         *to = *from++;
1353                } while ( --n > 0 );
1354}
1355\end{lstlisting}
1356which unrolls a loop N times (N = 8 above) and uses the \lstinline@switch@ statement to deal with any iterations not a multiple of N.
1357While efficient, this sort of special purpose usage is questionable:
1358\begin{quote}
1359Disgusting, no? But it compiles and runs just fine. I feel a combination of pride and revulsion at this
1360discovery.~\cite{Duff83}
1361\end{quote}
1362\item
1363It is possible to place the \lstinline@default@ clause anywhere in the list of labelled clauses for a \lstinline@switch@ statement, rather than only at the end.
1364Virtually all programming languages with a \lstinline@switch@ statement require the \lstinline@default@ clause to appear last in the case-clause list.
1365The logic for this semantics is that after checking all the \lstinline@case@ clauses without success, the \lstinline@default@ clause is selected;
1366hence, physically placing the \lstinline@default@ clause at the end of the \lstinline@case@ clause list matches with this semantics.
1367This physical placement can be compared to the physical placement of an \lstinline@else@ clause at the end of a series of connected \lstinline@if@/\lstinline@else@ statements.
1368
1369\item
1370It is possible to place unreachable code at the start of a \lstinline@switch@ statement, as in:
1371\begin{lstlisting}
1372switch ( x ) {
1373        int y = 1;                      // unreachable initialization
1374        x = 7;                          // unreachable code
1375  case 3: ...
1376        y = 3;
1377        ...
1378}
1379\end{lstlisting}
1380While the declaration of the local variable \lstinline@y@ is useful and its scope is across all \lstinline@case@ clauses, the initialization for such a variable is defined to never be executed because control always transfers over it.
1381Furthermore, any statements before the first \lstinline@case@ clause can only be executed if labelled and transfered to using a \lstinline@goto@, either from outside or inside of the \lstinline@switch@.
1382As mentioned, transfer into control structures should be forbidden.
1383Transfers from within the \lstinline@switch@ body using a \lstinline@goto@ are equally unpalatable.
1384\end{enumerate}
1385Before discussing potential language changes to deal with these problems, it is worth observing that in a typical C program:
1386\begin{itemize}
1387\item
1388the number of \lstinline@switch@ statements is small,
1389\item
1390most \lstinline@switch@ statements are well formed (i.e., no Duff's device),
1391\item
1392the \lstinline@default@ clause is usually written as the last case-clause,
1393\item
1394and there is only a medium amount of fall-through from one \lstinline@case@ clause to the next, and most of these result from a list of case values executing common code, rather than a sequence of case actions that compound.
1395\end{itemize}
1396These observations should help to put the effects of suggested changes into perspective.
1397% Figure 1 shows the grammar change that attempts to minimize the effect on existing C programs.
1398\begin{enumerate}
1399\item
1400Eliminating the \lstinline@default@ fall-through problem has the greatest potential for affecting existing code.
1401However, even if fall-through is removed, most \lstinline@switch@ statements would continue to work because of the explicit transfers already present at the end of each \lstinline@case@ clause, and the common placement of the \lstinline@default@ clause at the end of the case list.
1402In addition, the above grammar provides for the most common use of fall-through, i.e., a list of \lstinline@case@ clauses executing common code, e.g.:
1403\begin{lstlisting}
1404case 1:  case 2:  case 3: ...
1405\end{lstlisting}
1406Nevertheless, reversing the default action would have a non-trivial effect on case actions that compound, such as the above example of processing shell arguments.
1407Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of \lstinline@switch@ statement, called \lstinline@choose@, with no fall-through semantics.
1408The \lstinline@choose@ statement is identical to the new \lstinline@switch@ statement, except there is no implicit fall-through between case-clauses and the \lstinline@break@ statement applies to the enclosing loop construct (as for the continue statement in a \lstinline@switch@ statement).
1409It is still possible to fall-through if a case-clause ends with the new keyword fallthru, e.g.:
1410\begin{lstlisting}
1411choose ( i ) {
1412        int i;
1413  case 3, 4:
1414        i = 3;
1415        fallthru;
1416  case 8, 10:
1417  default:
1418        j = 3;
1419}
1420\end{lstlisting}
1421The ability to fall-through is retained because it is a sufficient C-idiom that most C programmers simply expect it, and its absence might discourage these programmers from using the choose statement.
1422\item
1423Eliminating Duff's device is straightforward and only invalidates a small amount of very questionable code.
1424The solution is to allow \lstinline@case@ clauses to only appear at the same nesting level as the \lstinline@switch@ body, as is done in most other programming languages with \lstinline@switch@ statements.
1425\item
1426The issue of \lstinline@default@ at locations other than at the end of the cause clause can be solved by using good programming style, and there are a few reasonable situations involving fall-through where the \lstinline@default@ clause may appear is locations other than at the end.
1427Therefore, no language change is made for this issue.
1428\item
1429Dealing with unreachable code at the start of a \lstinline@switch@ statement is solved by defining the declaration-list, including any associated initialization, at the start of a \lstinline@switch@ statement body to be executed before the transfer to the appropriate \lstinline@case@ clause.
1430This semantics is the same as for declarations at the start of a loop body, which are executed before each iteration of the loop body.
1431As well, this grammar does not allow statements to appear before the first \lstinline@case@ clause.
1432The change is compatible for declarations with initialization in this context because existing code cannot assume the initialization has occurred.
1433The change is incompatible for statements, but any existing code using it is highly questionable, as in:
1434\begin{lstlisting}
1435switch ( i ) {
1436        L: x = 5;               // questionable code
1437  case 0:
1438        ...
1439}
1440\end{lstlisting}
1441The statement after the \lstinline@switch@ can never be executed unless it is labelled.
1442If it is labelled, it must be transfered to from outside or inside the \lstinline@switch@ statement, neither of which is acceptable control flow.
1443\end{enumerate}
1444
1445
1446\section{Case Clause}
1447
1448C restricts the \lstinline@case@ clause of a \lstinline@switch@ statement to a single value.
1449For multiple \lstinline@case@ clauses associated with the same statement, it is necessary to have multiple \lstinline@case@ clauses rather than multiple values.
1450Requiring a \lstinline@case@ clause for each value does not seem to be in the spirit of brevity normally associated with C.
1451Therefore, the \lstinline@case@ clause is extended with a list of values, as in:
1452\begin{quote2}
1453\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
1454\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
1455\begin{lstlisting}
1456switch ( i ) {
1457  `case 1, 3, 5`:
1458        ...
1459  `case 2, 4, 6`:
1460        ...
1461}
1462\end{lstlisting}
1463&
1464\begin{lstlisting}
1465switch ( i ) {
1466  case 1: case 3 : case 5:
1467        ...
1468  case 2: case 4 : case 6:
1469        ...
1470}
1471\end{lstlisting}
1472&
1473\begin{lstlisting}
1474
1475// odd values
1476
1477// even values
1478
1479
1480\end{lstlisting}
1481\end{tabular}
1482\end{quote2}
1483In addition, two forms of subranges are allowed to specify case values: the GNU C form and a new \CFA form.
1484\begin{quote2}
1485\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
1486\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{GNU C}}    \\
1487\begin{lstlisting}
1488switch ( i ) {
1489  `case 1~5:`
1490        ...
1491  `case 10~15:`
1492        ...
1493}
1494\end{lstlisting}
1495&
1496\begin{lstlisting}
1497switch ( i )
1498  case 1 ... 5:
1499        ...
1500  case 10 ... 15:
1501        ...
1502}
1503\end{lstlisting}
1504&
1505\begin{lstlisting}
1506
1507// 1, 2, 3, 4, 5
1508
1509// 10, 11, 12, 13, 14, 15
1510
1511
1512\end{lstlisting}
1513\end{tabular}
1514\end{quote2}
1515
1516
1517\section{Unnamed Structure Fields}
1518
1519C requires each field of a structure to have a name, except for a bit field associated with a basic type, e.g.:
1520\begin{lstlisting}
1521struct {
1522        int f1;                 // named field
1523        int f2 : 4;             // named field with bit field size
1524        int : 3;                // unnamed field for basic type with bit field size
1525        int ;                   // disallowed, unnamed field
1526        int *;                  // disallowed, unnamed field
1527        int (*)(int);   // disallowed, unnamed field
1528};
1529\end{lstlisting}
1530This requirement is relaxed by making the field name optional for all field declarations; therefore, all the field declarations in the example are allowed.
1531As for unnamed bit fields, an unnamed field is used for padding a structure to a particular size.
1532A list of unnamed fields is also supported, e.g.:
1533\begin{lstlisting}
1534struct {
1535        int , , ;               // 3 unnamed fields
1536}
1537\end{lstlisting}
1538
1539
1540\section{Exception Handling}
1541
1542Exception handling provides two mechanim: change of control flow from a raise to a handler, and commumication from the riase to the handler.
1543\begin{lstlisting}
1544exception void h( int i );
1545exception int h( int i, double d );
1546
1547void f(...) {
1548        ... throw h( 3 );
1549        ... i = resume h( 3, 5.1 );
1550}
1551
1552try {
1553        f(...);
1554} catch h( int w ) {
1555        // reset
1556} resume h( int p, double x ) {
1557        return 17;  // recover
1558} finally {
1559}
1560\end{lstlisting}
1561So the type raised would be the mangled name of the exception prototype and that name would be matched at the handler clauses by comparing the strings.
1562The arguments for the call would have to be packed in a message and unpacked at handler clause and then a call made to the handler.
1563
1564
1565\section{Types}
1566
1567\subsection{Type Definitions}
1568
1569\CFA allows users to define new types using the keyword type.
1570
1571\begin{lstlisting}
1572// SensorValue is a distinct type and represented as an int
1573type SensorValue = int;
1574\end{lstlisting}
1575
1576A type definition is different from a typedef in C because a typedef just creates an alias for a type,  while Do.s type definition creates a distinct type.
1577This means that users can define distinct function overloads for the new type (see Overloading for more information).
1578For example:
1579
1580\begin{lstlisting}
1581type SensorValue = int;
1582void printValue(int v) {...}
1583void printValue(SensorValue v) {...}
1584void process(int v) {...}
1585
1586SensorValue s = ...;
1587
1588printValue(s); // calls version with SensorValue argument
1589
1590printValue((int) s); // calls version with int argument
1591
1592process(s); // implicit conversion to int
1593\end{lstlisting}
1594
1595If SensorValue was defined with a typedef, then these two print functions would not have unique signatures.
1596This can be very useful to create a distinct type that has the same representation as another type.
1597
1598The compiler will assume it can safely convert from the old type to the new type, implicitly.
1599Users may override this and define a function that must be called to convert from one type to another.
1600
1601\begin{lstlisting}
1602type SensorValue = int;
1603// ()? is the overloaded conversion operator identifier
1604// This function converts an int to a SensorValue
1605SensorValue ()?(int val) {
1606        ...
1607}
1608void process(int v) {...}
1609
1610SensorValue s = ...;
1611process(s); // implicit call to conversion operator
1612\end{lstlisting}
1613
1614In many cases, it is not desired for the compiler to do this implicit conversion.
1615To avoid that, the user can use the explicit modifier on the conversion operator.
1616Any places where the conversion is needed but not explicit (with a cast), will result in a compile-time error.
1617
1618\begin{lstlisting}
1619type SensorValue = int;
1620
1621// conversion from int to SensorValue; must be explicit
1622explicit SensorValue ()?(int val) {
1623        ...
1624}
1625
1626void process(int v) {...}
1627
1628SensorValue s = ...;
1629process(s); // implicit cast to int: compile-time error
1630process((int) s); // explicit cast to int: calls conversion func
1631\end{lstlisting}
1632
1633The conversion may not require any code, but still need to be explicit; in that case, the syntax can be simplified to:
1634\begin{lstlisting}
1635type SensorValue = int;
1636explicit SensorValue ()?(int);
1637void process(int v) {...}
1638
1639SensorValue s = ...;
1640process(s); // compile-time error
1641process((int) s); // type is converted, no function is called
1642\end{lstlisting}
1643
1644
1645\subsection{Structures}
1646
1647Structures in \CFA are basically the same as structures in C.
1648A structure is defined with the same syntax as in C.
1649When referring to a structure in \CFA, users may omit the struct keyword.
1650\begin{lstlisting}
1651struct Point {
1652        double x;
1653        double y;
1654};
1655
1656Point p = {0.0, 0.0};
1657\end{lstlisting}
1658
1659\CFA does not support inheritance among types, but instead uses composition to enable reuse of structure fields.
1660Composition is achieved by embedding one type into another.
1661When type A is embedded in type B, an object with type B may be used as an object of type A, and the fields of type A are directly accessible.
1662Embedding types is achieved using anonymous members.
1663For example, using Point from above:
1664\begin{lstlisting}
1665void foo(Point p);
1666
1667struct ColoredPoint {
1668        Point; // anonymous member (no identifier)
1669        int Color;
1670};
1671...
1672        ColoredPoint cp = ...;
1673        cp.x = 10.3; // x from Point is accessed directly
1674        cp.color = 0x33aaff; // color is accessed normally
1675        foo(cp); // cp can be used directly as a Point
1676\end{lstlisting}
1677
1678
1679\subsection{Constructors and Destructors}
1680
1681\CFA supports C initialization of structures, but it also adds constructors for more advanced initialization.
1682Additionally, \CFA adds destructors that are called when a variable is de-allocated (variable goes out of scope or object is deleted).
1683These functions take a reference to the structure as a parameter (see
1684References for more information).
1685
1686\begin{figure}
1687\begin{lstlisting}
1688struct Widget {
1689        int id;
1690        float size;
1691        Parts *optionalParts;
1692};
1693
1694// ?{} is the constructor operator identifier
1695// The first argument is a reference to the type to initialize
1696// Subsequent arguments can be specified for initialization
1697
1698void ?{}(Widget &w) { // default constructor
1699        w.id = -1;
1700        w.size = 0.0;
1701        w.optionalParts = 0;
1702}
1703
1704// constructor with values (does not need to include all fields)
1705void ?{}(Widget &w, int id, float size) {
1706        w.id = id;
1707        w.size = size;
1708        w.optionalParts = 0;
1709}
1710
1711// ^? is the destructor operator identifier
1712void ^?(Widget &w) { // destructor
1713        w.id = 0;
1714        w.size = 0.0;
1715        if (w.optionalParts != 0) {
1716        // This is the only pointer to optionalParts, free it
1717        free(w.optionalParts);
1718        w.optionalParts = 0;
1719        }
1720}
1721
1722Widget baz; // reserve space only
1723Widget foo{}; // calls default constructor
1724Widget bar{23, 2.45}; // calls constructor with values
1725baz{24, 0.91}; // calls constructor with values
1726?{}(baz, 24, 0.91}; // explicit call to constructor
1727^bar; // explicit call to destructor
1728^?(bar); // explicit call to destructor
1729\end{lstlisting}
1730\caption{Constructors and Destructors}
1731\end{figure}
1732
1733
1734\begin{comment}
1735\section{References}
1736
1737\CFA supports reference types similar to rvalue references in \CC.
1738A reference is essentially an alias for a variable, allowing multiple names to refer to the same object.
1739A reference can be used as a safer alternative to a pointer, because it can be used to pass a variable by reference, but it must always reference an object.
1740It cannot be NULL, it must be assigned a value at initialization, and the object it references cannot change once it is set.
1741By introducing references in parameter types, users are given an easy way to pass a value by reference, without the need for NULL pointer checks.
1742In structures, a reference can replace a pointer to an object that should always have a valid value.
1743When a structure contains a reference, all of its constructors must initialize the reference and all instances of this structure must initialize it upon definition.
1744
1745The syntax for using references in \CFA is the same as \CC with the exception of reference initialization.
1746Use \lstinline@&@ to specify a reference, and access references just like regular objects, not like pointers (use dot notation to access fields).
1747When initializing a reference, \CFA uses a different syntax which differentiates reference initialization from assignment to a reference.
1748The \lstinline@&@ is used on both sides of the expression to clarify that the address of the reference is being set to the address of the variable to which it refers.
1749
1750\begin{figure}
1751\begin{lstlisting}
1752// parameter p is a reference to a Point
1753void movePointUp(Point &p) {
1754        p.y += 1.0; // Uses dot-notation to access fields
1755}
1756
1757Point p1 = ...;
1758ColoredPoint cp1 = ...;
1759movePoint(p1); // reference to p1 passed to movePoint
1760movePoint(cp1); // reference to cp1 passed to movePoint
1761
1762// a ListElement cannot be created without a valid list
1763
1764struct ListElement {
1765        int element;
1766        List &list; // a list element has a reference to the list
1767}
1768
1769// The constructor must initialize the reference
1770void ?{}(ListElement &le, int e, List &l) {
1771        le.element = e;
1772        &le.list = &l; // initialize the reference
1773}
1774
1775ListElement e1{88, numberList}; // uses constructor
1776ListElement e2; // compiler error: uninitialized reference
1777Listing 10: References
1778\end{lstlisting}
1779\end{figure}
1780\end{comment}
1781
1782
1783\section{Overloading}
1784
1785Overloading refers to the capability of a programmer to define and use multiple objects in a program with the same name.
1786In \CFA, a declaration may overload declarations from outer scopes with the same name, instead of hiding them as is the case in C.
1787This may cause identical C and \CFA programs to behave differently.
1788The compiler selects the appropriate object (overload resolution) based on context information at the place where it is used.
1789Overloading allows programmers to give functions with different signatures but similar semantics the same name, simplifying the interface to users.
1790Disadvantages of overloading are that it can be used to give functions with different semantics the same name, causing confusion, or that the compiler may resolve to a different function from what the programmer expected.
1791\CFA allows overloading of functions, operators, variables, and even the constants 0 and 1.
1792
1793The compiler follows some overload resolution rules to determine the best interpretation of all of these overloads.
1794The best valid interpretations are the valid interpretations that use the fewest unsafe conversions.
1795Of these, the best are those where the functions and objects involved are the least polymorphic.
1796Of these, the best have the lowest total conversion cost, including all implicit conversions in the argument expressions.
1797Of these, the best have the highest total conversion cost for the implicit conversions (if any) applied to the argument expressions.
1798If there is no single best valid interpretation, or if the best valid interpretation is ambiguous, then the resulting interpretation is ambiguous.
1799For details about type inference and overload resolution, please see the \CFA Language Specification.
1800\begin{lstlisting}
1801int foo(int a, int b) {
1802        float sum = 0.0;
1803        float special = 1.0;
1804        {
1805                int sum = 0;
1806                // both the float and int versions of sum are available
1807                float special = 4.0;
1808                // this inner special hides the outer version
1809                ...
1810        }
1811        ...
1812}
1813\end{lstlisting}
1814
1815
1816\subsection{Overloaded Constant}
1817
1818The constants 0 and 1 have special meaning.
1819In \CFA, as in C, all scalar types can be incremented and
1820decremented, which is defined in terms of adding or subtracting 1.
1821The operations \lstinline@&&@, \lstinline@||@, and \lstinline@!@ can be applied to any scalar arguments and are defined in terms of comparison against 0 (ex. \lstinline@(a && b)@ becomes \lstinline@(a != 0 && b != 0)@).
1822
1823In C, the integer constants 0 and 1 suffice because the integer promotion rules can convert them to any
1824arithmetic type, and the rules for pointer expressions treat constant expressions evaluating to 0 as a
1825special case.
1826However, user-defined arithmetic types often need the equivalent of a 1 or 0 for their
1827functions or operators, polymorphic functions often need 0 and 1 constants of a type matching their
1828polymorphic parameters, and user-defined pointer-like types may need a null value.
1829Defining special
1830constants for a user-defined type is more efficient than defining a conversion to the type from \lstinline@_Bool@.
1831
1832Why just 0 and 1? Why not other integers? No other integers have special status in C.
1833A facility that let programmers declare specific constants..const Rational 12., for instance. would not be much of an improvement.
1834Some facility for defining the creation of values of programmer-defined types from arbitrary integer tokens would be needed.
1835The complexity of such a feature doesn.t seem worth the gain.
1836
1837For example, to define the constants for a complex type, the programmer would define the following:
1838
1839\begin{lstlisting}
1840struct Complex {
1841        double real;
1842        double imaginary;
1843}
1844
1845const Complex 0 = {0, 0};
1846const Complex 1 = {1, 0};
1847...
1848
1849        Complex a = 0;
1850...
1851
1852        a++;
1853...
1854        if (a) { // same as if (a == 0)
1855...
1856}
1857\end{lstlisting}
1858
1859
1860\subsection{Variable Overloading}
1861
1862The overload rules of \CFA allow a programmer to define multiple variables with the same name, but different types.
1863Allowing overloading of variable names enables programmers to use the same name across multiple types, simplifying naming conventions and is compatible with the other overloading that is allowed.
1864For example, a developer may want to do the following:
1865\begin{lstlisting}
1866int pi = 3;
1867float pi = 3.14;
1868char pi = .p.;
1869\end{lstlisting}
1870
1871
1872\subsection{Function Overloading}
1873
1874Overloaded functions in \CFA are resolved based on the number and type of arguments, type of return value, and the level of specialization required (specialized functions are preferred over generic).
1875
1876The examples below give some basic intuition about how the resolution works.
1877\begin{lstlisting}
1878// Choose the one with less conversions
1879int doSomething(int value) {...} // option 1
1880int doSomething(short value) {...} // option 2
1881
1882int a, b = 4;
1883short c = 2;
1884
1885a = doSomething(b); // chooses option 1
1886a = doSomething(c); // chooses option 2
1887
1888// Choose the specialized version over the generic
1889
1890generic(type T)
1891T bar(T rhs, T lhs) {...} // option 3
1892float bar(float rhs, float lhs){...} // option 4
1893float a, b, c;
1894double d, e, f;
1895c = bar(a, b); // chooses option 4
1896
1897// specialization is preferred over unsafe conversions
1898
1899f = bar(d, e); // chooses option 5
1900\end{lstlisting}
1901
1902
1903\subsection{Operator Overloading}
1904
1905\CFA also allows operators to be overloaded, to simplify the use of user-defined types.
1906Overloading the operators allows the users to use the same syntax for their custom types that they use for built-in types, increasing readability and improving productivity.
1907\CFA uses the following special identifiers to name overloaded operators:
1908
1909\begin{table}[hbt]
1910\hfil
1911\begin{tabular}[t]{ll}
1912%identifier & operation \\ \hline
1913\lstinline@?[?]@ & subscripting \impl{?[?]}\\
1914\lstinline@?()@ & function call \impl{?()}\\
1915\lstinline@?++@ & postfix increment \impl{?++}\\
1916\lstinline@?--@ & postfix decrement \impl{?--}\\
1917\lstinline@++?@ & prefix increment \impl{++?}\\
1918\lstinline@--?@ & prefix decrement \impl{--?}\\
1919\lstinline@*?@ & dereference \impl{*?}\\
1920\lstinline@+?@ & unary plus \impl{+?}\\
1921\lstinline@-?@ & arithmetic negation \impl{-?}\\
1922\lstinline@~?@ & bitwise negation \impl{~?}\\
1923\lstinline@!?@ & logical complement \impl{"!?}\\
1924\lstinline@?*?@ & multiplication \impl{?*?}\\
1925\lstinline@?/?@ & division \impl{?/?}\\
1926\end{tabular}\hfil
1927\begin{tabular}[t]{ll}
1928%identifier & operation \\ \hline
1929\lstinline@?%?@ & remainder \impl{?%?}\\
1930\lstinline@?+?@ & addition \impl{?+?}\\
1931\lstinline@?-?@ & subtraction \impl{?-?}\\
1932\lstinline@?<<?@ & left shift \impl{?<<?}\\
1933\lstinline@?>>?@ & right shift \impl{?>>?}\\
1934\lstinline@?<?@ & less than \impl{?<?}\\
1935\lstinline@?<=?@ & less than or equal \impl{?<=?}\\
1936\lstinline@?>=?@ & greater than or equal \impl{?>=?}\\
1937\lstinline@?>?@ & greater than \impl{?>?}\\
1938\lstinline@?==?@ & equality \impl{?==?}\\
1939\lstinline@?!=?@ & inequality \impl{?"!=?}\\
1940\lstinline@?&?@ & bitwise AND \impl{?&?}\\
1941\end{tabular}\hfil
1942\begin{tabular}[t]{ll}
1943%identifier & operation \\ \hline
1944\lstinline@?^?@ & exclusive OR \impl{?^?}\\
1945\lstinline@?|?@ & inclusive OR \impl{?"|?}\\
1946\lstinline@?=?@ & simple assignment \impl{?=?}\\
1947\lstinline@?*=?@ & multiplication assignment \impl{?*=?}\\
1948\lstinline@?/=?@ & division assignment \impl{?/=?}\\
1949\lstinline@?%=?@ & remainder assignment \impl{?%=?}\\
1950\lstinline@?+=?@ & addition assignment \impl{?+=?}\\
1951\lstinline@?-=?@ & subtraction assignment \impl{?-=?}\\
1952\lstinline@?<<=?@ & left-shift assignment \impl{?<<=?}\\
1953\lstinline@?>>=?@ & right-shift assignment \impl{?>>=?}\\
1954\lstinline@?&=?@ & bitwise AND assignment \impl{?&=?}\\
1955\lstinline@?^=?@ & exclusive OR assignment \impl{?^=?}\\
1956\lstinline@?|=?@ & inclusive OR assignment \impl{?"|=?}\\
1957\end{tabular}
1958\hfil
1959\caption{Operator Identifiers}
1960\label{opids}
1961\end{table}
1962
1963These identifiers are defined such that the question marks in the name identify the location of the operands.
1964These operands represent the parameters to the functions, and define how the operands are mapped to the function call.
1965For example, \lstinline@a + b@ becomes \lstinline@?+?(a, b)@.
1966
1967In the example below, a new type, myComplex, is defined with an overloaded constructor, + operator, and string operator.
1968These operators are called using the normal C syntax.
1969
1970\begin{lstlisting}
1971type Complex = struct { // define a Complex type
1972        double real;
1973        double imag;
1974}
1975
1976// Constructor with default values
1977
1978void ?{}(Complex &c, double real = 0.0, double imag = 0.0) {
1979        c.real = real;
1980        c.imag = imag;
1981}
1982
1983Complex ?+?(Complex lhs, Complex rhs) {
1984        Complex sum;
1985        sum.real = lhs.real + rhs.real;
1986        sum.imag = lhs.imag + rhs.imag;
1987        return sum;
1988}
1989
1990String ()?(const Complex c) {
1991        // use the string conversions for the structure members
1992        return (String)c.real + . + . + (String)c.imag + .i.;
1993}
1994...
1995
1996Complex a, b, c = {1.0}; // constructor for c w/ default imag
1997...
1998c = a + b;
1999print(.sum = . + c);
2000\end{lstlisting}
2001
2002
2003\section{Auto Type-Inferencing}
2004
2005Auto type-inferencing occurs in a declaration where a variable's type is inferred from its initialization expression type.
2006\begin{quote2}
2007\begin{tabular}{@{}l@{\hspace{30pt}}ll@{}}
2008\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CC}}        & \multicolumn{1}{c}{\lstinline@gcc@}\index{gcc} \\
2009\begin{lstlisting}
2010
2011auto j = 3.0 * 4;
2012int i;
2013auto k = i;
2014\end{lstlisting}
2015&
2016\begin{lstlisting}
2017#define expr 3.0 * i
2018typeof(expr) j = expr;
2019int i;
2020typeof(i) k = i;
2021\end{lstlisting}
2022&
2023\begin{lstlisting}
2024
2025// use type of initialization expression
2026
2027// use type of primary variable
2028\end{lstlisting}
2029\end{tabular}
2030\end{quote2}
2031The two important capabilities are:
2032\begin{itemize}
2033\item
2034preventing having to determine or write out long generic types,
2035\item
2036ensure secondary variables, related to a primary variable, always have the same type.
2037\end{itemize}
2038
2039In \CFA, \lstinline@typedef@ provides a mechanism to alias long type names with short ones, both globally and locally, but not eliminate the use of the short name.
2040\lstinline@gcc@ provides \lstinline@typeof@ to declare a secondary variable from a primary variable.
2041\CFA also relies heavily on the specification of the left-hand side of assignment for type inferencing, so in many cases it is crucial to specify the type of the left-hand side to select the correct type of the right-hand expression.
2042Only for overloaded routines with the same return type is variable type-inferencing possible.
2043Finally, \lstinline@auto@ presents the programming problem of tracking down a type when the type is actually needed.
2044For example, given
2045\begin{lstlisting}
2046auto j = `...`
2047\end{lstlisting}
2048and the need to write a routine to compute using \lstinline@j@
2049\begin{lstlisting}
2050void rtn( `...` parm );
2051rtn( j );
2052\end{lstlisting}
2053A programmer must work backwards to determine the type of \lstinline@j@'s initialization expression, reconstructing the possibly long generic type-name.
2054In this situation, having the type name or a short alias is very useful.
2055
2056There is also the conundrum in type inferencing of when to \emph{\Index{brand}} a type.
2057That is, when is the type of the variable more important than the type of its initialization expression.
2058For example, if a change is made in an initialization expression, it can cause hundreds or thousands of cascading type changes and/or errors.
2059At some point, a programmer wants the type of the variable to remain constant and the expression to be in error when it changes.
2060
2061Given \lstinline@typedef@ and \lstinline@typeof@ in \CFA, and the strong need to use the type of left-hand side in inferencing, auto type-inferencing is not supported at this time.
2062Should a significant need arise, this feature can be revisited.
2063
2064
2065\section{Generics}
2066
2067\CFA supports parametric polymorphism to allow users to define generic functions and types.
2068Generics allow programmers to use type variables in place of concrete types so that the code can be reused with multiple types.
2069The type parameters can be restricted to satisfy a set of constraints.
2070This enables \CFA to build fully compiled generic functions and types, unlike other languages like \CC where templates are expanded or must be explicitly instantiated.
2071
2072
2073\subsection{Generic Functions}
2074
2075Generic functions in \CFA are similar to template functions in \CC, and will sometimes be expanded into specialized versions, just like in \CC.
2076The difference, however, is that generic functions in \CFA can also be separately compiled, using function pointers for callers to pass in all needed functionality for the given type.
2077This means that compiled libraries can contain generic functions that can be used by programs linked with them (statically or dynamically).
2078Another advantage over \CC templates is unlike templates, generic functions are statically checked, even without being instantiated.
2079
2080A simple example of using Do.s parametric polymorphism to create a generic swap function would look like this:
2081
2082\begin{lstlisting}
2083generic(type T)
2084void swap(T &a, T &b) {
2085        T tmp = a;
2086        a = b;
2087        b = a;
2088}
2089
2090int a, b;
2091swap(a, b);
2092
2093Point p1, p2;
2094swap(p1, p2);
2095\end{lstlisting}
2096
2097Here, instead of specifying types for the parameters a and b, the function has a generic type parameter, type T.
2098This function can be called with any type, and the compiler will handle generating the proper code for that type, using call site inference to determine the appropriate value for T.
2099
2100
2101\subsection{Bounded Quantification}
2102
2103Some generic functions only work (or make sense) for any type that satisfies a given property.
2104For example, here is a function to pick the minimum of two values of some type.
2105\begin{lstlisting}
2106generic (type T | bool ?<?(T, T) )
2107
2108T min(T a, T b) {
2109        return a < b ? a : b;
2110}
2111\end{lstlisting}
2112
2113It only makes sense to call min with values of a type that has an ordering: a way to decide whether one value is less than another.
2114The ordering function used here is the less than operator, <.
2115The syntax used to reference the operator is discussed in further detail in Operator Overloading.
2116In \CFA, this assertion on the type of a generic is written as the bound, (type T | bool ?<?(T, T)).
2117The \CFA compiler enforces that minis only called with types for which the less than operator is defined, and reports a compile-time error otherwise.
2118
2119Bounds can also involve multiple types, and multiple requirements, as shown below:
2120\begin{lstlisting}
2121generic (type T, type U | { T foo(T, U); U bar(U); })
2122
2123T baz(T t, U u) {
2124        return foo(t, bar(u));
2125}
2126\end{lstlisting}
2127
2128
2129\subsection{Interfaces}
2130
2131Type bounds as shown above are not very informative, merely requiring that a function exists with the right name and type.
2132Suppose you try to call a polymorphic function and \CFA gives you an error that int combine(int, int) is not defined.
2133Can you define it? What is it supposed to do? Perhaps it should compute the sum, or the bitwise and, or the maximum, or the least common multiple; or perhaps it's an operation that can't be defined for integers.
2134The function signature doesn't say.
2135
2136Interfaces gather together a set of function signatures under a common name, which solves two problems.
2137First, an interface name can be used in type bounds instead of function signatures.
2138This avoids repetition when a bound is used in many functions.
2139Second, interfaces explicitly document the existence of a commonly used set of functionality, making programs easier to understand.
2140\begin{lstlisting}
2141generic (type T)
2142interface Orderable {
2143        bool ?<?(T, T);
2144};
2145
2146generic (type T | Orderable(T))
2147T min(T a, T b) {
2148        return a < b ? a : b;
2149}
2150\end{lstlisting}
2151
2152This definition of the interface Orderable makes the generic function min easier to read and understand.
2153Orderable can also be reused for other generic functions, max for example.
2154Interfaces can also build on top of other interfaces.
2155For example:
2156\begin{lstlisting}
2157generic (type T | Orderable(T)
2158interface FooBarable {
2159        int foo(T, T);
2160        int Bar(T, T);
2161};
2162\end{lstlisting}
2163
2164The FooBarable interface specifies all of the bounds of the Orderable interface, plus the additional bounds specified in its definition.
2165A type does not need to specify that it satisfies any interface, the compiler can figure this out at compile time.
2166For example, there is no need to add some special syntax to show that a type implements the Orderable interface, just define a ?<? operator and it is satisfied.
2167
2168
2169\subsection{Generic Typedefs}
2170
2171Type synonyms can be defined generically using the typedef keyword together with a generic type annotation.
2172These can be used to abbreviate complicated type expressions, especially in generic code.
2173\begin{lstlisting}
2174// typedef the generic function pointers for later use
2175
2176generic(type T)
2177typedef int (*predicate)(T);
2178generic(type Captured, type T)
2179typedef void (*callback)(Captured, T);
2180
2181generic(type T)
2182void find(int length, T *array,
2183        predicate(T) p, callback(void *, T)f) {
2184        int i;
2185        for (i = 0; i < length; i++)
2186        if (p(array[i])) f(NULL, array[i]);
2187}
2188\end{lstlisting}
2189
2190
2191\subsection{Generic Types}
2192
2193Generic types are defined using the same mechanisms as those described above for generic functions.
2194This feature allows users to create types that have one or more fields that use generic parameters as types, similar to a template classes in \CC.
2195For example, to make a generic linked list, a placeholder is created for the type of the elements, so that the specific type of the elements in the list need not be specified when defining the list.
2196In C, something like this would have to be done using void pointers and unsafe casting.
2197As in generic functions, Do.s generic types are different from \CC templates in that they can be fully compiled, while \CC templates are more like macro expansions.
2198This means that a \CFA generic type from a compiled library can be used with any type that satisfies the bounds.
2199
2200The syntax for defining a generic type looks very similar to that of a generic function.
2201Generic types support bounds and interfaces, using the same syntax as generic functions.
2202\begin{lstlisting}
2203generic (type T)
2204struct LinkedListElem {
2205        T elem;
2206        LinkedListElem(T) *next;
2207};
2208
2209LinkedListElem *++?(LinkedListElem **elem) {
2210        return *elem = elem->next;
2211}
2212
2213generic (type T)
2214struct LinkedList {
2215        LinkedListElem(T) *head;
2216        unsigned int size;
2217}
2218
2219generic (type T | bool ?==?(T, T))
2220bool contains(LinkedList(T) *list, T elem) {
2221        for(LinkedListElem *iter = list->head; iter != 0; ++iter) {
2222        if (iter->elem == elem) return true;
2223        }
2224        return false;
2225}
2226\end{lstlisting}
2227
2228
2229\section{Safety}
2230
2231Safety, along with productivity, is a key goal of Do.
2232This section discusses the safety features that have been included in \CFA to help programmers create more stable, reliable, and secure code.
2233
2234
2235\subsection{Exceptions}
2236
2237\CFA introduces support for exceptions as an easier way to recover from exceptional conditions that may be detected within a block of code.
2238In C, developers can use error codes and special return values to report to a caller that an error occurred in a function.
2239The major problem with error codes is that they can be easily ignored by the caller.
2240Failure to properly check for errors can result in the caller making incorrect assumptions about the current state or about the return value that are very likely to result in errors later on in the program, making the source of the problem more difficult to find when debugging.
2241An unhandled exception on the other hand will cause a crash, revealing the original source of the erroneous state.
2242
2243Exceptions in \CFA allow a different type of control flow.
2244Throwing an exception terminates execution of the current block, invokes the destructors of variables that are local to the block, and propagates the exception to the parent block.
2245The exception is immediately re-thrown from the parent block unless it is caught as described below.
2246\CFA uses keywords similar to \CC for exception handling.
2247An exception is thrown using a throw statement, which accepts one argument.
2248
2249\begin{lstlisting}
2250        ...
2251
2252        throw 13;
2253
2254        ...
2255\end{lstlisting}
2256
2257An exception can be caught using a catch statement, which specifies the type of the exception it can catch.
2258A catch is specified immediately after a guarded block to signify that it can catch an exception from that block.
2259A guarded block is specified using the try keyword, followed by a block of code inside of curly braces.
2260
2261\begin{lstlisting}
2262        ...
2263
2264        try {
2265                throw 13;
2266        }
2267        catch(int e) {
2268                printf(.caught an exception: %d\n., e);
2269        }
2270\end{lstlisting}
2271
2272
2273\subsection{Memory Management}
2274
2275
2276\subsubsection{Manual Memory Management}
2277
2278Using malloc and free to dynamically allocate memory exposes several potential, and common, errors.
2279First, malloc breaks type safety because it returns a pointer to void.
2280There is no relationship between the type that the returned pointer is cast to, and the amount of memory allocated.
2281This problem is solved with a type-safe malloc.
2282Do.s type-safe malloc does not take any arguments for size.
2283Instead, it infers the type based on the return value, and then allocates space for the inferred type.
2284
2285\begin{lstlisting}
2286float *f = malloc(); // allocates the size of a float
2287
2288struct S {
2289        int i, j, k;
2290};
2291
2292struct S *s = malloc(); // allocates the size of a struct S
2293\end{lstlisting}
2294
2295In addition to the improved malloc, \CFA also provides a technique for combining allocation and initialization into one step, using the new function.
2296For all constructors defined for a given type (see Operator Overloading), a corresponding call to new can be used to allocate and construct that type.
2297
2298\begin{lstlisting}
2299type Complex = struct {
2300        float real;
2301        float imag;
2302};
2303
2304// default constructor
2305
2306void ?{}(Complex &c) {
2307        c.real = 0.0;
2308        c.imag = 0.0;
2309}
2310
2311
2312
2313// 2 parameter constructor
2314
2315void ?{}(Complex &c, float real, float imag) {
2316        c.real = real;
2317        c.imag = imag;
2318}
2319
2320
2321int main() {
2322        Complex c1; // No constructor is called
2323        Complex c2{}; // Default constructor called
2324        Complex c3{1.0, -1.0}; // 2 parameter constructor is called
2325
2326        Complex *p1 = malloc(); // allocate
2327        Complex *p2 = new(); // allocate + default constructor
2328        Complex *p3 = new(0.5, 1.0); // allocate + 2 param constructor
2329}
2330
2331\end{lstlisting}
2332
2333
2334\subsubsection{Automatic Memory Management}
2335
2336\CFA may also support automatic memory management to further improve safety.
2337If the compiler can insert all of the code needed to manage dynamically allocated memory (automatic reference counting), then developers can avoid problems with dangling pointers, double frees, memory leaks, etc.
2338This feature requires further investigation.
2339\CFA will not have a garbage collector, but might use some kind of region-based memory management.
2340
2341
2342\subsection{Unsafe C Constructs}
2343
2344C programmers are able to access all of the low-level tricks that are sometimes needed for close-to-the-hardware programming.
2345Some of these practices however are often error-prone and difficult to read and maintain.
2346Since \CFA is designed to be safer than C, such constructs are disallowed in \CFA code.
2347If a programmer wants to use one of these unsafe C constructs, the unsafe code must be contained in a C linkage block (see Interoperability), which will be compiled like C code.
2348This block means that the user is telling the tools, .I know this is unsafe, but I.m going to do it anyway..
2349
2350The exact set of unsafe C constructs that will be disallowed in \CFA has not yet been decided, but is sure to include pointer arithmetic, pointer casting, etc.
2351Once the full set is decided, the rules will be listed here.
2352
2353
2354\section{Syntactic Anomalies}
2355
2356The number 0 and 1 are treated specially in \CFA, and can be redefined as variables.
2357One syntactic anomaly is when a field in an structure is names 0 or 1:
2358\begin{lstlisting}
2359struct S {
2360        int 0, 1;
2361} s;
2362\end{lstlisting}
2363The problem occurs in accesing these fields using the selection operation ``\lstinline@.@'':
2364\begin{lstlisting}
2365s.0 = 0;        // ambiguity with floating constant .0
2366s.1 = 1;        // ambiguity with floating constant .1
2367\end{lstlisting}
2368To make this work, a space is required after the field selection:
2369\begin{lstlisting}
2370`s.@\textvisiblespace@0` = 0;
2371`s.@\textvisiblespace@1` = 1;
2372\end{lstlisting}
2373While this sytact is awkward, it is unlikely many programers will name fields of a structure 0 or 1.
2374Like the \CC lexical problem with closing template-syntax, e.g, \lstinline@Foo<Bar<int`>>`@, this issue can be solved with a more powerful lexer/parser.
2375
2376There are several ambiguous cases with operator identifiers, e.g., \lstinline@int *?*?()@, where the string \lstinline@*?*?@ can be lexed as \lstinline@*@/\lstinline@?*?@ or \lstinline@*?@/\lstinline@*?@.
2377Since it is common practise to put a unary operator juxtaposed to an identifier, e.g., \lstinline@*i@, users will be annoyed if they cannot do this with respect to operator identifiers.
2378Even with this special hack, there are 5 general cases that cannot be handled.
2379The first case is for the function-call identifier \lstinline@?()@:
2380\begin{lstlisting}
2381int *@\textvisiblespace@?()();  // declaration: space required after '*'
2382*@\textvisiblespace@?()();              // expression: space required after '*'
2383\end{lstlisting}
2384Without the space, the string \lstinline@*?()@ is ambiguous without N character look ahead;
2385it requires scanning ahead to determine if there is a \lstinline@'('@, which is the start of an argument/parameter list.
2386
2387The 4 remaining cases occur in expressions:
2388\begin{lstlisting}
2389i++@\textvisiblespace@?i:0;             // space required before '?'
2390i--@\textvisiblespace@?i:0;             // space required before '?'
2391i@\textvisiblespace@?++i:0;             // space required after '?'
2392i@\textvisiblespace@?--i:0;             // space required after '?'
2393\end{lstlisting}
2394In the first two cases, the string \lstinline@i++?@ is ambiguous, where this string can be lexed as \lstinline@i@ / \lstinline@++?@ or \lstinline@i++@ / \lstinline@?@;
2395it requires scanning ahead to determine if there is a \lstinline@'('@, which is the start of an argument list.
2396In the second two cases, the string \lstinline@?++x@ is ambiguous, where this string can be lexed as \lstinline@?++@ / \lstinline@x@ or \lstinline@?@ / y\lstinline@++x@;
2397it requires scanning ahead to determine if there is a \lstinline@'('@, which is the start of an argument list.
2398
2399
2400\section{Concurrency}
2401
2402Today's processors for nearly all use cases, ranging from embedded systems to large cloud computing servers, are composed of multiple cores, often heterogeneous.
2403As machines grow in complexity, it becomes more difficult for a program to make the most use of the hardware available.
2404\CFA includes built-in concurrency features to enable high performance and improve programmer productivity on these multi-/many-core machines.
2405
2406Concurrency support in \CFA is implemented on top of a highly efficient runtime system of light-weight, M:N, user level threads.
2407The model integrates concurrency features into the language by making the structure type the core unit of concurrency.
2408All communication occurs through method calls, where data is sent via method arguments, and received via the return value.
2409This enables a very familiar interface to all programmers, even those with no parallel programming experience.
2410It also allows the compiler to do static type checking of all communication, a very important safety feature.
2411This controlled communication with type safety has some similarities with channels in Go, and can actually implement
2412channels exactly, as well as create additional communication patterns that channels cannot.
2413Mutex objects, monitors, are used to contain mutual exclusion within an object and synchronization across concurrent threads.
2414
2415Three new keywords are added to support these features:
2416
2417monitor creates a structure with implicit locking when accessing fields
2418
2419mutex implies use of a monitor requiring the implicit locking
2420
2421task creates a type with implicit locking, separate stack, and a thread
2422
2423
2424\subsection{Monitors}
2425
2426A monitor is a structure in \CFA which includes implicit locking of its fields.
2427Users of a monitor interact with it just like any structure, but the compiler handles code as needed to ensure mutual exclusion.
2428An example of the definition of a monitor is shown here:
2429\begin{lstlisting}
2430type Account = monitor {
2431        const unsigned long number; // account number
2432        float balance; // account balance
2433};
2434\end{lstlisting}
2435
2436Since a monitor structure includes an implicit locking mechanism, it does not make sense to copy a monitor;
2437it is always passed by reference.
2438Users can specify to the compiler whether or not a function will require mutual exclusion of the monitor using the mutex modifier on the parameter.
2439When mutex is specified, the compiler inserts locking before executing the body of the function, and unlocking after the body.
2440This means that a function requiring mutual exclusion could block if the lock is already held by another thread.
2441Blocking on a monitor lock does not block the kernel thread, it simply blocks the user thread, which yields its kernel thread while waiting to obtain the lock.
2442If multiple mutex parameters are specified, they will be locked in parameter order (i.e. first parameter is locked first) and unlocked in the
2443reverse order.
2444\begin{lstlisting}
2445// This function accesses a constant field, it does not require
2446// mutual exclusion
2447
2448export unsigned long getAccountNumber(Account &a) {
2449        return a.number;
2450}
2451
2452// This function accesses and modifies a shared field; it
2453// requires mutual exclusion
2454
2455export float withdrawal(mutex Account &a, float amount) {
2456        a.balance -= amount;
2457        return a.balance;
2458}
2459\end{lstlisting}
2460
2461Often, one function using a monitor will call another function using that same monitor.
2462If both require mutual exclusion, then the thread would be waiting for itself to release the lock when it calls the inner function.
2463This situation is resolved by allowing recursive entry (reentrant locks), meaning that if the lock is already held by the caller, it can be locked again.
2464It will still be unlocked the same number of times.
2465An example of this situation is shown below:
2466
2467\begin{lstlisting}
2468// deleting a job from a worker requires mutual exclusion
2469
2470void deleteJob(mutex Worker &w, Job &j) {
2471        ...
2472}
2473
2474// transferring requires mutual exclusion and calls deleteJob
2475
2476void transferJob(mutex Worker &from, Worker &to) {
2477        ...
2478        deleteJob(j);
2479        ...
2480}
2481\end{lstlisting}
2482
2483
2484\subsection{Tasks}
2485
2486\CFA also provides a simple mechanism for creating and utilizing user level threads.
2487A task provides mutual exclusion like a monitor, and also has its own execution state and a thread of control.
2488Similar to a monitor, a task is defined like a structure:
2489\begin{lstlisting}
2490type Adder = task {
2491        int *row;
2492        int size;
2493        int &subtotal;
2494}
2495\end{lstlisting}
2496
2497A task may define a constructor, which will be called upon allocation and run on the caller.s thread.
2498A destructor may also be defined, which is called at de-allocation (when a dynamic object is deleted or when a local object goes out of scope).
2499After a task is allocated and initialized, its thread is spawned implicitly and begins executing in its function call method.
2500All tasks must define this function call method, with a void return value and no additional parameters, or the compiler will report an error.
2501Below are example functions for the above Adder task, and its usage to sum up a matrix on multiple threads.
2502(Note that this example is designed to display the syntax and functionality, not the best method to solve this problem)
2503\begin{lstlisting}
2504void ?{}(Adder &a, int r[], int s, int &st) { // constructor
2505        a.row = r;
2506        a.size = s;
2507        a.subtotal = st;
2508}
2509
2510// implicitly spawn thread and begin execution here
2511
2512void ?()(Adder &a) {
2513        int c;
2514        subtotal = 0;
2515        for (c=0; c<a.size; ++c) {
2516        subtotal += row[c];
2517        }
2518}
2519
2520int main() {
2521        const int rows = 100, cols = 1000000;
2522        int matrix[rows][cols];
2523        int subtotals[rows];
2524        int total = 0;
2525        int r;
2526
2527        { // create a new scope here for our adders
2528        Adder adders[rows];
2529        // read in the matrix
2530        ...
2531        for (r=0; r<rows; ++r) {
2532        // tasks are initialized on this thread
2533        Adders[r] = {matrix[r], cols, subtotals[r]};
2534        Adders[r](); // spawn thread and begin execution
2535        }
2536        } // adders go out of scope; block here until they all finish
2537        total += subtotals[r];
2538        printf(.total is %d\n., total);
2539}
2540\end{lstlisting}
2541
2542
2543\subsection{Cooperative Scheduling}
2544
2545Tasks in \CFA are cooperatively scheduled, meaning that a task will not be interrupted by another task, except at specific yield points.
2546In Listing 31, there are no yield points, so each task runs to completion with no interruptions.
2547Places where a task could yield include waiting for a lock (explicitly or implicitly), waiting for I/O, or waiting for a specific function (or one of a set of functions) to be called.
2548This last option is introduced with the yield function. yield is used to indicate that this task should yield its thread until the specified function is called.
2549For example, the code below defines a monitor that maintains a generic list.
2550When a task tries to pop from the list, but it is empty, the task should yield until another task puts something into the list, with the push function.
2551Similarly, when a task tries to push something onto the list, but it is full, it will yield until another task frees some space with the pop function.
2552
2553\begin{lstlisting}
2554// type T is used as a generic type for all definitions inside
2555// the curly brackets
2556
2557generic(type T) {
2558        type Channel = monitor {
2559        List(T) list; // list is a simple generic list type
2560        };
2561
2562        T pop(mutex &Channel(T) ch) {
2563        if (ch.list.empty()) {
2564        // yield until push is called for this channel
2565        yield(push);
2566        }
2567        return ch.list.pop();
2568        }
2569
2570        void push(mutex &Channel(T)ch, T val) {
2571        if (ch.list.full()) {
2572        // yield until pop is called for this channel
2573        yield(pop);
2574        }
2575        ch.list.push(val);
2576        }
2577}
2578\end{lstlisting}
2579
2580A task can also yield indefinitely by calling yield with no arguments.
2581This will tell the scheduler to yield this task until it is resumed by some other task.
2582A task can resume another task by using its functional call operator.
2583The code below shows a simple ping-pong example, where two tasks yield back and forth to each other using these methods.
2584
2585\begin{lstlisting}
2586type Ping = task {
2587        Pong *partner;
2588};
2589
2590void ?{}(Ping &p, Pong *partner = 0) {
2591        p.partner = partner;
2592}
2593
2594void ?()(Ping &p) {
2595        for(;;) { // loop forever
2596        printf(.ping\n.);
2597        partner(); // resumes the partner task
2598        yield(); // yields this task
2599        }
2600}
2601
2602type Pong = task {
2603        Ping *partner;
2604};
2605
2606void ?{}(Pong &p, Ping *partner = 0) {
2607        p.partner = partner;
2608}
2609
2610void ?()(Pong &p) {
2611        for(;;) { // loop forever
2612        yield(); // yields this task
2613        printf(.pong/n.);
2614        partner(); // resumes the partner task
2615        }
2616}
2617
2618void main() {
2619        Ping ping; // allocate ping
2620        Pong pong{ping}; // allocate, initialize, and start pong
2621        Ping{pong}; // initialize and start ping
2622}
2623\end{lstlisting}
2624
2625The same functionality can be accomplished by providing functions to be called by the partner task.
2626\begin{lstlisting}
2627type Pingpong = task {
2628        String msg;
2629        Pingpong *partner;
2630};
2631
2632void ?{}(Pingpong &p, String msg, Pingpong *partner = 0) {
2633        p.msg = msg;
2634        p.partner = partner;
2635}
2636
2637void ?()(Pingpong &p) {
2638        for(;;) {
2639        yield(go);
2640        }
2641}
2642
2643void go(Pingpong &p) {
2644        print(.%(p.msg)\n.);
2645        go(p.partner);
2646}
2647
2648void main() {
2649        Pingpong ping = {.ping.};
2650        Pingpong pong = {.pong., ping};
2651        ping.partner = pong;
2652        go(ping);
2653}
2654\end{lstlisting}
2655
2656
2657\section{Modules and Packages }
2658
2659\begin{comment}
2660High-level encapsulation is useful for organizing code into reusable units, and accelerating compilation speed.
2661\CFA provides a convenient mechanism for creating, building and sharing groups of functionality that enhances productivity and improves compile time.
2662
2663There are two levels of encapsulation in \CFA, module and package.
2664A module is a logical grouping of functionality that can be easily pulled into another project, much like a module in Python or a package in Go.
2665A module forms a namespace to limit the visibility and prevent naming conflicts of variables.
2666Furthermore, a module is an independent translation unit, which can be compiled separately to accelerate the compilation speed.
2667
2668A package is a physical grouping of one or more modules that is used for code distribution and version management.
2669Package is also the level of granularity at which dependences are managed.
2670A package is similar to the Crate in Rust.
2671
2672
2673\subsection{No Declarations, No Header Files}
2674
2675In C and \CC, it is necessary to declare or define every global variable, global function, and type before it is used in each file.
2676Header files and a preprocessor are normally used to avoid repeating code.
2677Thus, many variables, functions, and types are described twice, which exposes an opportunity for errors and causes additional maintenance work.
2678Instead of following this model, the \CFA tools can extract all of the same information from the code automatically.
2679This information is then stored in the object files for each module, in a format that can quickly be read by the compiler, and stored at the top of the file, for quick access.
2680In addition to the user productivity improvements, this simple change also improves compile time, by saving the information in a simple machine readable format, instead of making the compiler parse the same information over and over from a header file.
2681This seems like a minor change, but according to (Pike, Go at Google: Language Design in the Service of Software Engineering), this simple change can cause massive reductions in compile time.
2682
2683In \CFA, multiple definitions are not necessary.
2684Within a module, all of the module's global definitions are visible throughout the module.
2685For example, the following code compiles, even though isOdd was not declared before being called:
2686\begin{lstlisting}
2687bool isEven(unsigned int x) {
2688        if (x == 0) return true;
2689        else return !isOdd(x);
2690}
2691
2692bool isOdd(unsigned int x) {
2693        if (x == 1) return true;
2694        else return !isEven(x - 2);
2695}
2696\end{lstlisting}
2697
2698Header files in C are used to expose the declarations from a library, so that they can be used externally.
2699With \CFA, this functionality is replaced with module exports, discussed below.
2700When building a \CFA module which needs to be callable from C code, users can use the tools to generate a header file suitable for including in these C files with all of the needed declarations.
2701
2702In order to interoperate with existing C code, \CFA files can still include header files, the contents of which will be enclosed in a C linkage section to indicate C calling conventions (see Interoperability for more information).
2703
2704
2705\subsection{Modules}
2706
2707A module typically contains a set of related types and methods, with some objects accessible from outside the package, and some limited to use inside the module.
2708These modules can then be easily shared and reused in multiple projects.
2709As modules are intended to be distributed for reuse, they should generally have stable, well-defined interfaces.
2710
2711\CFA adds the following keywords to express the module systems: module, export, import, as.
2712
2713
2714\subsubsection{Module Declaration}
2715
2716The syntax to declare a module is module moduleName;.
2717
2718The module declaration must be at the beginning of a file, and each file can only belong to one module.
2719If there is no module declaration at the beginning of a file, the file belongs to the global module.
2720A module can span several files.
2721By convention, a module and the files belonging to the module have additional mapping relationship which is described in the Do-Lang Tooling documentation.
2722
2723The moduleName follows the same rules of a variable name, except that it can use slash "/" to indicate the module/sub-module relationship.
2724For example, container/vector is a valid module name, where container is the parent module name, and vector is the sub-module under container.
2725
2726Only the interfaces of a module are visible from outside, when the module is imported. export is a type decorator to declare a module interface.
2727A method, a global variable or a type can be declared as a module interface.
2728Types defined in a module and referenced by an exported function or a variable must be exported, too.
2729
2730The following code is a simple module declaration example.
2731\begin{lstlisting}
2732module M;
2733
2734//visible outside module M
2735
2736export int f(int i) { return i + 1; }
2737export double aCounter;
2738
2739//not visible outside module M
2740
2741int g(int i) { return i - 1; }
2742
2743double bCounter;
2744\end{lstlisting}
2745
2746export module moduleName; can be use to re-export all the visible (exported) names in moduleName from the current module.
2747
2748
2749\subsubsection{Module Import}
2750
2751The syntax to import a module is import moduleName; or import moduleName as anotherName;.
2752One package cannot be imported with both of the two types of syntax in one file.
2753A package imported in one file will only be visible in this file.
2754For example, two files, A and B belong to the same module.
2755If file A imports another module, M, the exported names in M are not visible in file B.
2756
2757All of the exported names are visible in the file that imports the module.
2758The exported names can be accessed within a namespace based on the module name in the first syntax (ex moduleName.foo).
2759If moduleName has several elements separated by '/' to describe a sub-module (ex. import container/vector;), the last element in the moduleName is used as the namespace to access the visible names in that module (ex vector.add(...);).
2760The as keyword is used to confine the imported names in a unique namespace (ex. anotherName.foo). anotherName must be a valid identifier (same rules as a variable name) which means it cannot have '/' in it.
2761Conflicts in namespaces will be reported by the compiler.
2762The second method can be used to solve conflicting name problems.
2763The following code snippets show the two situations.
2764
2765\begin{lstlisting}
2766module util/counter;
2767export int f(int i) { return i+1; }
2768
2769import util/counter;
2770
2771int main() {
2772        return counter.f(200); // f() from the package counter
2773}
2774
2775import util/counter as ct;
2776int main() {
2777        return ct.f(200); // f() from the package counter
2778}
2779\end{lstlisting}
2780
2781
2782Additionally, using the .as. syntax, a user can force the compiler to add the imported names into the current namespace using .as ..With these module rules, the following module definitions and imports can be achieved without any problem.
2783
2784\begin{lstlisting}
2785module M1;
2786export int f(int i) { return i+1;} // visible outside
2787
2788int g(int i) { return i-1;} // not visible outside
2789
2790module M2;
2791int f(int i) { return i * 2; } // not visible outside
2792export int g(int g) { return i / 2; } // visible outside
2793
2794import M1 as .;
2795
2796import M2 as .;
2797
2798
2799int main() {
2800        return f(3) + g(4); //f() from M1 and g() from M2;
2801}
2802\end{lstlisting}
2803
2804
2805\subsubsection{Sub-Module and Module Aggregation}
2806
2807Several modules can be organized in a parent module and sub-modules relationship.
2808The sub-module names are based on hierarchical naming, and use slash, "/", to indicate the relationship.
2809For example, std/vector and std/io are sub-modules of module std.
2810The exported names in a sub-module are NOT visible if the parent module is imported, which means the exported names in the sub-module are
2811not implicitly exported in the parent module.
2812
2813Aggregation is a mechanism to support components and simplified importing.
2814The mechanism is not based on naming but based on manual declaration.
2815For example, the following is the aggregated sequence module.
2816The export {...} is syntactic sugar for many lines of export module aModule;.
2817If an aggregated module is imported, all the included modules in the aggregation are imported.
2818
2819\begin{lstlisting}
2820module std/sequence;
2821
2822export {
2823        module std/vector;
2824        module std/list;
2825        module std/array;
2826        module std/deque;
2827        module std/forward_list;
2828        module std/queue;
2829        module std/stack;
2830};
2831\end{lstlisting}
2832
2833After importing the aggregated module, each individual name is still contained in the original name space.
2834For example, vector.add() and list.add() should be used to reference the add methods if there are add methods in both the vector module and the list module.
2835
2836
2837\subsubsection{Import from Repository}
2838
2839When a module is imported, the tools locate the module in the one of the accessible package paths (defined by command line flag or environment variable).
2840The tools also support retrieving modules of a package from external repositories.
2841See Listing 40: Package directory structure
2842
2843
2844\subsubsection{Package Import}
2845
2846Because packages are the places where the building tool looks for modules, there is no code required in the \CFA source file to import a package.
2847In order to use modules in a package, the programmer needs to guide the building tool to locate the right package by 1) Adding the package's parent path into \$DOPATH;
2848or 2) Adding the package dependence into the current project's Do.prj.
2849More details about locating a module in a package are explained in the next section.
2850
2851
2852\subsubsection{Package Versioning}
2853
2854A package must have a version number.
2855The version number is a string.
2856For example "1.0", "1.a", "A1", and "1ec5fab753eb979d3886a491845b8ae152d58c8f" are all valid version numbers.
2857By convention, a package is stored in a directory named packageName-packageVersion.
2858For example, the util package with version 1.1 is stored in a directory named util-1.1.
2859
2860The project description file can optionally specify the version of the package used in the current project.
2861If not defined, because the version number is a string, and all the different versions for the same package will be sorted in increasing order, the package with the largest version number will be used in the compilation.
2862The builder tool will record the specific package version used in the build in the project's "Do.lock" file to enable fully repeatable builds.
2863
2864
2865\subsection{Module and Package Organization}
2866
2867\CFA has two level of encapsulations, module and package.
2868This section explains the object model of modules, packages and other language concepts.
2869It also explains how programmers should organize their code, and the method used by the build tools to locate packages, and import modules for compilation.
2870
2871
2872\subsubsection{Object Model}
2873
2874There are several concepts in Do.
2875\begin{itemize}
2876\item
2877File: a \CFA source file
2878\item
2879Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside
2880\item
2881Package: a container to organize modules for distribution; It has attributes like name, author,
2882version, dependences, etc.
2883\item
2884Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
2885\end{itemize}
2886
2887The following rules summarize the object model of all the above concepts:
2888\begin{itemize}
2889\item
2890A module contains one or more files
2891\begin{itemize}
2892\item
2893One file can only belong to one module
2894\item
2895A module has its name and interfaces exported
2896\item
2897A file without a module declaration at the beginning belongs to the global module
2898\item
2899\end{itemize}
2900
2901\item
2902A package contains one or more modules
2903\begin{itemize}
2904\item
2905A package has additional meta info described in Do.prj file
2906\item
2907A package may be dependent on other packages.
2908\end{itemize}
2909
2910\item
2911A project contains one or more modules in its source code
2912\begin{itemize}
2913\item
2914A project has additional meta info described in Do.prj file
2915\item
2916A project may be dependent on other packages
2917\item
2918A project can be transformed into a package for distribution
2919\item
2920A project can generate one or more executable binaries
2921\end{itemize}
2922\end{itemize}
2923
2924
2925\subsubsection{Module File Organization}
2926
2927The rules of this section are the conventions to organize module files in one package.
2928
2929The file location of a module in a package must match the module/submodule naming hierarchy.
2930The names separated by slash "/" must match the directory levels.
2931If only one file is used to implement one module, there is no need to put the module implementation file inside a sub-directory.
2932The file can be put inside its parent module's sub-directory with the sub module's name as the file name.
2933
2934Here is an example of a package, util.
2935\begin{lstlisting}
2936+ util
2937Do.prj #package description file
2938        heap.do #Case 1: module heap;
2939        list.do #Case 1: mdoule list;
2940        ring.do #Case 1: module ring;
2941        + string #Case 2
2942        impl1.do #module string;
2943        + std
2944        vector.do
2945        list.do
2946        + array #Case 3
2947        array1.do #module std/array;
2948        array2.do #module std/array;
2949        sequence.do #Case 4, module std/sequence;
2950        test.do #Case 5
2951\end{lstlisting}
2952
2953\begin{itemize}
2954\item
2955Case 1: Each individual file implements a module
2956\item
2957Case 2: Put the implementation of a module under the sub-directory, but there is only one file
2958\item
2959Case 3: Put the implementation of a module under the sub-directory; There are several files to
2960implement one module
2961\item
2962Case 4: One file to express one aggregation
2963\item
2964Case 5: The file does not belong to any module; It is used for testing purpose
2965\end{itemize}
2966
2967The example only uses source code, ".do" files, to show the module file organization.
2968Other module packaging formats, like binary, must also follow the same rules.
2969
2970
2971\subsection{Module File Format}
2972
2973\CFA supports different types of module file formats.
2974
2975\begin{itemize}
2976\item
2977Pure source code format: The files should be organized following the previous section's definition.
2978\item
2979IR format (TBD): The \CFA compiler IR format, similar to the source code format
2980\item
2981Binary format, including ".a" static library or ".so" dynamic linkage library
2982\begin{itemize}
2983\item
2984The file's name must match the right level's module name defined in the previous section
2985\item
2986E.g. "util.so" includes all modules for the package util.
2987\item
2988E.g. "string.so" under the package directory to include files belonging to "module string;"
2989\end{itemize}
2990\item.
2991Archive format
2992\begin{itemize}
2993\item
2994The archive is named as ".dar", and is a zip archive of the source code or the binary for a package
2995\item
2996E.g. "util.dar" is the whole package for util package including the package direction file
2997\end{itemize}
2998\item
2999Hybrid format
3000\begin{itemize}
3001\item
3002A package can be distributed partly in source code, partly in binary format, and/or packaged in the archive format
3003\item
3004The only limitation is that the names of the files must match the module location names defined in previous section
3005\end{itemize}
3006\end{itemize}
3007Package and Module Locating and the \CFA Language Tooling documentation for more details.
3008
3009
3010\subsection{Packages}
3011
3012A package is synonymous with a library in other languages.
3013The intent of the package level encapsulation is to facilitate code distribution, version control, and dependence management.
3014A package is a physical grouping of one or more modules in a directory (an archive file for a directory).
3015The concept of a package is the convention for grouping code, and the contract between the language and the building tool to search for imported modules.
3016
3017
3018\subsubsection{Package Definition}
3019
3020A package is defined by putting a project description file, Do.prj, with one or more modules into a directory.
3021This project description file contains the package's meta data, including package name, author, version, dependences, etc.
3022It should be in the root of the package directory.
3023
3024The modules in the package could be either source code, or compiled binary format.
3025The location of the module files should follow the module name's path.
3026
3027Here is a simple example of the directory structure of a package, core.
3028It contains a module std and several sub-modules under std.
3029\begin{lstlisting}
3030+ core
3031        Do.prj
3032        + std
3033        + io
3034        file.do # module std/io/file;
3035        network.do #module std/io/network;
3036        + container
3037        vector.do #module std/container/vector;
3038        list.do #module std/container/list;
3039\end{lstlisting}
3040
3041
3042\subsubsection{Package Import}
3043
3044Because packages are the places where the building tool looks for modules, there is no code required in the \CFA source file to import a package.
3045In order to use modules in a package, the programmer needs to guide the building tool to locate the right package by 1) Adding the package's parent path into \$DOPATH; or 2) Adding the package dependence into the current project's Do.prj.
3046More details about locating a module in a package are explained in the next section.
3047
3048
3049\subsubsection{Package Versioning}
3050
3051A package must have a version number.
3052The version number is a string.
3053For example "1.0", "1.a", "A1", and "1ec5fab753eb979d3886a491845b8ae152d58c8f" are all valid version numbers.
3054By convention, a package is stored in a directory named packageName-packageVersion.
3055For example, the util package with version 1.1 is stored in a directory named util-1.1.
3056
3057The project description file can optionally specify the version of the package used in the current project.
3058If not defined, because the version number is a string, and all the different versions for the same package will be sorted in increasing order, the package with the largest version number will be used in the compilation.
3059The builder tool will record the specific package version used in the build in the project's "Do.lock" file to enable fully repeatable builds.
3060
3061
3062\subsection{Module and Package Organization}
3063
3064\CFA has two level of encapsulations, module and package.
3065This section explains the object model of modules, packages and other language concepts.
3066It also explains how programmers should organize their code, and the method used by the build tools to locate packages, and import modules for compilation.
3067
3068
3069\subsubsection{Object Model}
3070
3071There are several concepts in Do.
3072\begin{itemize}
3073\item
3074File: a \CFA source file
3075\item
3076Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside
3077\item
3078Package: a container to organize modules for distribution; It has attributes like name, author, version, dependences, etc.
3079\item
3080Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
3081\end{itemize}
3082
3083The following rules summarize the object model of all the above concepts:
3084\begin{itemize}
3085\item
3086A module contains one or more files
3087\begin{itemize}
3088\item
3089One file can only belong to one module
3090\item
3091A module has its name and interfaces exported
3092\item
3093A file without a module declaration at the beginning belongs to the global module
3094\end{itemize}
3095\item
3096A package contains one or more modules
3097\begin{itemize}
3098\item
3099A package has additional meta info described in Do.prj file
3100\item
3101A package may be dependent on other packages.
3102\end{itemize}
3103\item
3104A project contains one or more modules in its source code
3105\begin{itemize}
3106\item
3107A project has additional meta info described in Do.prj file
3108\item
3109A project may be dependent on other packages
3110\item
3111A project can be transformed into a package for distribution
3112\item
3113A project can generate one or more executable binaries
3114\end{itemize}
3115\end{itemize}
3116
3117
3118\subsubsection{Module File Organization}
3119
3120The rules of this section are the conventions to organize module files in one package.
3121
3122The file location of a module in a package must match the module/submodule naming hierarchy.
3123The names separated by slash "/" must match the directory levels.
3124If only one file is used to implement one module, there is no need to put the module implementation file inside a sub-directory.
3125The file can be put inside its parent module's sub-directory with the sub module's name as the file name.
3126
3127Here is an example of a package, util.
3128\begin{lstlisting}
3129+ util
3130        Do.prj #package description file
3131        heap.do #Case 1: module heap;
3132        list.do #Case 1: mdoule list;
3133        ring.do #Case 1: module ring;
3134        + string #Case 2
3135        impl1.do #module string;
3136        + std
3137        vector.do
3138        list.do
3139        + array #Case 3
3140        array1.do #module std/array;
3141        array2.do #module std/array;
3142        sequence.do #Case 4, module std/sequence;
3143        test.do #Case 5
3144\end{lstlisting}
3145
3146
3147\begin{itemize}
3148\item
3149Case 1: Each individual file implements a module
3150\item
3151Case 2: Put the implementation of a module under the sub-directory, but there is only one file
3152\item
3153Case 3: Put the implementation of a module under the sub-directory; There are several files to implement one module
3154\item
3155Case 4: One file to express one aggregation
3156\item
3157Case 5: The file does not belong to any module; It is used for testing purpose
3158\end{itemize}
3159
3160The example only uses source code, ".do" files, to show the module file organization.
3161Other module packaging formats, like binary, must also follow the same rules.
3162
3163
3164\subsubsection{Module File Format}
3165
3166\CFA supports different types of module file formats.
3167
3168\begin{itemize}
3169\item
3170Pure source code format: The files should be organized following the previous section's definition.
3171\item
3172IR format (TBD): The \CFA compiler IR format, similar to the source code format
3173\item
3174Binary format, including ".a" static library or ".so" dynamic linkage library
3175\begin{itemize}
3176\item
3177The file's name must match the right level's module name defined in the previous section
3178\item
3179E.g. "util.so" includes all modules for the package util.
3180\item
3181E.g. "string.so" under the package directory to include files belonging to "module string;"
3182\end{itemize}
3183\item
3184Archive format
3185\begin{itemize}
3186\item
3187The archive is named as ".dar", and is a zip archive of the source code or the binary for a package
3188\item
3189E.g. "util.dar" is the whole package for util package including the package direction file
3190\end{itemize}
3191\item
3192Hybrid format
3193\begin{itemize}
3194\item
3195A package can be distributed partly in source code, partly in binary format, and/or packaged in the archive format
3196\item
3197The only limitation is that the names of the files must match the module location names defined in previous section
3198\end{itemize}
3199\end{itemize}
3200
3201
3202\subsection{Package and Module Locating}
3203
3204The high-level build tools provided by \CFA will handle finding a package in your local filesystem or retrieving it from a repository if necessary, building it if necessary, and linking with it.
3205If a programmer prefers, one can directly call the compiler, docc to build the source files and create and link to static libraries.
3206
3207When a source file imports a module, the \CFA build tool and docc compiler will locate the module according to the following order:
3208
3209\begin{enumerate}
3210\item
3211This source file's directory tree, which is typically the project's src directory
3212\item
3213All of the dependent packages (in a directory or in an archive file) under the current \CFA project's pkg directory
3214\item
3215The dependent packages (in a directory or in an archive file) inside the paths defined in the DOPATH environment variable
3216\item
3217The dependent packages (in a directory or in an archive file) inside the global \CFA SDK installation's pkg directory
3218\item
3219If one dependent package is still not found, the builder tool will automatically retrieve it from the repository defined in the SDK installation's configuration, and store it in the SDK's pkg directory
3220\end{enumerate}
3221
3222The module found first in a package will shadow the modules with the same name in the later packages in the search sequence.
3223
3224
3225\subsubsection{Dependent Package}
3226
3227Dependent packages are those packages containing modules that the current project's source code will import from.
3228Dependent packages are defined implicitly or explicitly in one \CFA project.
3229All of the packages under the current project's pkg directory are implicitly dependent packages.
3230For others, the dependent packages must be defined in the project's Do.prj file.
3231
3232
3233\subsubsection{Package and Module Locating Example}
3234
3235\begin{lstlisting}
3236# A project's source code tree
3237
3238--------------------------------------
3239
3240+ testProject
3241        Do.prj
3242        + src
3243        main.do
3244        + pkg
3245        + security-1.1
3246        Do.prj
3247        security.do #module security
3248
3249--------------------------------------
3250
3251# Do.prj
3252
3253--------------------------------------
3254
3255[dependences]
3256std
3257util = "0.2"
3258
3259--------------------------------------
3260
3261# main.do
3262
3263---------------------------------------
3264
3265import security;
3266import std/vector;
3267import container;
3268
3269----------------------------------------
3270\end{lstlisting}
3271
3272
3273\begin{lstlisting}
3274# pkg directory's source code tree
3275
3276-----------------------------------------
3277
3278+ pkg
3279        + std-1.0
3280        Do.prj
3281        vector.do #module std/vector;
3282        queue.do #module std/queue;
3283        + std-1.1
3284        Do.prj
3285        vector.do #module std/vector;
3286        queue.do #module std/queue;
3287        list.do #module std/list;
3288        + util-0.1
3289        Do.prj
3290        container.do #module container;
3291        + security-1.0
3292        security.do #module security;
3293------------------------------------------
3294\end{lstlisting}
3295
3296
3297During the compiling of main.do file import security;
3298The security module appears in both the local security-1.1 package, and the global security-1.0 package.
3299According to the locating sequence, the local security module in security-1.1 will be used.
3300And because the security-1.1 package is under local's pkg directory.
3301No dependence description is required in the project Do.prj file.
3302
3303import std/vector;
3304
3305The std/vector package appears in two different versions' packages in the global path and the project dependence doesn't specify the version. std-1.1 is used in this case.
3306
3307import container;
3308
3309The Do.prj specifies the version 0.2 should be used to locate container module from util package but only version 0.1 is available in the local file system.
3310The builder tool then will try to retrieve it from the web and store it in the global pkg directory.
3311After that, the container module from the newly downloaded package will be used in the compilation.
3312\end{comment}
3313
3314
3315\section{Comparison with Other Languages}
3316
3317\CFA is one of many languages that attempts to improve upon C.
3318In developing \CFA, many other languages were consulted for ideas, constructs, and syntax.
3319Therefore, it is important to show how these languages each compare with Do.
3320In this section, \CFA is compared with what the writers of this document consider to be the closest competitors of Do: \CC, Go, Rust, and D.
3321
3322
3323\subsection{Comparing Key Features of \CFA}
3324
3325
3326\subsubsection{Constructors and Destructors}
3327
3328\lstset{basicstyle=\sf\relsize{-2}}
3329
3330\begin{flushleft}
3331\begin{tabular}{@{}l|l|l|l@{}}
3332\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3333\hline
3334\begin{lstlisting}
3335struct Line {
3336        float length;
3337}
3338// default constructor
3339void ?{}( Line * l ) {
3340        sout | "default" | endl;
3341        l.length = 0.0;
3342}
3343
3344
3345// constructor with length
3346void ?{}( Line * l, float length ) {
3347        sout | "length" | length | endl;
3348
3349        l.length = length;
3350}
3351
3352// destructor
3353void ^?(Line &l) {
3354        sout | "destroyed" | endl;
3355        l.length = 0.0;
3356}
3357
3358// usage
3359Line line1;
3360Line line2{ 3.4 };
3361\end{lstlisting}
3362&
3363\begin{lstlisting}[language=C++]
3364class Line {
3365        float length;
3366
3367        // default constructor
3368        Line() {
3369                cout << "default" << endl;
3370                length = 0.0;
3371        }
3372
3373
3374        // constructor with length
3375        Line( float l ) {
3376                cout << "length " << length
3377                         << endl;
3378                length = l;
3379        }
3380
3381        // destructor
3382        ~Line() {
3383                cout << "destroyed" << endl;
3384                length = 0.0;
3385        }
3386}
3387// usage
3388Line line1;
3389Line line2( 3.4 );
3390\end{lstlisting}
3391&
3392\begin{lstlisting}[language=Golang]
3393type Line struct {
3394        length float32
3395}
3396// default constructor
3397func makeLine() Line {
3398        fmt.PrintLn( "default" )
3399        return Line{0.0}
3400}
3401
3402
3403// constructor with length
3404func makeLine( length float32 ) Line {
3405        fmt.Printf( "length %v", length )
3406
3407        return Line{length}
3408}
3409
3410// no destructor
3411
3412
3413
3414
3415
3416// usage
3417line1 := makeLine()
3418line2 := makeLine( 3.4 )
3419\end{lstlisting}
3420&
3421\begin{lstlisting}
3422struct Line {
3423        length: f32
3424}
3425// default constructor
3426impl Default for Line {
3427        fn default () -> Line {
3428                println!( "default" );
3429                Line{ length: 0.0 }
3430        }
3431}
3432// constructor with length
3433impl Line {
3434        fn make( len: f32 ) -> Line {
3435                println!( "length: {}", len );
3436                Line{ length: len }
3437        }
3438}
3439// destructor
3440impl Drop for Line {
3441        fn drop( &mut self ) {
3442                self.length = 0.0
3443        }
3444}
3445// usage
3446let line1:Line = Default::default();
3447Line line2( 3.4 );
3448\end{lstlisting}
3449\end{tabular}
3450\end{flushleft}
3451
3452
3453\subsubsection{Operator Overloading}
3454
3455\begin{flushleft}
3456\begin{tabular}{@{}l|l|l|l@{}}
3457\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3458\hline
3459\begin{lstlisting}
3460struct Cpx {
3461        double re, im;
3462};
3463// overload addition operator
3464Cpx ?+?( Cpx l, const Cpx r ) {
3465        return (Cpx){l.re+l.im, l.im+r.im};
3466}
3467Cpx a, b, c;
3468c = a + b;
3469\end{lstlisting}
3470&
3471\begin{lstlisting}
3472struct Cpx {
3473        double re, im;
3474};
3475// overload addition operator
3476Cpx operator+( Cpx l, const Cpx r ) {
3477        return (Cpx){l.re+l.im, l.im+r.im};
3478}
3479Cpx a, b, c;
3480c = a + b;
3481\end{lstlisting}
3482&
3483\begin{lstlisting}
3484// no operator overloading
3485
3486
3487
3488
3489
3490
3491
3492\end{lstlisting}
3493&
3494\begin{lstlisting}
3495struct Cpx {
3496        re: f32,
3497        im: f32
3498}
3499// overload addition operator
3500impl Add for Cpx {
3501        type Output = Cpx
3502        fn add(self, r: Cpx) -> Cpx {
3503                let mut res = Cpx{re: 0.0, im: 0.0};
3504                res.re = self.re + r.re;
3505                res.im = self.im + r.im;
3506                return res
3507        }
3508}
3509let (a, b, mut c) = ...;
3510c = a + b
3511\end{lstlisting}
3512\end{tabular}
3513\end{flushleft}
3514
3515
3516\subsubsection{Calling C Functions}
3517
3518\begin{flushleft}
3519\begin{tabular}{@{}l|l|l@{}}
3520\multicolumn{1}{c|}{\textbf{\CFA/\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}   \\
3521\hline
3522\begin{lstlisting}[boxpos=t]
3523extern "C" {
3524#include <sys/types.h>
3525#include <sys/stat.h>
3526#include <unistd.h>
3527}
3528size_t fileSize( const char *path ) {
3529        struct stat s;
3530        stat(path, &s);
3531        return s.st_size;
3532}
3533\end{lstlisting}
3534&
3535\begin{lstlisting}[boxpos=t]
3536/*
3537#cgo
3538#include <sys/types.h>
3539#include <sys/stat.h>
3540#include <unistd.h>
3541*/
3542import "C"
3543import "unsafe"
3544
3545func fileSize(path string) C.size_t {
3546        var buf C.struct_stat
3547        c_string := C.CString(path)
3548        C.stat(p, &buf)
3549        C.free(unsafe.Pointer(c_string))
3550        return buf._st_size
3551}
3552\end{lstlisting}
3553&
3554\begin{lstlisting}[boxpos=t]
3555use libc::{c_int, size_t};
3556// translated from sys/stat.h
3557#[repr(C)]
3558struct stat_t {
3559        ...
3560        st_size: size_t,
3561        ...
3562}
3563#[link(name = "libc")]
3564extern {
3565        fn stat(path: *const u8,
3566        buf: *mut stat_t) -> c_int;
3567}
3568fn fileSize(path: *const u8) -> size_t
3569{
3570        unsafe {
3571                let mut buf: stat_t = uninit();
3572                stat(path, &mut buf);
3573                buf.st_size
3574        }
3575}
3576\end{lstlisting}
3577\end{tabular}
3578\end{flushleft}
3579
3580
3581\subsubsection{Generic Functions}
3582
3583\begin{flushleft}
3584\begin{tabular}{@{}l|l|l|l@{}}
3585\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3586\hline
3587\begin{lstlisting}
3588generic(type T, type N |
3589        { int ?<?(N, N); })
3590T *maximize(N (*f)(const T&),
3591        int n, T *a) {
3592        T *bestX = NULL;
3593        N bestN;
3594        for (int i = 0; i < n; i++) {
3595        N curN = f(a[i]);
3596        if (bestX == NULL ||
3597        curN > bestN) {
3598        bestX = &a[i]; bestN = curN;
3599        }
3600        }
3601        return bestX;
3602}
3603
3604string *longest(int n, string *p)
3605{
3606        return maximize(length, n, p);
3607}
3608\end{lstlisting}
3609&
3610\begin{lstlisting}
3611template<typename T, typename F>
3612T *maximize(const F &f,
3613        int n, T *a) {
3614        typedef decltype(f(a[0])) N;
3615        T *bestX = NULL;
3616        N bestN;
3617        for (int i = 0; i < n; i++) {
3618        N curN = f(a[i]);
3619        if (bestX == NULL || curN > bestN)
3620        {
3621        bestX = &a[i]; bestN = curN;
3622        }
3623        }
3624        return bestX;
3625}
3626
3627string *longest(int n, string *p) {
3628        return maximize(
3629        [](const string &s) {
3630        return s.length();
3631        }, n, p);
3632}
3633\end{lstlisting}
3634&
3635\begin{lstlisting}
3636// Go does not support generics!
3637func maximize(
3638        gt func(interface{}, interface{}) bool,
3639        f func(interface{}) interface{},
3640        a []interface{}) interface{} {
3641        var bestX interface{} = nil
3642        var bestN interface{} = nil
3643        for _, x := range a {
3644        curN := f(x)
3645        if bestX == nil || gt(curN, bestN)
3646        {
3647        bestN = curN
3648        bestX = x
3649        }
3650        }
3651        return bestX
3652}
3653
3654func longest(
3655        a []interface{}) interface{} {
3656        return maximize(
3657        func(a, b interface{}) bool {
3658        return a.(int) > b.(int) },
3659        func(s interface{}) interface{} {
3660        return len(s.(string)) },
3661        a).(string)
3662}
3663\end{lstlisting}
3664&
3665\begin{lstlisting}
3666use std::cmp::Ordering;
3667
3668fn maximize<N: Ord + Copy, T, F:
3669Fn(&T) -> N>(f: F, a: &Vec<T>) ->
3670Option<&T> {
3671        let mut best_x: Option<&T> = None;
3672        let mut best_n: Option<N> = None;
3673        for x in a {
3674        let n = f(x);
3675        if (match best_n { None => true,
3676        Some(bn) =>
3677        n.cmp(&bn) == Ordering::Greater })
3678        {
3679        best_x = Some(x);
3680        best_n = Some(n);
3681        }
3682        }
3683        return best_x
3684}
3685
3686fn longest(a: &Vec<String>) ->
3687        Option<&String> {
3688        return
3689        maximize(|x: &String| x.len(), a)
3690}
3691\end{lstlisting}
3692\end{tabular}
3693\end{flushleft}
3694
3695
3696\begin{comment}
3697\subsubsection{Modules/Packages}
3698
3699\begin{lstlisting}
3700\CFA
3701\CC
3702
3703
3704module example/M;
3705
3706export int inc(int val) {
3707        return val + 1;
3708}
3709
3710
3711
3712
3713--------------------------------------
3714//Use the module in another file
3715import example/M;
3716int main() {
3717        print(M.inc(100));
3718        return 0;
3719}
3720// Using \CC17 module proposal
3721
3722module example.M;
3723
3724export {
3725        int inc(int val);
3726}
3727
3728int inc(inv val) {
3729        return val + 1;
3730}
3731--------------------------------------
3732// Use the module in another file
3733import example.M;
3734int main() {
3735        cout << inc(100) << endl;
3736        return 0;
3737}
3738
3739Go
3740Rust
3741package example/M;
3742
3743func Inc(val int32) int32 {
3744        // Capitalization indicates exported
3745        return val + 100
3746}
3747
3748
3749--------------------------------------
3750//Use the package in another file
3751package main
3752import .fmt.
3753import "example/M"
3754
3755func main() int32 {
3756        fmt.Printf(.%v., M.Inc(100))
3757}
3758pub mod example {
3759        pub mod M {
3760        pub inc(val i32) -> i32 {
3761        return val + 100;
3762        }
3763        }
3764}
3765
3766--------------------------------------
3767//Use the module in another file
3768use example::M;
3769
3770
3771
3772fn main() {
3773        println!(.{}., M::inc(100));
3774}
3775\end{lstlisting}
3776\end{comment}
3777
3778
3779\subsubsection{Parallel Tasks}
3780
3781\begin{flushleft}
3782\begin{tabular}{@{}l|l|l|l@{}}
3783\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3784\hline
3785\begin{lstlisting}
3786task Nonzero {
3787        int *data;
3788        int start;
3789        int end;
3790        int* res;
3791};
3792
3793void ?{}(Nonzero &a, int d[], int s,
3794        int e, int* subres) {
3795        // constructor
3796        a.data = d;
3797        a.start = s;
3798        a.end = e;
3799        a.res = subres;
3800}
3801
3802// implicitly spawn thread here
3803void ?()(NonzeroCounter &a) {
3804        int i;
3805        int nonzero = 0;
3806        for (i=start; c<end; ++i) {
3807        if(a.data[i]!=0){ nonzero++;}
3808        }
3809        *a.res = nonzero;
3810}
3811
3812int main() {
3813        int sz = ...
3814        int data[sz] = ...;
3815        int r1 = 0, r2=0;
3816        int res;
3817        { // create a scope for Nonzero
3818        Nonzero n1{data, 0, sz/2, &n1};
3819        Nonzero n2{data, sz/2, sz, &n2};
3820        n1();//spawn
3821        n2();//spawn
3822        }
3823        res = r1+r2;
3824        return res;
3825}
3826\end{lstlisting}
3827&
3828\begin{lstlisting}
3829#include <thread>
3830#include <mutex>
3831
3832std::mutex m;
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845void task(const vector<int>&v,
3846        int* res, size_t s,
3847        size_t e) {
3848        int non_zero = 0;
3849        for(size_t i = s; i < e; ++i){
3850        if(v[i]!=0) { non_zero++;}
3851        }
3852        std::unique_lock<mutex> lck {m};
3853        *res += non_zero;
3854}
3855
3856int main() {
3857        vector<int> data = ...; //data
3858        int res = 0;
3859        std::thread t1 {task, ref(data),
3860        &res, 0,
3861        data.size()/2};
3862        std::thread t2 {task, ref(data),
3863        &res, data.size()/2,
3864        data.size()};
3865        t1.join();
3866        t2.join();
3867        return res;
3868}
3869\end{lstlisting}
3870&
3871\begin{lstlisting}
3872package main
3873
3874import "fmt"
3875
3876func nonzero(data []int, c chan int) {
3877        nz := 0
3878        for _, v:=range data {
3879        if(v!=0) { nz := nz+1 }
3880        }
3881        c <- nz
3882}
3883
3884func main() {
3885        sz := ...
3886        data := make([]int, sz)
3887        ... // data init
3888        go nonzero(data[:len(data)/2], c)
3889        go nonzero(data[len(data)/2:], c)
3890        n1, n2 := <-c, <-c
3891        res := n1 + n2
3892        fmt.Println(res)
3893}
3894\end{lstlisting}
3895&
3896\begin{lstlisting}
3897use std::thread;
3898use std::sync:mpsc::channel;
3899
3900fn main() {
3901        let sz = ...;
3902        let mut data:Vec<i32> =
3903        Vec::with_capacity(sz as usize);
3904        ... //init data
3905        let (tx, rx) = channel();
3906        for i in 0..1 {
3907        let tx = tx.clone();
3908        let data = data.clone()
3909        thread::spawn(move|| {
3910        let mut nz := 0;
3911        let mut s = 0;
3912        let mut e = sz / 2;
3913        if i == 1 {
3914        s = sz/2;
3915        e = data.len();
3916        }
3917        for i in s..(e - 1) {
3918        if data[i] != 0 (
3919        nz = nz + 1
3920        }
3921        }
3922        tx.send(nz).unwrap();
3923        });
3924        }
3925        let res = rx.recv().unwrap() +
3926        rx.recv().unwrap();
3927        println!(.{}., res);
3928}
3929\end{lstlisting}
3930\end{tabular}
3931\end{flushleft}
3932
3933\lstset{basicstyle=\sf\relsize{-1}}
3934
3935
3936\subsection{Summary of Language Comparison}
3937
3938
3939\subsubsection{\CC}
3940
3941\CC is a general-purpose programming language.
3942It has imperative, object-oriented and generic programming features, while also providing facilities for low-level memory manipulation. (Wikipedia)
3943
3944The primary focus of \CC seems to be adding object-oriented programming to C, and this is the primary difference between \CC and Do.
3945\CC uses classes to encapsulate data and the functions that operate on that data, and to hide the internal representation of the data.
3946\CFA uses modules instead to perform these same tasks.
3947Classes in \CC also enable inheritance among types.
3948Instead of inheritance, \CFA embraces composition and interfaces to achieve the same goals with more flexibility.
3949There are many studies and articles comparing inheritance and composition (or is-a versus has-a relationships), so we will not go into more detail here (Venners, 1998) (Pike, Go at Google: Language Design in the Service of Software Engineering , 2012).
3950
3951Overloading in \CFA is very similar to overloading in \CC, with the exception of the additional use, in \CFA, of the return type to differentiate between overloaded functions.
3952References and exceptions in \CFA are heavily based on the same features from \CC.
3953The mechanism for interoperating with C code in \CFA is also borrowed from \CC.
3954
3955Both \CFA and \CC provide generics, and the syntax is quite similar.
3956The key difference between the two, is that in \CC templates are expanded at compile time for each type for which the template is instantiated, while in \CFA, function pointers are used to make the generic fully compilable.
3957This means that a generic function can be defined in a compiled library, and still be used as expected from source.
3958
3959
3960\subsubsection{Go}
3961
3962Go, also commonly referred to as golang, is a programming language developed at Google in 2007 [.].
3963It is a statically typed language with syntax loosely derived from that of C, adding garbage collection, type
3964safety, some structural typing capabilities, additional built-in types such as variable-length arrays and key-value maps, and a large standard library. (Wikipedia)
3965
3966Go and \CFA differ significantly in syntax and implementation, but the underlying core concepts of the two languages are aligned.
3967Both Go and \CFA use composition and interfaces as opposed to inheritance to enable encapsulation and abstraction.
3968Both languages (along with their tooling ecosystem) provide a simple packaging mechanism for building units of code for easy sharing and reuse.
3969Both languages also include built-in light weight, user level threading concurrency features that attempt to simplify the effort and thought process required for writing parallel programs while maintaining high performance.
3970
3971Go has a significant runtime which handles the scheduling of its light weight threads, and performs garbage collection, among other tasks.
3972\CFA uses a cooperative scheduling algorithm for its tasks, and uses automatic reference counting to enable advanced memory management without garbage collection.
3973This results in Go requiring significant overhead to interface with C libraries while \CFA has no overhead.
3974
3975
3976\subsubsection{Rust}
3977
3978Rust is a general-purpose, multi-paradigm, compiled programming language developed by Mozilla Research.
3979It is designed to be a "safe, concurrent, practical language", supporting pure-functional, concurrent-actor[dubious . discuss][citation needed], imperative-procedural, and object-oriented styles.
3980
3981The primary focus of Rust is in safety, especially in concurrent programs.
3982To enforce a high level of safety, Rust has added ownership as a core feature of the language to guarantee memory safety.
3983This safety comes at the cost of a difficult learning curve, a change in the thought model of the program, and often some runtime overhead.
3984
3985Aside from those key differences, Rust and \CFA also have several similarities.
3986Both languages support no overhead interoperability with C and have minimal runtimes.
3987Both languages support inheritance and polymorphism through the use of interfaces (traits).
3988
3989
3990\subsubsection{D}
3991
3992The D programming language is an object-oriented, imperative, multi-paradigm system programming
3993language created by Walter Bright of Digital Mars and released in 2001. [.]
3994Though it originated as a re-engineering of \CC, D is a distinct language, having redesigned some core \CC features while also taking inspiration from other languages, notably Java, Python, Ruby, C\#, and Eiffel.
3995
3996D and \CFA both start with C and add productivity features.
3997The obvious difference is that D uses classes and inheritance while \CFA uses composition and interfaces.
3998D is closer to \CFA than \CC since it is limited to single inheritance and also supports interfaces.
3999Like \CC, and unlike \CFA, D uses garbage collection and has compile-time expanded templates.
4000D does not have any built-in concurrency constructs in the
4001language, though it does have a standard library for concurrency which includes the low-level primitives for concurrency.
4002
4003
4004\appendix
4005
4006
4007\section{Incompatible}
4008
4009The following incompatibles exist between C and \CFA, and are similar to Annex C for \CC~\cite{ANSI14:C++}.
4010
4011\begin{enumerate}
4012\item
4013Change type of character literal \lstinline@int@ to \lstinline@char@.
4014This change allows overloading differentiation argument type matching, e.g.:
4015\begin{lstlisting}
4016int function( int i );
4017int function( char c );
4018function( 'x' );
4019\end{lstlisting}
4020It is preferable that this call match the second version of function rather than the first. \\
4021Effect on original feature: Change to semantics of well-defined feature. ISO C programs which depend on
4022\begin{lstlisting}
4023sizeof('x') == sizeof(int)
4024\end{lstlisting}
4025will not work the same as C++ programs. \\
4026Difficulty of converting: Simple. \\
4027How widely used: Programs which depend upon sizeof('x') are probably rare.
4028
4029\item
4030Change: String literals made \lstinline@const@ \\
4031The type of a string literal is changed from \lstinline@array of char@ to \lstinline@array of const char@.
4032The type of a wide string literal is changed from \lstinline@array of wchar_t@ to \lstinline@array of const wchar_t@. \\
4033Rationale: This avoids calling an inappropriate overloaded function, which might expect to be able to modify its argument.
4034Effect on original feature: Change to semantics of well-defined feature. \\
4035Difficulty of converting: Simple syntactic transformation, because string literals can be converted to \lstinline@char*;@ (4.2).
4036The most common cases are handled by a new but deprecated standard conversion:
4037\begin{lstlisting}
4038char* p = "abc"; // valid in C, deprecated in C++
4039char* q = expr ? "abc" : "de"; // valid in C, invalid in C++
4040\end{lstlisting}
4041How widely used: Programs that have a legitimate reason to treat string literals as pointers to potentially modifiable memory are probably rare.
4042
4043\item
4044Change: C++ does not have \emph{tentative definitions} as in C.
4045E.g., at file scope,
4046\begin{lstlisting}
4047int i;
4048int i;
4049\end{lstlisting}
4050is valid in C, invalid in C++.
4051This makes it impossible to define mutually referential file-local static
4052objects, if initializers are restricted to the syntactic forms of C. For example,
4053\begin{lstlisting}
4054struct X { int i; struct X *next; };
4055static struct X a;
4056static struct X b = { 0, &a };
4057static struct X a = { 1, &b };
4058\end{lstlisting}
4059Rationale: This avoids having different initialization rules for builtin types and userdefined types.
4060Effect on original feature: Deletion of semantically welldefined feature. \\
4061Difficulty of converting: Semantic transformation.
4062In C++, the initializer for one of a set of mutuallyreferential filelocal static objects must invoke a function call to achieve the initialization.
4063How widely used: Seldom.
4064
4065\item
4066Change: A struct is a scope in C++, not in C \\
4067Rationale: Class scope is crucial to C++, and a struct is a class. \\
4068Effect on original feature: Change to semantics of well-defined feature. \\
4069Difficulty of converting: Semantic transformation. \\
4070How widely used: C programs use struct extremely frequently, but the change is only noticeable when struct, enumeration, or enumerator names are referred to outside the struct.
4071The latter is probably rare.
4072
4073\CFA is C \emph{incompatible} on this issue, and provides semantics similar to \CC.
4074Nested types are not hoisted and can be referenced using the field selection operator ``\lstinline@.@'', unlike the \CC scope-resolution operator ``\lstinline@::@''.
4075Given that nested types in C are equivalent to not using them, i.e., they are essentially useless, it is unlikely there are any realistic usages that break because of this incompatibility.
4076
4077
4078\item
4079Change: In C++, the name of a nested class is local to its enclosing class.
4080In C the name of the nested class belongs to the same scope as the name of the outermost enclosing class
4081Example:
4082\begin{lstlisting}
4083struct X {
4084struct Y { /* ... */ } y;
4085};
4086struct Y yy; // valid C, invalid C++
4087\end{lstlisting}
4088Rationale: C++ classes have member functions which require that classes establish scopes. The C rule
4089would leave classes as an incomplete scope mechanism which would prevent C++ programmers from maintaining
4090locality within a class. A coherent set of scope rules for C++ based on the C rule would be very
4091complicated and C++ programmers would be unable to predict reliably the meanings of nontrivial examples
4092involving nested or local functions.
4093Effect on original feature: Change of semantics of welldefined
4094feature.
4095Difficulty of converting: Semantic transformation. To make the struct type name visible in the scope of
4096the enclosing struct, the struct tag could be declared in the scope of the enclosing struct, before the enclosing
4097struct is defined. Example:
4098\begin{lstlisting}
4099struct Y; // struct Y and struct X are at the same scope
4100struct X {
4101struct Y { /* ... */ } y;
4102};
4103\end{lstlisting}
4104All the definitions of C struct types enclosed in other struct definitions and accessed outside the scope of
4105the enclosing struct could be exported to the scope of the enclosing struct. Note: this is a consequence of
4106the difference in scope rules, which is documented in 3.3.
4107How widely used: Seldom.
4108\end{enumerate}
4109
4110
4111\section{I/O Library}
4112\label{s:IOLibrary}
4113\index{input/output library}
4114
4115The goal for \CFA I/O is to make I/O as simple as possible for the general case, while fully supporting polmorphism and user defined types in a consistent way.
4116The general case is printing out a sequence of variables separated by whitespace.
4117\begin{quote2}
4118\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
4119\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{\CC}}      \\
4120\begin{lstlisting}
4121int x = 0, y = 1, z = 2;
4122`sout` `|` x `|` y `|` z `| endl`;
4123\end{lstlisting}
4124&
4125\begin{lstlisting}
4126
4127cout << x << " " << y << " " << z << endl;
4128\end{lstlisting}
4129\end{tabular}
4130\end{quote2}
4131The \CFA form is half as many characters, and is similar to Python I/O with respect to implicit separators.
4132
4133The logical-or operator is used because it is the lowest-priority overloadable operator, other than assignment.
4134Therefore, fewer output expressions require parenthesis.
4135\begin{quote2}
4136\begin{tabular}{@{}ll@{}}
4137\textbf{\CFA:}
4138&
4139\begin{lstlisting}
4140sout | x * 3 | y + 1 | z << 2 | x == y | (x | y) | (x || y) | (x > z ? 1 : 2) | endl;
4141\end{lstlisting}
4142\\
4143\textbf{\CC:}
4144&
4145\begin{lstlisting}
4146cout << x * 3 << y + 1 << (z << 2) << (x == y) << (x | y) << (x || y) << (x > z ? 1 : 2) << endl;
4147\end{lstlisting}
4148\end{tabular}
4149\end{quote2}
4150Finally, the logical-or operator has a link with the Shell pipe-operator for moving data, although data flows in the opposite direction.
4151
4152The implicit seperator\index{I/O separator} character (space/blank) is a separator not a terminator.
4153The rules for implicitly adding the separator are:
4154\begin{enumerate}
4155\item
4156A seperator does not appear at the start or end of a line.
4157\begin{lstlisting}[belowskip=0pt]
4158sout 1 | 2 | 3 | endl;
4159\end{lstlisting}
4160\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
41611 2 3
4162\end{lstlisting}
4163\item
4164A seperator does not appear before or after a character literal or variable.
4165\begin{lstlisting}
4166sout | '1' | '2' | '3' | endl;
4167123
4168\end{lstlisting}
4169\item
4170A seperator does not appear before or after a null (empty) C string
4171\begin{lstlisting}
4172sout | 1 | "" | 2 | "" | 3 | endl;
4173123
4174\end{lstlisting}
4175which is a local mechanism to disable insertion of the separator character.
4176\item
4177A seperator does not appear before a C string starting with the \Index{extended ASCII}\index{ASCII} characters: \lstinline[mathescape=off]@([{$£¥¿«@
4178%$
4179\begin{lstlisting}[mathescape=off]
4180sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x $" | 4 | "x £" | 5 | "x ¥" | 6 | "x ¿" | 7 | "x «" | 8 | endl;
4181\end{lstlisting}
4182%$
4183\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4184x (1 x [2 x {3 x $4 x £5 x ¥6 x ¿7 x «8
4185\end{lstlisting}
4186%$
4187\item
4188A seperator does not appear after a C string ending with the extended ASCII characters: \lstinline@,.:;!?)]}%¢»@
4189\begin{lstlisting}[belowskip=0pt]
4190sout | 1 | ", x" | 2 | ". x" | 3 | ": x" | 4 | "; x" | 5 | "! x" | 6 | "? x" | 7 | ") x" | 8 | "] x" | 9 | "} x"
4191         | 10 | "% x" | 11 | L"¢ x" | 12 | L"» x" | endl;
4192\end{lstlisting}
4193\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
41941, x 2. x 3: x 4; x 5! x 6? x 7) x 8] x 9} x 10% x 11¢ 12»
4195\end{lstlisting}
4196\item
4197A seperator does not appear before or after a C string begining/ending with the characters: \lstinline@\f\n\r\t\v\`'"@
4198\begin{lstlisting}[belowskip=0pt]
4199sout | "x '" | 1 | "' x \`" | 2 | "\` x \"" | 3 | "\" x" | endl;
4200\end{lstlisting}
4201\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4202x '1' x \`2\` x "3" x
4203\end{lstlisting}
4204\begin{lstlisting}[showtabs=true,aboveskip=0pt]
4205sout | "x\t" | 1 | "\tx" | endl;
4206x       1       x
4207\end{lstlisting}
4208\end{enumerate}
4209The following \CC-style \Index{manipulator}s allow further control over implicit seperation.
4210\begin{lstlisting}[mathescape=off,belowskip=0pt]
4211sout | sepOn | 1 | 2 | 3 | sepOn | endl;        // separator at start of line
4212\end{lstlisting}
4213\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4214 1 2 3
4215\end{lstlisting}
4216\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4217sout | 1 | sepOff | 2 | 3 | endl;                       // turn off implicit separator temporarily
4218\end{lstlisting}
4219\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
422012 3
4221\end{lstlisting}
4222\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4223sout | sepDisable | 1 | 2 | 3 | endl;           // turn off implicit separation, affects all subsequent prints
4224\end{lstlisting}
4225\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4226123
4227\end{lstlisting}
4228\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4229sout | 1 | sepOn | 2 | 3 | endl;                        // turn on implicit separator temporarily
4230\end{lstlisting}
4231\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
42321 23
4233\end{lstlisting}
4234\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4235sout | sepEnable | 1 | 2 | 3 | endl;            // turn on implicit separation, affects all subsequent prints
4236\end{lstlisting}
4237\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
42381 2 3
4239\end{lstlisting}
4240\begin{lstlisting}[mathescape=off,aboveskip=0pt,aboveskip=0pt,belowskip=0pt]
4241sepSet( sout, ", $" );                                          // change separator from " " to ", $"
4242sout | 1 | 2 | 3 | endl;
4243\end{lstlisting}
4244%$
4245\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt]
42461, $2, $3
4247\end{lstlisting}
4248%$
4249\VRef[Figure]{f:ExampleIO} shows an example of input and output I/O in \CFA.
4250
4251\begin{figure}
4252\begin{lstlisting}[mathescape=off]
4253#include <fstream>
4254
4255int main() {
4256        char c;                                                                                                         // basic types
4257        short int si;
4258        unsigned short int usi;
4259        int i;
4260        unsigned int ui;
4261        long int li;
4262        unsigned long int uli;
4263        long long int lli;
4264        unsigned long long int ulli;
4265        float f;
4266        double d;
4267        long double ld;
4268        float _Complex fc;
4269        double _Complex dc;
4270        long double _Complex ldc;
4271        char s1[10], s2[10];
4272
4273        ifstream in;                                                                                            // create / open file
4274        open( &in, "input.data", "r" );
4275
4276        &in | &c                                                                                                        // character
4277                | &si | &usi | &i | &ui | &li | &uli | &lli | &ulli             // integral
4278                | &f | &d | &ld                                                                                 // floating point
4279                | &fc | &dc | &ldc                                                                              // floating-point complex
4280                | cstr( s1 ) | cstr( s2, 10 );                                                  // C string, length unchecked and checked
4281
4282        sout | c | ' ' | endl                                                                           // character
4283                 | si | usi | i | ui | li | uli | lli | ulli | endl             // integral
4284                 | f | d | ld | endl                                                                    // floating point
4285                 | fc | dc | ldc | endl;                                                                // complex
4286        sout | endl;
4287        sout | f | "" | d | "" | ld | endl                                                      // floating point without separator
4288                 | sepDisable | fc | dc | ldc | sepEnable | endl                // complex without separator
4289                 | sepOn | s1 | sepOff | s2 | endl                                              // local separator removal
4290                 | s1 | "" | s2 | endl;                                                                 // C string withou separator
4291        sout | endl;
4292        sepSet( sout, ", $" );                                                                          // change separator, maximum of 15 characters
4293        sout | f | d | ld | endl                                                                        // floating point without separator
4294                 | fc | dc | ldc | endl                                                                 // complex without separator
4295                 | s1 | s2 | endl;
4296}
4297
4298$ cat input.data
4299A 1 2 3 4 5 6 7 8 1.1 1.2 1.3 1.1+2.3 1.1-2.3 1.1-2.3 abc xyz
4300$ a.out
4301A
43021 2 3 4 5 6 7 8
43031.1 1.2 1.3
43041.1+2.3i 1.1-2.3i 1.1-2.3i
4305
43061.11.21.3
43071.1+2.3i1.1-2.3i1.1-2.3i
4308 abcxyz
4309abcxyz
4310
43111.1, $1.2, $1.3
43121.1+2.3i, $1.1-2.3i, $1.1-2.3i
4313abc, $xyz
4314\end{lstlisting}
4315\caption{Example I/O}
4316\label{f:ExampleIO}
4317\end{figure}
4318
4319
4320\section{Standard Library}
4321\label{s:StandardLibrary}
4322
4323The goal of the \CFA standard-library is to wrap many of the existing C library-routines that are explicitly polymorphic into implicitly polymorphic versions.
4324
4325
4326\subsection{malloc}
4327
4328\begin{lstlisting}
4329forall( otype T ) T * malloc( void );
4330forall( otype T ) T * malloc( char fill );
4331forall( otype T ) T * malloc( T * ptr, size_t size );
4332forall( otype T ) T * malloc( T * ptr, size_t size, unsigned char fill );
4333forall( otype T ) T * calloc( size_t size );
4334forall( otype T ) T * realloc( T * ptr, size_t size );
4335forall( otype T ) T * realloc( T * ptr, size_t size, unsigned char fill );
4336
4337forall( otype T ) T * aligned_alloc( size_t alignment );
4338forall( otype T ) T * memalign( size_t alignment );             // deprecated
4339forall( otype T ) int posix_memalign( T ** ptr, size_t alignment );
4340
4341forall( otype T ) T * memset( T * ptr, unsigned char fill ); // use default value '\0' for fill
4342forall( otype T ) T * memset( T * ptr );                                // remove when default value available
4343\end{lstlisting}
4344
4345
4346\subsection{ato/strto}
4347
4348\begin{lstlisting}
4349int ato( const char * ptr );
4350unsigned int ato( const char * ptr );
4351long int ato( const char * ptr );
4352unsigned long int ato( const char * ptr );
4353long long int ato( const char * ptr );
4354unsigned long long int ato( const char * ptr );
4355float ato( const char * ptr );
4356double ato( const char * ptr );
4357long double ato( const char * ptr );
4358float _Complex ato( const char * ptr );
4359double _Complex ato( const char * ptr );
4360long double _Complex ato( const char * ptr );
4361
4362int strto( const char * sptr, char ** eptr, int base );
4363unsigned int strto( const char * sptr, char ** eptr, int base );
4364long int strto( const char * sptr, char ** eptr, int base );
4365unsigned long int strto( const char * sptr, char ** eptr, int base );
4366long long int strto( const char * sptr, char ** eptr, int base );
4367unsigned long long int strto( const char * sptr, char ** eptr, int base );
4368float strto( const char * sptr, char ** eptr );
4369double strto( const char * sptr, char ** eptr );
4370long double strto( const char * sptr, char ** eptr );
4371float _Complex strto( const char * sptr, char ** eptr );
4372double _Complex strto( const char * sptr, char ** eptr );
4373long double _Complex strto( const char * sptr, char ** eptr );
4374\end{lstlisting}
4375
4376
4377\subsection{bsearch/qsort}
4378
4379\begin{lstlisting}
4380forall( otype T | { int ?<?( T, T ); } )
4381T * bsearch( const T key, const T * arr, size_t dimension );
4382
4383forall( otype T | { int ?<?( T, T ); } )
4384void qsort( const T * arr, size_t dimension );
4385\end{lstlisting}
4386
4387
4388\subsection{abs}
4389
4390\begin{lstlisting}
4391char abs( char );
4392extern "C" {
4393int abs( int );                         // use default C routine for int
4394} // extern "C"
4395long int abs( long int );
4396long long int abs( long long int );
4397float abs( float );
4398double abs( double );
4399long double abs( long double );
4400float _Complex abs( float _Complex );
4401double _Complex abs( double _Complex );
4402long double _Complex abs( long double _Complex );
4403\end{lstlisting}
4404
4405
4406\subsection{floor/ceil}
4407
4408\begin{lstlisting}
4409float floor( float );
4410extern "C" {
4411double floor( double );         // use C routine for double
4412} // extern "C"
4413long double floor( long double );
4414
4415float ceil( float );
4416extern "C" {
4417double ceil( double );          // use C routine for double
4418} // extern "C"
4419long double ceil( long double );
4420\end{lstlisting}
4421
4422
4423\subsection{random}
4424
4425\begin{lstlisting}
4426void rand48seed( long int s );
4427char rand48();
4428int rand48();
4429unsigned int rand48();
4430long int rand48();
4431unsigned long int rand48();
4432float rand48();
4433double rand48();
4434float _Complex rand48();
4435double _Complex rand48();
4436long double _Complex rand48();
4437\end{lstlisting}
4438
4439
4440\subsection{min/max/swap}
4441
4442\begin{lstlisting}
4443forall( otype T | { int ?<?( T, T ); } )
4444T min( const T t1, const T t2 );
4445
4446forall( otype T | { int ?>?( T, T ); } )
4447T max( const T t1, const T t2 );
4448
4449forall( otype T )
4450void swap( T * t1, T * t2 );
4451\end{lstlisting}
4452
4453
4454\section{Rational Numbers}
4455\label{s:RationalNumbers}
4456
4457Rational numbers are numbers written as a ratio, i.e., as a fraction, where the numerator (top number) and the denominator (bottom number) are whole numbers.
4458When creating and computing with rational numbers, results are constantly reduced to keep the numerator and denominator as small as possible.
4459
4460\begin{lstlisting}
4461// implementation
4462struct Rational {
4463        long int numerator, denominator;                                        // invariant: denominator > 0
4464}; // Rational
4465
4466// constants
4467extern struct Rational 0;
4468extern struct Rational 1;
4469
4470// constructors
4471Rational rational();
4472Rational rational( long int n );
4473Rational rational( long int n, long int d );
4474
4475// getter/setter for numerator/denominator
4476long int numerator( Rational r );
4477long int numerator( Rational r, long int n );
4478long int denominator( Rational r );
4479long int denominator( Rational r, long int d );
4480
4481// comparison
4482int ?==?( Rational l, Rational r );
4483int ?!=?( Rational l, Rational r );
4484int ?<?( Rational l, Rational r );
4485int ?<=?( Rational l, Rational r );
4486int ?>?( Rational l, Rational r );
4487int ?>=?( Rational l, Rational r );
4488
4489// arithmetic
4490Rational -?( Rational r );
4491Rational ?+?( Rational l, Rational r );
4492Rational ?-?( Rational l, Rational r );
4493Rational ?*?( Rational l, Rational r );
4494Rational ?/?( Rational l, Rational r );
4495
4496// conversion
4497double widen( Rational r );
4498Rational narrow( double f, long int md );
4499
4500// I/O
4501forall( dtype istype | istream( istype ) ) istype * ?|?( istype *, Rational * );
4502forall( dtype ostype | ostream( ostype ) ) ostype * ?|?( ostype *, Rational );
4503\end{lstlisting}
4504
4505
4506\bibliographystyle{plain}
4507\bibliography{cfa}
4508
4509
4510\addcontentsline{toc}{section}{\indexname} % add index name to table of contents
4511\begin{theindex}
4512Italic page numbers give the location of the main entry for the referenced term.
4513Plain page numbers denote uses of the indexed term.
4514Entries for grammar non-terminals are italicized.
4515A typewriter font is used for grammar terminals and program identifiers.
4516\indexspace
4517\input{user.ind}
4518\end{theindex}
4519
4520
4521\end{document}
4522
4523% Local Variables: %
4524% tab-width: 4 %
4525% fill-column: 100 %
4526% compile-command: "make" %
4527% End: %
Note: See TracBrowser for help on using the repository browser.