source: doc/user/user.tex @ 42851542

ADTaaron-thesisarm-ehast-experimentalcleanup-dtorsctordeferred_resndemanglerenumforall-pointer-decaygc_noraiijacob/cs343-translationjenkins-sandboxmemorynew-astnew-ast-unique-exprnew-envno_listpersistent-indexerpthread-emulationqualifiedEnumresolv-newwith_gc
Last change on this file since 42851542 was 0638c44, checked in by Peter A. Buhr <pabuhr@…>, 8 years ago

more formatting changes to documents, update I/O for examples

  • Property mode set to 100644
File size: 178.8 KB
Line 
1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -*- Mode: Latex -*- %%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%%
3%% Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
4%%
5%% The contents of this file are covered under the licence agreement in the
6%% file "LICENCE" distributed with Cforall.
7%%
8%% user.tex --
9%%
10%% Author           : Peter A. Buhr
11%% Created On       : Wed Apr  6 14:53:29 2016
12%% Last Modified By : Peter A. Buhr
13%% Last Modified On : Tue May  3 08:05:33 2016
14%% Update Count     : 246
15%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
17% requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
18
19% inline code ©...© (copyright symbol) emacs: C-q M-)
20% red highlighting ®...® (registered trademark sumbol) emacs: C-q M-.
21% latex escape §...§ (section symbol) emacs: C-q M-'
22% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
23% math escape $...$ (dollar symbol)
24
25\documentclass[openright,twoside]{article}
26%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
27
28% Latex packages used in the document.
29\usepackage[T1]{fontenc}                                % allow Latin1 (extended ASCII) characters
30\usepackage{textcomp}
31\usepackage[latin1]{inputenc}
32\usepackage{upquote}
33\usepackage{fullpage,times}
34\usepackage{epic,eepic}
35\usepackage{xspace}
36\usepackage{varioref}
37\usepackage{listings}
38\usepackage{footmisc}
39\usepackage{comment}
40\usepackage{latexsym}                                   % \Box
41\usepackage{mathptmx}                                   % better math font with "times"
42\usepackage[pagewise]{lineno}
43\renewcommand{\linenumberfont}{\scriptsize\sffamily}
44\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
45\usepackage{breakurl}
46\renewcommand{\UrlFont}{\small\sf}
47
48%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
49
50% Bespoke macros used in the document.
51\input{common}
52
53%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
54
55% Names used in the document.
56
57\newcommand{\Version}{1.0.0}
58\newcommand{\CS}{C\raisebox{-0.9ex}{\large$^\sharp$}\xspace}
59
60%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
61
62\setcounter{secnumdepth}{3}                             % number subsubsections
63\setcounter{tocdepth}{3}                                % subsubsections in table of contents
64\makeindex
65
66%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
67
68\begin{document}
69\pagestyle{headings}
70\linenumbers                                            % comment out to turn off line numbering
71
72\title{\Huge
73\vspace*{1in}
74\CFA (\CFL) User Manual                         \\
75Version 1.0                                                     \\
76\vspace*{0.25in}
77\huge``describe not prescribe''         \\
78\vspace*{1in}
79}% title
80\author{\huge
81Peter A. Buhr and ...                           \\
82}% author
83\date{
84DRAFT \\
85\today
86}% date
87
88\pagenumbering{roman}
89\pagestyle{plain}
90
91\maketitle
92
93\vspace*{\fill}
94\thispagestyle{empty}
95\noindent
96\copyright\,2016 \CFA Project \\ \\
97\noindent
98This work is licensed under the Creative Commons Attribution 4.0 International License.
99To view a copy of this license, visit {\small\url{http://creativecommons.org/licenses/by/4.0}}.
100\vspace*{1in}
101
102\clearpage
103\pdfbookmark[1]{Contents}{section}
104\tableofcontents
105
106\clearpage
107\pagenumbering{arabic}
108
109
110\section{Introduction}
111
112\CFA\footnote{Pronounced ``C-for-all'', and written \CFA, CFA, or \CFL.} is a modern general-purpose programming-language, designed an as evolutionary step forward from the C programming language.
113The syntax of the \CFA language builds from C, and should look immediately familiar to C programmers.
114% Any language feature that is not described here can be assumed to be using the standard C11 syntax.
115\CFA adds many modern programming-language features, which directly leads to increased safety and productivity, while maintaining interoperability with existing C programs and achieving C performance.
116Like C, \CFA is a statically typed, procedural language with a low-overhead runtime, meaning there is no global garbage-collection.
117The primary new features include parametric-polymorphism routines and types, exceptions, concurrency, and modules.
118
119One of the main design philosophies of \CFA is to ``describe not prescribe'', which means \CFA tries to provide a pathway from low-level C programming to high-level \CFA programming, but it does not force programmers to ``do the right thing''.
120Programmers can cautiously add \CFA extensions to their C programs in any order and at any time to incrementally move towards safer, higher-level programming features.
121A programmer is always free to reach back to C from \CFA for any reason, and in many cases, new \CFA features have a fallback to a C mechanism.
122There is no notion or requirement for rewriting a legacy C program in \CFA;
123instead, a programmer evolves an existing C program into \CFA by incrementally incorporating \CFA features.
124New programs can be written in \CFA using a combination of C and \CFA features.
125\CC had a similar goal 30 years ago, but has struggled over the intervening time to incorporate modern programming-language features because of early design choices.
126\CFA has 30 years of hindsight and clean starting point.
127
128Like \CC, there may be both an old and new ways to achieve the same effect.
129For example, the following programs compare the \CFA and C I/O mechanisms.
130\begin{quote2}
131\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
132\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
133\begin{lstlisting}
134#include <fstream>
135int main( void ) {
136        int x = 0, y = 1, z = 2;
137        sout | x | y | z | endl;
138}
139\end{lstlisting}
140&
141\begin{lstlisting}
142#include <stdio.h>
143int main( void ) {
144        int x = 0, y = 1, z = 2;
145        printf( "%d %d %d\n", x, y, z );
146}
147\end{lstlisting}
148\end{tabular}
149\end{quote2}
150Both programs output the same result.
151While the \CFA I/O looks similar to the \CC output style, there are several important differences, such as automatic spacing between variables as in Python (see also~\VRef{s:IOLibrary}).
152
153This document is a reference manual for the \CFA programming language, targeted at \CFA programmers.
154Implementers may refer to the \CFA Programming Language Specification for details about the language syntax and semantics.
155In its current state, this document covers the intended core features of the language.
156Changes to the syntax and additional features are expected to be included in later revisions.
157% For additional information, see \url{http://wiki.do-lang.org}.
158
159
160\section{History}
161
162The \CFA project started with K-W C~\cite{Till89,Buhr94a}, which extended C with new declaration syntax, multiple return values from routines, and extended assignment capabilities using the notion of tuples.
163(See~\cite{Werther96} for some similar work, but for \CC.)
164The original \CFA project~\cite{Ditchfield92} extended the C type system with parametric polymorphism and overloading, as opposed to the \CC approach of object-oriented extensions to the C type-system.
165A first implementation of the core Cforall language was created~\cite{Bilson03,Esteves04}, but at the time there was little interesting in extending C, so work did not continue.
166As the saying goes, ``What goes around, comes around'', and there is now renewed interest in the C programming language because of legacy code-bases, so the \CFA project has been restarted.
167
168
169\section{Why fix C?}
170
171Even with all its problems, C is a very popular programming language because it allows writing software at virtually any level in a computer system without restriction.
172For systems programming, where direct access to hardware and dealing with real-time issues is a requirement, C is usually the language of choice.
173As well, there are millions of lines of C legacy code, forming the base for many software development projects (especially on UNIX systems).
174The TIOBE index (\url{http://www.tiobe.com/tiobe_index}) for March 2016 shows programming-language popularity, with Java 20.5\%, C 14.5\%, \CC 6.7\%, \CS 4.3\%, Python 4.3\%, and all other programming languages below 3\%.
175Hence, C is still an extremely important programming language, with double the usage of \CC, where \CC itself is largely C code.
176Finally, love it or hate it, C has been an important and influential part of computer science for 40 years and it appears it will continue to be for many more years.
177Unfortunately, C has too many problems and omissions to make it an acceptable programming language for modern needs.
178
179The goal of this project is to engineer modern language features into C in an evolutionary rather than revolutionary way.
180\CC~\cite{c++,ANSI14:C++} is an example of a similar project;
181however, it largely extended the language, and did not address existing problems.\footnote{%
182Two important existing problems addressed were changing the type of character literals from ©int© to ©char© and enumerator from ©int© to the type of its enumerators.}
183Fortran~\cite{Fortran08}, Ada~\cite{Ada12}, and Cobol~\cite{Cobol14} are examples of programming languages that took an evolutionary approach, where modern language features are added and problems fixed within the framework of the existing language.
184Java~\cite{Java8}, Go~\cite{Go}, Rust~\cite{Rust} and D~\cite{D} are examples of the revolutionary approach for modernizing C/\CC, resulting in a new language rather than an extension of the descendent.
185These languages have different syntax and semantics from C, and do not interoperate directly with C, largely because of garbage collection.
186As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
187These costs can be prohibitive for many companies with a large software base in C/\CC, and many programmers that require retraining to a new programming language.
188
189The result of this project is a language that is largely backwards compatible with C11~\cite{C11}, but containing many modern language features and fixing some of the well known C problems.
190Without significant extension to the C programming language, C will be unable to cope with the needs of modern programming problems and programmers;
191as a result, it will fade into disuse.
192Considering the large body of existing C code and programmers, there is significant impetus to ensure C is transformed into a modern programming language.
193While C11 made a few simple extensions to the language, nothing was added to address existing problems in the language or to augment the language with modern language features.
194While some may argue that modern language features may make C complex and inefficient, it is clear a language without modern capabilities is insufficient for the advanced programming problems existing today.
195
196
197\section{Interoperability}
198
199\CFA is designed to integrate well with existing C programs and libraries.
200The most important feature of interoperability is to use the same calling conventions, so there is no overhead to call existing C routines.
201This feature allows users of \CFA to take advantage of the existing panoply of C libraries from inside their \CFA code.
202In fact, one of the biggest issues for any new programming language is establishing a minimum level of library code to support a large body of activities.
203Programming-language developers often state that adequate library support takes more work than designing and implementing the language itself.
204Like \CC, \CFA starts with immediate access to all exiting C libraries, and in many cases, can easily wrap library routines with simpler and safer interfaces, at very low cost.
205
206However, it is necessary to differentiate between C and \CFA code because of name overloading, as for \CC.
207For example, the C math-library provides the following routines for computing the absolute value of the basic type: ©abs©, ©labs©, ©llabs©, ©fabs©, ©fabsf©, ©fabsl©, ©cabsf©, ©cabs©, and ©cabsl©.
208Whereas, \CFA wraps each of these routines into one with the common name ©abs©.
209\begin{lstlisting}
210char abs( char );
211extern "C" {
212int abs( int );                         // use default C routine for int
213} // extern "C"
214long int abs( long int );
215long long int abs( long long int );
216float abs( float );
217double abs( double );
218long double abs( long double );
219float _Complex abs( float _Complex );
220double _Complex abs( double _Complex );
221long double _Complex abs( long double _Complex );
222\end{lstlisting}
223The problem is the name clash between the library routine ©abs© and the \CFA names ©abs©.
224Hence, names appearing in an ©extern "C"© block have \newterm{C linkage}.
225Then overloading polymorphism uses a mechanism called \newterm{name mangling} to create unique names that are different from C names, which are not mangled.
226Hence, there is the same need as in \CC, to know if a name is a C or \CFA name, so it can be correctly formed.
227There is no way around this problem, other than C's approach of creating unique names for each pairing of operation and type.
228This example strongly illustrates a core idea in \CFA: \emph{the power of a name}.
229The name ``©abs©'' evokes the notion of absolute value, and many mathematical types provide the notion of absolute value.
230Hence, knowing the name ©abs© should be sufficient to apply it to any type where it is applicable.
231The time savings and safety of using one name uniformly versus $N$ unique names should not be underestimated.
232
233
234\section[Compiling CFA Program]{Compiling \CFA Program}
235
236The command ©cfa© is used to compile \CFA program(s).
237This command works like the GNU ©gcc©\index{gcc} command, e.g.:
238\begin{lstlisting}
239cfa§\indexc{cfa}\index{compilation!cfa@©cfa©}§ [ gcc-options ] C/§\CFA§-files [ assembler/loader-files ]
240\end{lstlisting}
241By default, \CFA programs having the following ©gcc© flags turned on:
242\begin{description}
243\item\hspace*{-4pt}\Indexc{-std=gnu99}\index{compilation option!-std=gnu99@{©-std=gnu99©}}
244The 1999 C standard plus GNU extensions.
245\item\hspace*{-4pt}\Indexc{-fgnu89-¶inline¶}\index{compilation option!-fgnu89-inline@{©-fgnu89-¶inline¶©}}
246Use the traditional GNU semantics for inline routines in C99 mode.
247\end{description}
248The following new \CFA option is available:
249\begin{description}
250\item\hspace*{-4pt}\Indexc{-CFA}\index{compilation option!-CFA@{©-CFA©}}
251Only the C preprocessor and the \CFA translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \CFA translator.
252\end{description}
253
254The following preprocessor variables are available:
255\begin{description}
256\item\hspace*{-4pt}\Indexc{__CFA__}\index{preprocessor variables!__CFA__@{©__CFA__©}}
257is always available during preprocessing and its value is the current major \Index{version number} of \CFA.\footnote{
258The C preprocessor allows only integer values in a preprocessor variable so a value like ``\Version'' is not allowed.
259Hence, the need to have three variables for the major, minor and patch version number.}
260
261\item\hspace*{-4pt}\Indexc{__CFA_MINOR__}\index{preprocessor variables!__CFA_MINOR__@{©__CFA_MINOR__©}}
262is always available during preprocessing and its value is the current minor \Index{version number} of \CFA.
263
264\item\hspace*{-4pt}\Indexc{__CFA_PATCH__}\index{preprocessor variables!__CFA_PATCH____CFA_PATCH__©}
265is always available during preprocessing and its value is the current patch \Index{version number} of \CFA.
266
267\item\hspace*{-4pt}\Indexc{__CFORALL__}\index{preprocessor variables!__CFORALL____CFORALL__©}
268is always available during preprocessing and it has no value.
269\end{description}
270
271These preprocessor variables allow conditional compilation of programs that must work differently in these situations.
272For example, to toggle between C and \CFA extensions, using the following:
273\begin{lstlisting}
274#ifndef __CFORALL__
275#include <stdio.h>                      // C header file
276#else
277#include <fstream>                      // §\CFA{}§ header file
278#endif
279\end{lstlisting}
280which conditionally includes the correct header file, if the program is compiled using ©gcc© or ©cfa©.
281
282
283\section{Underscores in Constants}
284
285Numeric constants are extended to allow \Index{underscore}s within constants\index{constant!underscore}, e.g.:
286\begin{lstlisting}
287_®147®_®483®_®648;                            // decimal constant
28856_ul;                                          // decimal unsigned long constant
2890_377;                                          // octal constant
2900x_ff_ff;                                       // hexadecimal constant
2910x_ef3d_aa5c;                           // hexadecimal constant
2923.141_592_654;                          // floating point constant
29310_e_+1_00;                                     // floating point constant
2940x_ff_ff_p_3;                           // hexadecimal floating point
2950x_1.ffff_ffff_p_128_l;         // hexadecimal floating point long constant
296L_"\x_ff_ee";                           // wide character constant
297\end{lstlisting}
298The rules for placement of underscores is as follows:
299\begin{enumerate}
300\item
301A sequence of underscores is disallowed, e.g., ©12__34© is invalid.
302\item
303Underscores may only appear within a sequence of digits (regardless of the digit radix).
304In other words, an underscore cannot start or end a sequence of digits, e.g., ©_1©, ©1_© and ©_1_© are invalid (actually, the 1st and 3rd examples are identifier names).
305\item
306A numeric prefix may end with an underscore;
307a numeric infix may begin and/or end with an underscore;
308a numeric suffix may begin with an underscore.
309For example, the octal ©0© or hexadecimal ©0x© prefix may end with an underscore ©0_377© or ©0x_ff©;
310the exponent infix ©E© may start or end with an underscore ©1.0_E10©, ©1.0E_10© or ©1.0_E_10©;
311the type suffixes ©U©, ©L©, etc. may start with an underscore ©1_U©, ©1_ll© or ©1.0E10_f©.
312\end{enumerate}
313It is significantly easier to read and enter long constants when they are broken up into smaller groupings (most cultures use comma or period among digits for the same purpose).
314This extension is backwards compatible, matches with the use of underscore in variable names, and appears in Ada and Java.
315
316
317\section{Declarations}
318\label{s:Declarations}
319
320C declaration syntax is notoriously confusing and error prone.
321For example, many C programmers are confused by a declaration as simple as:
322\begin{quote2}
323\begin{tabular}{@{}ll@{}}
324\begin{lstlisting}[aboveskip=0pt,belowskip=0pt]
325int *x[ 5 ]
326\end{lstlisting}
327&
328\raisebox{-0.75\totalheight}{\input{Cdecl}}
329\end{tabular}
330\end{quote2}
331Is this an array of 5 pointers to integers or a pointer to an array of 5 integers?
332Another example of confusion results from the fact that a routine name and its parameters are embedded within the return type, mimicking the way the return value is used at the routine's call site.
333For example, a routine returning a pointer to an array of integers is defined and used in the following way:
334\begin{lstlisting}
335int (*f())[ 5 ] {...};  // definition mimics usage
336... (*f())[ 3 ] += 1;
337\end{lstlisting}
338Essentially, the return type is wrapped around the routine name in successive layers (like an onion).
339While attempting to make the two contexts consistent was a laudable goal, it has not worked out in practice.
340
341\CFA provides its own type, variable and routine declarations, using a slightly different syntax.
342The new declarations place modifiers to the left of the base type, while C declarations place modifiers to the right of the base type.
343The only exception is bit field specification, which always appear to the right of the base type.
344C and the new \CFA declarations may appear together in the same program block, but cannot be mixed within a specific declaration.
345
346In \CFA declarations, the same tokens are used as in C: the character ©*© is used to indicate a pointer, square brackets ©[©\,©]© are used to represent an array, and parentheses ©()© are used to indicate a routine parameter.
347However, unlike C, \CFA type declaration tokens are specified from left to right and the entire type specification is distributed across all variables in the declaration list.
348For instance, variables ©x© and ©y© of type pointer to integer are defined in \CFA as follows:
349\begin{quote2}
350\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
351\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
352\begin{lstlisting}
353®* int x, y;®
354\end{lstlisting}
355&
356\begin{lstlisting}
357int *x, *y;
358\end{lstlisting}
359\end{tabular}
360\end{quote2}
361Other examples are:
362\begin{quote2}
363\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
364\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
365\begin{lstlisting}
366[ 5 ] int z;
367[ 5 ] * char w;
368* [ 5 ] double v;
369struct s {
370        int f0:3;
371        * int f1;
372        [ 5 ] * int f2;
373};
374\end{lstlisting}
375&
376\begin{lstlisting}
377int z[ 5 ];
378char *w[ 5 ];
379double (*v)[ 5 ];
380struct s {
381        int f0:3;
382        int *f1;
383        int *f2[ 5 ]
384};
385\end{lstlisting}
386&
387\begin{lstlisting}
388// array of 5 integers
389// array of 5 pointers to char
390// pointer to array of 5 doubles
391
392// common bit field syntax
393
394
395
396\end{lstlisting}
397\end{tabular}
398\end{quote2}
399
400All type qualifiers, i.e., ©const© and ©volatile©, are used in the normal way with the new declarations but appear left to right, e.g.:
401\begin{quote2}
402\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
403\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
404\begin{lstlisting}
405const * const int x;
406const * [ 5 ] const int y;
407\end{lstlisting}
408&
409\begin{lstlisting}
410int const * const x;
411const int (* const y)[ 5 ]
412\end{lstlisting}
413&
414\begin{lstlisting}
415// const pointer to const integer
416// const pointer to array of 5 const integers
417\end{lstlisting}
418\end{tabular}
419\end{quote2}
420All declaration qualifiers, i.e., ©extern©, ©static©, etc., are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}
421The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature.~\cite[\S~6.11.5(1)]{C11}} e.g.:
422\begin{quote2}
423\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
424\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
425\begin{lstlisting}
426extern [ 5 ] int x;
427static * const int y;
428\end{lstlisting}
429&
430\begin{lstlisting}
431int extern x[ 5 ];
432const int static *y;
433\end{lstlisting}
434&
435\begin{lstlisting}
436// externally visible array of 5 integers
437// internally visible pointer to constant int
438\end{lstlisting}
439\end{tabular}
440\end{quote2}
441
442Unsupported are K\&R C declarations where the base type defaults to ©int©, if no type is specified\footnote{
443At least one type specifier shall be given in the declaration specifiers in each declaration, and in the specifier-qualifier list in each structure declaration and type name~\cite[\S~6.7.2(2)]{C11}},
444e.g.:
445\begin{lstlisting}
446x;                                              // int x
447*y;                                             // int *y
448f( p1, p2 );                    // int f( int p1, int p2 );
449f( p1, p2 ) {}                  // int f( int p1, int p2 ) {}
450\end{lstlisting}
451
452As stated above, the two styles of declaration may appear together in the same block.
453Therefore, a programmer has the option of either continuing to use traditional C declarations or take advantage of the new style.
454Clearly, both styles need to be supported for some time due to existing C-style header-files, particularly for UNIX systems.
455
456
457\section{Type Operators}
458
459The new declaration syntax can be used in other contexts where types are required, e.g., casts and the pseudo-routine ©sizeof©:
460\begin{quote2}
461\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
462\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
463\begin{lstlisting}
464y = (* int)x;
465i = sizeof([ 5 ] * int);
466\end{lstlisting}
467&
468\begin{lstlisting}
469y = (int *)x;
470i = sizeof(int *[ 5 ]);
471\end{lstlisting}
472\end{tabular}
473\end{quote2}
474
475
476\section{Routine Definition}
477
478\CFA also supports a new syntax for routine definition, as well as ISO C and K\&R routine syntax.
479The point of the new syntax is to allow returning multiple values from a routine~\cite{CLU,Galletly96}, e.g.:
480\begin{lstlisting}
481®[ int o1, int o2, char o3 ]® f( int i1, char i2, char i3 ) {
482        §\emph{routine body}§
483}
484\end{lstlisting}
485where routine ©f© has three output (return values) and three input parameters.
486Existing C syntax cannot be extended with multiple return types because it is impossible to embed a single routine name within multiple return type specifications.
487
488In detail, the brackets, ©[]©, enclose the result type, where each return value is named and that name is a local variable of the particular return type.\footnote{
489Michael Tiemann, with help from Doug Lea, provided named return values in g++, circa 1989.}
490The value of each local return variable is automatically returned at routine termination.
491Declaration qualifiers can only appear at the start of a routine definition, e.g.:
492\begin{lstlisting}
493extern [ int x ] g( int y ) {§\,§}
494\end{lstlisting}
495Lastly, if there are no output parameters or input parameters, the brackets and/or parentheses must still be specified;
496in both cases the type is assumed to be void as opposed to old style C defaults of int return type and unknown parameter types, respectively, as in:
497\begin{lstlisting}
498\,§] g();                                             // no input or output parameters
499[ void ] g( void );                     // no input or output parameters
500\end{lstlisting}
501
502Routine f is called as follows:
503\begin{lstlisting}
504[ i, j, ch ] = f( 3, 'a', ch );
505\end{lstlisting}
506The list of return values from f and the grouping on the left-hand side of the assignment is called a tuple and discussed in Section 12.
507
508\CFA style declarations cannot be used to declare parameters for K\&R style routine definitions because of the following ambiguity:
509\begin{lstlisting}
510int (*f(x))[ 5 ] int x; {}
511\end{lstlisting}
512The string ``©int (*f(x))[ 5 ]©'' declares a K\&R style routine of type returning a pointer to an array of 5 integers, while the string ``©[ 5 ] int x©'' declares a \CFA style parameter x of type array of 5 integers.
513Since the strings overlap starting with the open bracket, ©[©, there is an ambiguous interpretation for the string.
514As well, \CFA-style declarations cannot be used to declare parameters for C-style routine-definitions because of the following ambiguity:
515\begin{lstlisting}
516typedef int foo;
517int f( int (* foo) );           // foo is redefined as a parameter name
518\end{lstlisting}
519The string ``©int (* foo)©'' declares a C-style named-parameter of type pointer to an integer (the parenthesis are superfluous), while the same string declares a \CFA style unnamed parameter of type routine returning integer with unnamed parameter of type pointer to foo.
520The redefinition of a type name in a parameter list is the only context in C where the character ©*© can appear to the left of a type name, and \CFA relies on all type modifier characters appearing to the right of the type name.
521The inability to use \CFA declarations in these two contexts is probably a blessing because it precludes programmers from arbitrarily switching between declarations forms within a declaration contexts.
522
523C-style declarations can be used to declare parameters for \CFA style routine definitions, e.g.:
524\begin{lstlisting}
525[ int ] f( * int, int * );      // returns an integer, accepts 2 pointers to integers
526[ * int, int * ] f( int );      // returns 2 pointers to integers, accepts an integer
527\end{lstlisting}
528The reason for allowing both declaration styles in the new context is for backwards compatibility with existing preprocessor macros that generate C-style declaration-syntax, as in:
529\begin{lstlisting}
530#define ptoa( n, d ) int (*n)[ d ]
531int f( ptoa(p,5) ) ...          // expands to int f( int (*p)[ 5 ] )
532[ int ] f( ptoa(p,5) ) ...      // expands to [ int ] f( int (*p)[ 5 ] )
533\end{lstlisting}
534Again, programmers are highly encouraged to use one declaration form or the other, rather than mixing the forms.
535
536
537\subsection{Returning Values}
538
539Named return values handle the case where it is necessary to define a local variable whose value is then returned in a ©return© statement, as in:
540\begin{lstlisting}
541int f() {
542        int x;
543        ... x = 0; ... x = y; ...
544        return x;
545}
546\end{lstlisting}
547Because the value in the return variable is automatically returned when a \CFA routine terminates, the ©return© statement \emph{does not} contain an expression, as in:
548\begin{lstlisting}
549®[ int x ]® f() {
550        ... x = 0; ... x = y; ...
551        ®return;® // implicitly return x
552}
553\end{lstlisting}
554When the return is encountered, the current value of ©x© is returned to the calling routine.
555As well, ``falling off the end'' of a routine without a ©return© statement is permitted, as in:
556\begin{lstlisting}
557[ int x ] f() {
558        ... x = 0; ... x = y; ...
559} // implicitly return x
560\end{lstlisting}
561In this case, the current value of ©x© is returned to the calling routine just as if a ©return© had been encountered.
562
563
564\subsection{Routine Prototype}
565
566The syntax of the new routine prototype declaration follows directly from the new routine definition syntax;
567as well, parameter names are optional, e.g.:
568\begin{lstlisting}
569[ int x ] f ();                         // returning int with no parameters
570[ * int ] g (int y);            // returning pointer to int with int parameter
571[ ] h (int,char);                       // returning no result with int and char parameters
572[ * int,int ] j (int);          // returning pointer to int and int, with int parameter
573\end{lstlisting}
574This syntax allows a prototype declaration to be created by cutting and pasting source text from the routine definition header (or vice versa).
575It is possible to declare multiple routine-prototypes in a single declaration, but the entire type specification is distributed across \emph{all} routine names in the declaration list (see~\VRef{s:Declarations}), e.g.:
576\begin{quote2}
577\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
578\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
579\begin{lstlisting}
580[ int ] f(int), g;
581\end{lstlisting}
582&
583\begin{lstlisting}
584int f(int), g(int);
585\end{lstlisting}
586\end{tabular}
587\end{quote2}
588Declaration qualifiers can only appear at the start of a \CFA routine declaration,\footref{StorageClassSpecifier} e.g.:
589\begin{lstlisting}
590extern [ int ] f (int);
591static [ int ] g (int);
592\end{lstlisting}
593
594
595\section{Routine Pointers}
596
597The syntax for pointers to \CFA routines specifies the pointer name on the right, e.g.:
598\begin{lstlisting}
599* [ int x ] () fp;                      // pointer to routine returning int with no parameters
600* [ * int ] (int y) gp;         // pointer to routine returning pointer to int with int parameter
601* [ ] (int,char) hp;            // pointer to routine returning no result with int and char parameters
602* [ * int,int ] (int) jp;       // pointer to routine returning pointer to int and int, with int parameter
603\end{lstlisting}
604While parameter names are optional, \emph{a routine name cannot be specified};
605for example, the following is incorrect:
606\begin{lstlisting}
607* [ int x ] f () fp;            // routine name "f" is not allowed
608\end{lstlisting}
609
610
611\section{Named and Default Arguments}
612
613Named and default arguments~\cite{Hardgrave76}\footnote{
614Francez~\cite{Francez77} proposed a further extension to the named-parameter passing style, which specifies what type of communication (by value, by reference, by name) the argument is passed to the routine.}
615are two mechanisms to simplify routine call.
616Both mechanisms are discussed with respect to \CFA.
617\begin{description}
618\item[Named (or Keyword) Arguments:]
619provide the ability to specify an argument to a routine call using the parameter name rather than the position of the parameter.
620For example, given the routine:
621\begin{lstlisting}
622void p( int x, int y, int z ) {...}
623\end{lstlisting}
624a positional call is:
625\begin{lstlisting}
626p( 4, 7, 3 );
627\end{lstlisting}
628whereas a named (keyword) call may be:
629\begin{lstlisting}
630p( z : 3, x : 4, y : 7 ); // rewrite $\Rightarrow$ p( 4, 7, 3 )
631\end{lstlisting}
632Here the order of the arguments is unimportant, and the names of the parameters are used to associate argument values with the corresponding parameters.
633The compiler rewrites a named call into a positional call.
634The advantages of named parameters are:
635\begin{itemize}
636\item
637Remembering the names of the parameters may be easier than the order in the routine definition.
638\item
639Parameter names provide documentation at the call site (assuming the names are descriptive).
640\item
641Changes can be made to the order or number of parameters without affecting the call (although the call must still be recompiled).
642\end{itemize}
643
644Unfortunately, named arguments do not work in C-style programming-languages because a routine prototype is not required to specify parameter names, nor do the names in the prototype have to match with the actual definition.
645For example, the following routine prototypes and definition are all valid.
646\begin{lstlisting}
647void p( int, int, int );                        // equivalent prototypes
648void p( int x, int y, int z );
649void p( int y, int x, int z );
650void p( int z, int y, int x );
651void p( int q, int r, int s ) {}        // match with this definition
652\end{lstlisting}
653Forcing matching parameter names in routine prototypes with corresponding routine definitions is possible, but goes against a strong tradition in C programming.
654Alternatively, prototype definitions can be eliminated by using a two-pass compilation, and implicitly creating header files for exports.
655The former is easy to do, while the latter is more complex.
656Currently, \CFA does \emph{not} attempt to support named arguments.
657
658\item[Default Arguments]
659provide the ability to associate a default value with a parameter so it can be optionally specified in the argument list.
660For example, given the routine:
661\begin{lstlisting}
662void p( int x = 1, int y = 2, int z = 3 ) {...}
663\end{lstlisting}
664the allowable positional calls are:
665\begin{lstlisting}
666p();                            // rewrite $\Rightarrow$ p( 1, 2, 3 )
667p( 4 );                         // rewrite $\Rightarrow$ p( 4, 2, 3 )
668p( 4, 4 );                      // rewrite $\Rightarrow$ p( 4, 4, 3 )
669p( 4, 4, 4 );           // rewrite $\Rightarrow$ p( 4, 4, 4 )
670// empty arguments
671p(  , 4, 4 );           // rewrite $\Rightarrow$ p( 1, 4, 4 )
672p( 4,  , 4 );           // rewrite $\Rightarrow$ p( 4, 2, 4 )
673p( 4, 4,   );           // rewrite $\Rightarrow$ p( 4, 4, 3 )
674p( 4,  ,   );           // rewrite $\Rightarrow$ p( 4, 2, 3 )
675p(  , 4,   );           // rewrite $\Rightarrow$ p( 1, 4, 3 )
676p(  ,  , 4 );           // rewrite $\Rightarrow$ p( 1, 2, 4 )
677p(  ,  ,   );           // rewrite $\Rightarrow$ p( 1, 2, 3 )
678\end{lstlisting}
679Here the missing arguments are inserted from the default values in the parameter list.
680The compiler rewrites missing default values into explicit positional arguments.
681The advantages of default values are:
682\begin{itemize}
683\item
684Routines with a large number of parameters are often very generalized, giving a programmer a number of different options on how a computation is performed.
685For many of these kinds of routines, there are standard or default settings that work for the majority of computations.
686Without default values for parameters, a programmer is forced to specify these common values all the time, resulting in long argument lists that are error prone.
687\item
688When a routine's interface is augmented with new parameters, it extends the interface providing generalizability\footnote{
689``It should be possible for the implementor of an abstraction to increase its generality.
690So long as the modified abstraction is a generalization of the original, existing uses of the abstraction will not require change.
691It might be possible to modify an abstraction in a manner which is not a generalization without affecting existing uses, but, without inspecting the modules in which the uses occur, this possibility cannot be determined.
692This criterion precludes the addition of parameters, unless these parameters have default or inferred values that are valid for all possible existing applications.''~\cite[p.~128]{Cormack90}}
693(somewhat like the generalization provided by inheritance for classes).
694That is, all existing calls are still valid, although the call must still be recompiled.
695\end{itemize}
696The only disadvantage of default arguments is that unintentional omission of an argument may not result in a compiler-time error.
697Instead, a default value is used, which may not be the programmer's intent.
698
699Default values may only appear in a prototype versus definition context:
700\begin{lstlisting}
701void p( int x, int y = 2, int z = 3 );          // prototype: allowed
702void p( int, int = 2, int = 3 );                        // prototype: allowed
703void p( int x, int y = 2, int z = 3 ) {}        // definition: not allowed
704\end{lstlisting}
705The reason for this restriction is to allow separate compilation.
706Multiple prototypes with different default values is an error.
707\end{description}
708
709Ellipse (``...'') arguments present problems when used with default arguments.
710The conflict occurs because both named and ellipse arguments must appear after positional arguments, giving two possibilities:
711\begin{lstlisting}
712p( /* positional */, . . ., /* named */ );
713p( /* positional */, /* named */, . . . );
714\end{lstlisting}
715While it is possible to implement both approaches, the first possibly is more complex than the second, e.g.:
716\begin{lstlisting}
717p( int x, int y, int z, . . . );
718p( 1, 4, 5, 6, z : 3, y : 2 ); // assume p( /* positional */, . . ., /* named */ );
719p( 1, z : 3, y : 2, 4, 5, 6 ); // assume p( /* positional */, /* named */, . . . );
720\end{lstlisting}
721In the first call, it is necessary for the programmer to conceptually rewrite the call, changing named arguments into positional, before knowing where the ellipse arguments begin.
722Hence, this approach seems significantly more difficult, and hence, confusing and error prone.
723In the second call, the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
724
725The problem is exacerbated with default arguments, e.g.:
726\begin{lstlisting}
727void p( int x, int y = 2, int z = 3. . . );
728p( 1, 4, 5, 6, z : 3 );         // assume p( /* positional */, . . ., /* named */ );
729p( 1, z : 3, 4, 5, 6 );         // assume p( /* positional */, /* named */, . . . );
730\end{lstlisting}
731The first call is an error because arguments 4 and 5 are actually positional not ellipse arguments;
732therefore, argument 5 subsequently conflicts with the named argument z : 3.
733In the second call, the default value for y is implicitly inserted after argument 1 and the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
734For these reasons, \CFA requires named arguments before ellipse arguments.
735Finally, while ellipse arguments are needed for a small set of existing C routines, like printf, the extended \CFA type system largely eliminates the need for ellipse arguments (see Section 24), making much of this discussion moot.
736
737Default arguments and overloading (see Section 24) are complementary.
738While in theory default arguments can be simulated with overloading, as in:
739\begin{quote2}
740\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
741\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{default arguments}}  & \multicolumn{1}{c}{\textbf{overloading}}      \\
742\begin{lstlisting}
743void p( int x, int y = 2, int z = 3 ) {...}
744
745
746\end{lstlisting}
747&
748\begin{lstlisting}
749void p( int x, int y, int z ) {...}
750void p( int x ) { p( x, 2, 3 ); }
751void p( int x, int y ) { p( x, y, 3 ); }
752\end{lstlisting}
753\end{tabular}
754\end{quote2}
755the number of required overloaded routines is linear in the number of default values, which is unacceptable growth.
756In general, overloading should only be used over default arguments if the body of the routine is significantly different.
757Furthermore, overloading cannot handle accessing default arguments in the middle of a positional list, via a missing argument, such as:
758\begin{lstlisting}
759p( 1, /* default */, 5 );               // rewrite $\Rightarrow$ p( 1, 2, 5 )
760\end{lstlisting}
761
762Given the \CFA restrictions above, both named and default arguments are backwards compatible.
763\CC only supports default arguments;
764Ada supports both named and default arguments.
765
766
767\section{Type/Routine Nesting}
768
769Nesting of types and routines is useful for controlling name visibility (\newterm{name hiding}).
770
771
772\subsection{Type Nesting}
773
774\CFA allows \Index{type nesting}, and type qualification of the nested types, where as C hoists\index{type hoisting} (refactors) nested types into the enclosing scope and has no type qualification.
775\begin{quote2}
776\begin{tabular}{@{}l@{\hspace{30pt}}l|l@{}}
777\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{C Type Nesting}}     & \multicolumn{1}{c}{\textbf{C Implicit Hoisting}}      & \multicolumn{1}{|c}{\textbf{\CFA}}    \\
778\hline
779\begin{lstlisting}
780struct S {
781        enum C { R, G, B };
782        struct T {
783                union U { int i, j; };
784                enum C c;
785                short int i, j;
786        };
787        struct T t;
788} s;
789
790int fred() {
791        s.t.c = R;
792        struct T t = { R, 1, 2 };
793        enum C c;
794        union U u;
795}
796\end{lstlisting}
797&
798\begin{lstlisting}
799enum C { R, G, B };
800union U { int i, j; };
801struct T {
802        enum C c;
803        short int i, j;
804};
805struct S {
806        struct T t;
807} s;
808       
809
810
811
812
813
814
815\end{lstlisting}
816&
817\begin{lstlisting}
818struct S {
819        enum C { R, G, B };
820        struct T {
821                union U { int i, j; };
822                enum C c;
823                short int i, j;
824        };
825        struct T t;
826} s;
827
828int fred() {
829        s.t.c = ®S.®R;  // type qualification
830        struct ®S.®T t = { ®S.®R, 1, 2 };
831        enum ®S.®C c;
832        union ®S.T.®U u;
833}
834\end{lstlisting}
835\end{tabular}
836\end{quote2}
837In the left example in C, types ©C©, ©U© and ©T© are implicitly hoisted outside of type ©S© into the containing block scope.
838In the right example in \CFA, the types are not hoisted and accessed using the field-selection operator ``©.©'' for type qualification, as does Java, rather than the \CC type-selection operator ``©::©''.
839
840
841\subsection{Routine Nesting}
842
843While \CFA does not provide object programming by putting routines into structures, it does rely heavily on locally nested routines to redefine operations at or close to a call site.
844For example, the C quick-sort is wrapped into the following polymorphic \CFA routine:
845\begin{lstlisting}
846forall( otype T | { int ?<?( T, T ); } )
847void qsort( const T * arr, size_t dimension );
848\end{lstlisting}
849which can be used to sort in ascending and descending order by locally redefining the less-than operator into greater-than.
850\begin{lstlisting}
851const unsigned int size = 5;
852int ia[size];
853...                                             // assign values to array ia
854qsort( ia, size );              // sort ascending order using builtin ?<?
855{
856        ®int ?<?( int x, int y ) { return x > y; }® // nested routine
857        qsort( ia, size );      // sort descending order by local redefinition
858}
859\end{lstlisting}
860
861Nested routines are not first-class, meaning a nested routine cannot be returned if it has references to variables in its enclosing blocks;
862the only exception is references to the external block of the translation unit, as these variables persist for the duration of the program.
863The following program in undefined in \CFA (and ©gcc©\index{gcc})
864\begin{lstlisting}
865[* [int]( int )] foo() {                // int (*foo())( int )
866        int ®i® = 7;
867        int bar( int p ) {
868                ®i® += 1;                                       // dependent on local variable
869                sout | ®i® | endl;
870        }
871        return bar;                                     // undefined because of local dependence
872}
873int main() {
874        * [int](int) fp = foo();        // int (*fp)(int)
875    sout | fp( 3 ) | endl;
876}
877\end{lstlisting}
878because
879
880Currently, there are no \Index{lambda} expressions, i.e., unnamed routines because routine names are very important to properly select the correct routine.
881
882
883\section{Tuples}
884
885In C and \CFA, lists of elements appear in several contexts, such as the parameter list for a routine call.
886(More contexts are added shortly.)
887A list of such elements is called a tuple.
888The general syntax of a tuple is:
889\begin{lstlisting}
890[ §\emph{exprlist}§ ]
891\end{lstlisting}
892where ©$\emph{exprlist}$© is a list of one or more expressions separated by commas.
893The brackets, ©[]©, allow differentiating between tuples and expressions containing the C comma operator.
894The following are examples of tuples:
895\begin{lstlisting}
896[ x, y, z ]
897[ 2 ]
898[ v+w, x*y, 3.14159, f() ]
899\end{lstlisting}
900Tuples are permitted to contain sub-tuples (i.e., nesting), such as ©[ [ 14, 21 ], 9 ]©, which is a 2-element tuple whose first element is itself a tuple.
901Note, a tuple is not a record (structure);
902a record denotes a single value with substructure, whereas a tuple is multiple values with no substructure (see flattening coercion in Section 12.1).
903In essence, tuples are largely a compile time phenomenon, having little or no runtime presence.
904
905Tuples can be organized into compile-time tuple variables;
906these variables are of \newterm{tuple type}.
907Tuple variables and types can be used anywhere lists of conventional variables and types can be used.
908The general syntax of a tuple type is:
909\begin{lstlisting}
910[ §\emph{typelist}§ ]
911\end{lstlisting}
912where ©$\emph{typelist}$© is a list of one or more legal \CFA or C type specifications separated by commas, which may include other tuple type specifications.
913Examples of tuple types include:
914\begin{lstlisting}
915[ unsigned int, char ]
916[ double, double, double ]
917[ * int, int * ]                // mix of CFA and ANSI
918[ * [ 5 ] int, * * char, * [ [ int, int ] ] (int, int) ]
919\end{lstlisting}
920Like tuples, tuple types may be nested, such as ©[ [ int, int ], int ]©, which is a 2-element tuple type whose first element is itself a tuple type.
921
922Examples of declarations using tuple types are:
923\begin{lstlisting}
924[ int, int ] x;                 // 2 element tuple, each element of type int
925* [ char, char ] y;             // pointer to a 2 element tuple
926[ [ int, int ] ] z ([ int, int ]);
927\end{lstlisting}
928The last example declares an external routine that expects a 2 element tuple as an input parameter and returns a 2 element tuple as its result.
929
930As mentioned, tuples can appear in contexts requiring a list of value, such as an argument list of a routine call.
931In unambiguous situations, the tuple brackets may be omitted, e.g., a tuple that appears as an argument may have its
932square brackets omitted for convenience; therefore, the following routine invocations are equivalent:
933\begin{lstlisting}
934f( [ 1, x+2, fred() ] );
935f( 1, x+2, fred() );
936\end{lstlisting}
937Also, a tuple or a tuple variable may be used to supply all or part of an argument list for a routine expecting multiple input parameters or for a routine expecting a tuple as an input parameter.
938For example, the following are all legal:
939\begin{lstlisting}
940[ int, int ] w1;
941[ int, int, int ] w2;
942[ void ] f (int, int, int); /* three input parameters of type int */
943[ void ] g ([ int, int, int ]); /* 3 element tuple as input */
944f( [ 1, 2, 3 ] );
945f( w1, 3 );
946f( 1, w1 );
947f( w2 );
948g( [ 1, 2, 3 ] );
949g( w1, 3 );
950g( 1, w1 );
951g( w2 );
952\end{lstlisting}
953Note, in all cases 3 arguments are supplied even though the syntax may appear to supply less than 3. As mentioned, a
954tuple does not have structure like a record; a tuple is simply converted into a list of components.
955\begin{rationale}
956The present implementation of \CFA does not support nested routine calls when the inner routine returns multiple values; i.e., a statement such as ©g( f() )© is not supported.
957Using a temporary variable to store the  results of the inner routine and then passing this variable to the outer routine works, however.
958\end{rationale}
959
960A tuple can contain a C comma expression, provided the expression containing the comma operator is enclosed in parentheses.
961For instance, the following tuples are equivalent:
962\begin{lstlisting}
963[ 1, 3, 5 ]
964[ 1, (2, 3), 5 ]
965\end{lstlisting}
966The second element of the second tuple is the expression (2, 3), which yields the result 3.
967This requirement is the same as for comma expressions in argument lists.
968
969Type qualifiers, i.e., const and volatile, may modify a tuple type.
970The meaning is the same as for a type qualifier modifying an aggregate type [Int99, x 6.5.2.3(7),x 6.7.3(11)], i.e., the qualifier is distributed across all of the types in the tuple, e.g.:
971\begin{lstlisting}
972const volatile [ int, float, const int ] x;
973\end{lstlisting}
974is equivalent to:
975\begin{lstlisting}
976[ const volatile int, const volatile float, const volatile int ] x;
977\end{lstlisting}
978Declaration qualifiers can only appear at the start of a \CFA tuple declaration4, e.g.:
979\begin{lstlisting}
980extern [ int, int ] w1;
981static [ int, int, int ] w2;
982\end{lstlisting}
983\begin{rationale}
984Unfortunately, C's syntax for subscripts precluded treating them as tuples.
985The C subscript list has the form ©[i][j]...© and not ©[i, j, ...]©.
986Therefore, there is no syntactic way for a routine returning multiple values to specify the different subscript values, e.g., ©f[g()]© always means a single subscript value because there is only one set of brackets.
987Fixing this requires a major change to C because the syntactic form ©M[i, j, k]© already has a particular meaning: ©i, j, k© is a comma expression.
988\end{rationale}
989
990
991\subsection{Tuple Coercions}
992
993There are four coercions that can be performed on tuples and tuple variables: closing, opening, flattening and structuring.
994In addition, the coercion of dereferencing can be performed on a tuple variable to yield its value(s), as for other variables.
995A \newterm{closing coercion} takes a set of values and converts it into a tuple value, which is a contiguous set of values, as in:
996\begin{lstlisting}
997[ int, int, int, int ] w;
998w = [ 1, 2, 3, 4 ];
999\end{lstlisting}
1000First the right-hand tuple is closed into a tuple value and then the tuple value is assigned.
1001
1002An \newterm{opening coercion} is the opposite of closing; a tuple value is converted into a tuple of values, as in:
1003\begin{lstlisting}
1004[ a, b, c, d ] = w
1005\end{lstlisting}
1006©w© is implicitly opened to yield a tuple of four values, which are then assigned individually.
1007
1008A \newterm{flattening coercion} coerces a nested tuple, i.e., a tuple with one or more components, which are themselves tuples, into a flattened tuple, which is a tuple whose components are not tuples, as in:
1009\begin{lstlisting}
1010[ a, b, c, d ] = [ 1, [ 2, 3 ], 4 ];
1011\end{lstlisting}
1012First the right-hand tuple is flattened and then the values are assigned individually.
1013Flattening is also performed on tuple types.
1014For example, the type ©[ int, [ int, int ], int ]© can be coerced, using flattening, into the type ©[ int, int, int, int ]©.
1015
1016A \newterm{structuring coercion} is the opposite of flattening;
1017a tuple is structured into a more complex nested tuple.
1018For example, structuring the tuple ©[ 1, 2, 3, 4 ]© into the tuple ©[ 1, [ 2, 3 ], 4 ]© or the tuple type ©[ int, int, int, int ]© into the tuple type ©[ int, [ int, int ], int ]©.
1019In the following example, the last assignment illustrates all the tuple coercions:
1020\begin{lstlisting}
1021[ int, int, int, int ] w = [ 1, 2, 3, 4 ];
1022int x = 5;
1023[ x, w ] = [ w, x ];            // all four tuple coercions
1024\end{lstlisting}
1025Starting on the right-hand tuple in the last assignment statement, w is opened, producing a tuple of four values;
1026therefore, the right-hand tuple is now the tuple ©[ [ 1, 2, 3, 4 ], 5 ]©.
1027This tuple is then flattened, yielding ©[ 1, 2, 3, 4, 5 ]©, which is structured into ©[ 1, [ 2, 3, 4, 5 ] ]© to match the tuple type of the left-hand side.
1028The tuple ©[ 2, 3, 4, 5 ]© is then closed to create a tuple value.
1029Finally, ©x© is assigned ©1© and ©w© is assigned the tuple value using multiple assignment (see Section 14).
1030\begin{rationale}
1031A possible additional language extension is to use the structuring coercion for tuples to initialize a complex record with a tuple.
1032\end{rationale}
1033
1034
1035\section{Mass Assignment}
1036
1037\CFA permits assignment to several variables at once using mass assignment~\cite{CLU}.
1038Mass assignment has the following form:
1039\begin{lstlisting}
1040[ §\emph{lvalue}§, ..., §\emph{lvalue}§ ] = §\emph{expr}§;
1041\end{lstlisting}
1042The left-hand side is a tuple of \emph{lvalues}, which is a list of expressions each yielding an address, i.e., any data object that can appear on the left-hand side of a conventional assignment statement.
1043©$\emph{expr}$© is any standard arithmetic expression.
1044Clearly, the types of the entities being assigned must be type compatible with the value of the expression.
1045
1046Mass assignment has parallel semantics, e.g., the statement:
1047\begin{lstlisting}
1048[ x, y, z ] = 1.5;
1049\end{lstlisting}
1050is equivalent to:
1051\begin{lstlisting}
1052x = 1.5; y = 1.5; z = 1.5;
1053\end{lstlisting}
1054This semantics is not the same as the following in C:
1055\begin{lstlisting}
1056x = y = z = 1.5;
1057\end{lstlisting}
1058as conversions between intermediate assignments may lose information.
1059A more complex example is:
1060\begin{lstlisting}
1061[ i, y[i], z ] = a + b;
1062\end{lstlisting}
1063which is equivalent to:
1064\begin{lstlisting}
1065t = a + b;
1066a1 = &i; a2 = &y[i]; a3 = &z;
1067*a1 = t; *a2 = t; *a3 = t;
1068\end{lstlisting}
1069The temporary ©t© is necessary to store the value of the expression to eliminate conversion issues.
1070The temporaries for the addresses are needed so that locations on the left-hand side do not change as the values are assigned.
1071In this case, ©y[i]© uses the previous value of ©i© and not the new value set at the beginning of the mass assignment.
1072
1073
1074\section{Multiple Assignment}
1075
1076\CFA also supports the assignment of several values at once, known as multiple assignment~\cite{CLU,Galletly96}.
1077Multiple assignment has the following form:
1078\begin{lstlisting}
1079[ §\emph{lvalue}§, . . ., §\emph{lvalue}§ ] = [ §\emph{expr}§, . . ., §\emph{expr}§ ];
1080\end{lstlisting}
1081The left-hand side is a tuple of \emph{lvalues}, and the right-hand side is a tuple of \emph{expr}s.
1082Each \emph{expr} appearing on the righthand side of a multiple assignment statement is assigned to the corresponding \emph{lvalues} on the left-hand side of the statement using parallel semantics for each assignment.
1083An example of multiple assignment is:
1084\begin{lstlisting}
1085[ x, y, z ] = [ 1, 2, 3 ];
1086\end{lstlisting}
1087Here, the values ©1©, ©2© and ©3© are assigned, respectively, to the variables ©x©, ©y© and ©z©.
1088 A more complex example is:
1089\begin{lstlisting}
1090[ i, y[ i ], z ] = [ 1, i, a + b ];
1091\end{lstlisting}
1092Here, the values ©1©, ©i© and ©a + b© are assigned to the variables ©i©, ©y[i]© and ©z©, respectively.
1093 Note, the parallel semantics of
1094multiple assignment ensures:
1095\begin{lstlisting}
1096[ x, y ] = [ y, x ];
1097\end{lstlisting}
1098correctly interchanges (swaps) the values stored in ©x© and ©y©.
1099The following cases are errors:
1100\begin{lstlisting}
1101[ a, b, c ] = [ 1, 2, 3, 4 ];
1102[ a, b, c ] = [ 1, 2 ];
1103\end{lstlisting}
1104because the number of entities in the left-hand tuple is unequal with the right-hand tuple.
1105
1106As for all tuple contexts in C, side effects should not be used because C does not define an ordering for the evaluation of the elements of a tuple;
1107both these examples produce indeterminate results:
1108\begin{lstlisting}
1109f( x++, x++ );                          // C routine call with side effects in arguments
1110[ v1, v2 ] = [ x++, x++ ];      // side effects in righthand side of multiple assignment
1111\end{lstlisting}
1112
1113
1114\section{Cascade Assignment}
1115
1116As in C, \CFA mass and multiple assignments can be cascaded, producing cascade assignment.
1117Cascade assignment has the following form:
1118\begin{lstlisting}
1119§\emph{tuple}§ = §\emph{tuple}§ = ... = §\emph{tuple}§;
1120\end{lstlisting}
1121and it has the same parallel semantics as for mass and multiple assignment.
1122Some examples of cascade assignment are:
1123\begin{lstlisting}
1124x1 = y1 = x2 = y2 = 0;
1125[ x1, y1 ] = [ x2, y2 ] = [ x3, y3 ];
1126[ x1, y1 ] = [ x2, y2 ] = 0;
1127[ x1, y1 ] = z = 0;
1128\end{lstlisting}
1129As in C, the rightmost assignment is performed first, i.e., assignment parses right to left.
1130
1131
1132\section{Field Tuples}
1133
1134Tuples may be used to select multiple fields of a record by field name.
1135Its general form is:
1136\begin{lstlisting}
1137§\emph{expr}§ . [ §\emph{fieldlist}§ ]
1138§\emph{expr}§ -> [ §\emph{fieldlist}§ ]
1139\end{lstlisting}
1140\emph{expr} is any expression yielding a value of type record, e.g., ©struct©, ©union©.
1141Each element of \emph{ fieldlist} is an element of the record specified by \emph{expr}.
1142A record-field tuple may be used anywhere a tuple can be used. An example of the use of a record-field tuple is
1143the following:
1144\begin{lstlisting}
1145struct s {
1146        int f1, f2;
1147        char f3;
1148        double f4;
1149} v;
1150v.[ f3, f1, f2 ] = ['x', 11, 17 ];      // equivalent to v.f3 = 'x', v.f1 = 11, v.f2 = 17
1151f( v.[ f3, f1, f2 ] );                          // equivalent to f( v.f3, v.f1, v.f2 )
1152\end{lstlisting}
1153Note, the fields appearing in a record-field tuple may be specified in any order;
1154also, it is unnecessary to specify all the fields of a struct in a multiple record-field tuple.
1155
1156If a field of a ©struct© is itself another ©struct©, multiple fields of this subrecord can be specified using a nested record-field tuple, as in the following example:
1157\begin{lstlisting}
1158struct inner {
1159        int f2, f3;
1160};
1161struct outer {
1162        int f1;
1163        struct inner i;
1164        double f4;
1165} o;
1166
1167o.[ f1, i.[ f2, f3 ], f4 ] = [ 11, 12, 13, 3.14159 ];
1168\end{lstlisting}
1169
1170
1171\section{Labelled Break/Continue}
1172
1173While C provides ©break© and ©continue© statements for altering control flow, both are restricted to one level of nesting for a particular control structure.
1174Unfortunately, this restriction forces programmers to use ©goto© to achieve the equivalent for more than one level of nesting.
1175To prevent having to make this switch, the ©break© and ©continue© are extended with a target label to support static multi-level exit~\cite{Buhr85,Java}.
1176For the labelled ©break©, it is possible to specify which control structure is the target for exit, as in:
1177\begin{quote2}
1178\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
1179\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
1180\begin{lstlisting}
1181®L1:® for ( ... ) {
1182        ®L2:® for ( ... ) {
1183                ®L3:® for ( ... ) {
1184                        ... break ®L1®; ...
1185                        ... break ®L2®; ...
1186                        ... break ®L3®; // or break
1187                }
1188        }
1189}
1190\end{lstlisting}
1191&
1192\begin{lstlisting}
1193for ( ... ) {
1194        for ( ... ) {
1195                for ( ... ) {
1196                        ... goto L1; ...
1197                        ... goto L2; ...
1198                        ... goto L3; // or break
1199                } L3: ;
1200        } L2: ;
1201} L1: ;
1202\end{lstlisting}
1203\end{tabular}
1204\end{quote2}
1205The inner most loop has three exit points, which cause termination of one or more of the three nested loops, respectively.
1206For the labelled ©continue©, it is possible to specify which control structure is the target for the next loop iteration, as in:
1207\begin{quote2}
1208\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
1209\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{C}}        \\
1210\begin{lstlisting}
1211®L1®: for ( ... ) {
1212        ®L2®: for ( ... ) {
1213                ®L3®: for ( ... ) {
1214                        ... continue ®L1®; ...
1215                        ... continue ®L2®; ...
1216                        ... continue ®L3®; ...
1217
1218                }
1219
1220        }
1221
1222}
1223\end{lstlisting}
1224&
1225\begin{lstlisting}
1226for ( ... ) {
1227        for ( ... ) {
1228                for ( ... ) {
1229                        ... goto L1; ...
1230                        ... goto L2; ...
1231                        ... goto L3; ...
1232                  L3: ;
1233                }
1234          L2: ;
1235        }
1236  L1: ;
1237}
1238\end{lstlisting}
1239\end{tabular}
1240\end{quote2}
1241The inner most loop has three restart points, which cause the next loop iteration to begin, respectively.
1242For both ©break© and ©continue©, the target label must be directly associated with a ©for©, ©while© or ©do© statement;
1243for ©break©, the target label can also be associated with a ©switch© statement.
1244Both ©break© and ©continue© with target labels are simply a ©goto© restricted in the following ways:
1245\begin{itemize}
1246\item
1247They cannot be used to create a loop.
1248This means that only the looping construct can be used to create a loop.
1249This restriction is important since all situations that can result in repeated execution of statements in a program are clearly delineated.
1250\item
1251Since they always transfers out of containing control structures, they cannot be used to branch into a control structure.
1252\end{itemize}
1253The advantage of the labelled ©break©/©continue© is that it allows static multi-level exits without having to use the ©goto© statement and ties control flow to the target control structure rather than an arbitrary point in a program.
1254Furthermore, the location of the label at the beginning of the target control structure informs the reader that complex control-flow is occurring in the body of the control structure.
1255With ©goto©, the label at the end of the control structure fails to convey this important clue early enough to the reader.
1256Finally, using an explicit target for the transfer instead of an implicit target allows new nested loop or switch constructs to be added or removed without affecting other constructs.
1257The implicit targets of the current ©break© and ©continue©, i.e., the closest enclosing loop or ©switch©, change as certain constructs are added or removed.
1258
1259
1260\section{Switch Statement}
1261
1262C allows a number of questionable forms for the ©switch© statement:
1263\begin{enumerate}
1264\item
1265By default, the end of a ©case© clause\footnote{
1266In this section, the term \emph{case clause} refers to either a ©case© or ©default© clause.}
1267\emph{falls through} to the next ©case© clause in the ©switch© statement;
1268to exit a ©switch© statement from a ©case© clause requires explicitly terminating the clause with a transfer statement, most commonly ©break©, as in:
1269\begin{lstlisting}
1270switch ( i ) {
1271  case 1:
1272        ...
1273        // fall-through
1274  case 2:
1275        ...
1276        break;  // exit switch statement
1277}
1278\end{lstlisting}
1279The ability to fall-through to the next clause is a useful form of control flow, specifically when a sequence of case actions compound, as in:
1280\begin{lstlisting}
1281switch ( argc ) {
1282  case 3:
1283        // open output file
1284        // fall-through
1285  case 2:
1286        // open input file
1287        break;  // exit switch statement
1288  default:
1289        // usage message
1290}
1291\end{lstlisting}
1292In this example, case 2 is always done if case 3 is done.
1293This control flow is difficult to simulate with if statements or a ©switch© statement without fall-through as code must be duplicated or placed in a separate routine.
1294C also uses fall-through to handle multiple case-values resulting in the same action, as in:
1295\begin{lstlisting}
1296switch ( i ) {
1297  case 1: case 3: case 5:       // odd values
1298        // same action
1299        break;
1300  case 2: case 4: case 6:       // even values
1301        // same action
1302        break;
1303}
1304\end{lstlisting}
1305However, this situation is handled in other languages without fall-through by allowing a list of case values.
1306While fall-through itself is not a problem, the problem occurs when fall-through is the ©default©, as this semantics is not intuitive to most programmers and is different from virtually all other programming languages with a ©switch© statement.
1307Hence, ©default© fall-through semantics results in a large number of programming errors as programmers often forget the ©break© statement at the end of a ©case© clause, resulting in inadvertent fall-through.
1308
1309\item
1310It is possible to place ©case© clauses on statements nested \emph{within} the body of the ©switch© statement, as in:
1311\begin{lstlisting}
1312switch ( i ) {
1313  case 0:
1314        if ( j < k ) {
1315                ...
1316          case 1:               // transfer into "if" statement
1317                if ( j < m ) {
1318                        ...
1319                  case 2:       // transfer into "if" statement
1320                        ...
1321                }
1322        }
1323  case 3:
1324        while ( j < 5 ) {
1325                ...
1326          case 4:               // transfer into "while" statement
1327                ...
1328        }
1329}
1330\end{lstlisting}
1331The problem with this usage is branching into control structures, which is known to cause both comprehension and technical difficulties.
1332The comprehension problem occurs from the inability to determine how control reaches a particular point due to the number of branches leading to it.
1333The technical problem results from the inability to ensure allocation and initialization of variables when blocks are not entered at the beginning.
1334Often transferring into a block can bypass variable declaration and/or its initialization, which results in subsequent errors.
1335There are virtually no positive arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
1336Nevertheless, C does have an idiom where this capability is used, known as ``Duff's device''~\cite{Duff83}:
1337\begin{lstlisting}
1338register int n = (count + 7) / 8;
1339switch ( count % 8 ) {
1340case 0: do{ *to = *from++;
1341case 7:         *to = *from++;
1342case 6:         *to = *from++;
1343case 5:         *to = *from++;
1344case 4:         *to = *from++;
1345case 3:         *to = *from++;
1346case 2:         *to = *from++;
1347case 1:         *to = *from++;
1348                } while ( --n > 0 );
1349}
1350\end{lstlisting}
1351which unrolls a loop N times (N = 8 above) and uses the ©switch© statement to deal with any iterations not a multiple of N.
1352While efficient, this sort of special purpose usage is questionable:
1353\begin{quote}
1354Disgusting, no? But it compiles and runs just fine. I feel a combination of pride and revulsion at this
1355discovery.~\cite{Duff83}
1356\end{quote}
1357\item
1358It is possible to place the ©default© clause anywhere in the list of labelled clauses for a ©switch© statement, rather than only at the end.
1359Virtually all programming languages with a ©switch© statement require the ©default© clause to appear last in the case-clause list.
1360The logic for this semantics is that after checking all the ©case© clauses without success, the ©default© clause is selected;
1361hence, physically placing the ©default© clause at the end of the ©case© clause list matches with this semantics.
1362This physical placement can be compared to the physical placement of an ©else© clause at the end of a series of connected ©if©/©else© statements.
1363
1364\item
1365It is possible to place unreachable code at the start of a ©switch© statement, as in:
1366\begin{lstlisting}
1367switch ( x ) {
1368        int y = 1;                      // unreachable initialization
1369        x = 7;                          // unreachable code
1370  case 3: ...
1371        y = 3;
1372        ...
1373}
1374\end{lstlisting}
1375While the declaration of the local variable ©y© is useful and its scope is across all ©case© clauses, the initialization for such a variable is defined to never be executed because control always transfers over it.
1376Furthermore, any statements before the first ©case© clause can only be executed if labelled and transfered to using a ©goto©, either from outside or inside of the ©switch©.
1377As mentioned, transfer into control structures should be forbidden.
1378Transfers from within the ©switch© body using a ©goto© are equally unpalatable.
1379\end{enumerate}
1380Before discussing potential language changes to deal with these problems, it is worth observing that in a typical C program:
1381\begin{itemize}
1382\item
1383the number of ©switch© statements is small,
1384\item
1385most ©switch© statements are well formed (i.e., no Duff's device),
1386\item
1387the ©default© clause is usually written as the last case-clause,
1388\item
1389and there is only a medium amount of fall-through from one ©case© clause to the next, and most of these result from a list of case values executing common code, rather than a sequence of case actions that compound.
1390\end{itemize}
1391These observations should help to put the effects of suggested changes into perspective.
1392% Figure 1 shows the grammar change that attempts to minimize the effect on existing C programs.
1393\begin{enumerate}
1394\item
1395Eliminating the ©default© fall-through problem has the greatest potential for affecting existing code.
1396However, even if fall-through is removed, most ©switch© statements would continue to work because of the explicit transfers already present at the end of each ©case© clause, and the common placement of the ©default© clause at the end of the case list.
1397In addition, the above grammar provides for the most common use of fall-through, i.e., a list of ©case© clauses executing common code, e.g.:
1398\begin{lstlisting}
1399case 1:  case 2:  case 3: ...
1400\end{lstlisting}
1401Nevertheless, reversing the default action would have a non-trivial effect on case actions that compound, such as the above example of processing shell arguments.
1402Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called ©choose©, with no fall-through semantics.
1403The ©choose© statement is identical to the new ©switch© statement, except there is no implicit fall-through between case-clauses and the ©break© statement applies to the enclosing loop construct (as for the continue statement in a ©switch© statement).
1404It is still possible to fall-through if a case-clause ends with the new keyword fallthru, e.g.:
1405\begin{lstlisting}
1406choose ( i ) {
1407        int i;
1408  case 3, 4:
1409        i = 3;
1410        fallthru;
1411  case 8, 10:
1412  default:
1413        j = 3;
1414}
1415\end{lstlisting}
1416The ability to fall-through is retained because it is a sufficient C-idiom that most C programmers simply expect it, and its absence might discourage these programmers from using the choose statement.
1417\item
1418Eliminating Duff's device is straightforward and only invalidates a small amount of very questionable code.
1419The solution is to allow ©case© clauses to only appear at the same nesting level as the ©switch© body, as is done in most other programming languages with ©switch© statements.
1420\item
1421The issue of ©default© at locations other than at the end of the cause clause can be solved by using good programming style, and there are a few reasonable situations involving fall-through where the ©default© clause may appear is locations other than at the end.
1422Therefore, no language change is made for this issue.
1423\item
1424Dealing with unreachable code at the start of a ©switch© statement is solved by defining the declaration-list, including any associated initialization, at the start of a ©switch© statement body to be executed before the transfer to the appropriate ©case© clause.
1425This semantics is the same as for declarations at the start of a loop body, which are executed before each iteration of the loop body.
1426As well, this grammar does not allow statements to appear before the first ©case© clause.
1427The change is compatible for declarations with initialization in this context because existing code cannot assume the initialization has occurred.
1428The change is incompatible for statements, but any existing code using it is highly questionable, as in:
1429\begin{lstlisting}
1430switch ( i ) {
1431        L: x = 5;               // questionable code
1432  case 0:
1433        ...
1434}
1435\end{lstlisting}
1436The statement after the ©switch© can never be executed unless it is labelled.
1437If it is labelled, it must be transfered to from outside or inside the ©switch© statement, neither of which is acceptable control flow.
1438\end{enumerate}
1439
1440
1441\section{Case Clause}
1442
1443C restricts the ©case© clause of a ©switch© statement to a single value.
1444For multiple ©case© clauses associated with the same statement, it is necessary to have multiple ©case© clauses rather than multiple values.
1445Requiring a ©case© clause for each value does not seem to be in the spirit of brevity normally associated with C.
1446Therefore, the ©case© clause is extended with a list of values, as in:
1447\begin{quote2}
1448\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
1449\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{C}}        \\
1450\begin{lstlisting}
1451switch ( i ) {
1452  ®case 1, 3, 5®:
1453        ...
1454  ®case 2, 4, 6®:
1455        ...
1456}
1457\end{lstlisting}
1458&
1459\begin{lstlisting}
1460switch ( i ) {
1461  case 1: case 3 : case 5:
1462        ...
1463  case 2: case 4 : case 6:
1464        ...
1465}
1466\end{lstlisting}
1467&
1468\begin{lstlisting}
1469
1470// odd values
1471
1472// even values
1473
1474
1475\end{lstlisting}
1476\end{tabular}
1477\end{quote2}
1478In addition, two forms of subranges are allowed to specify case values: the GNU C form and a new \CFA form.
1479\begin{quote2}
1480\begin{tabular}{@{}l@{\hspace{30pt}}l@{\hspace{20pt}}l@{}}
1481\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c@{\hspace{20pt}}}{\textbf{GNU C}}    \\
1482\begin{lstlisting}
1483switch ( i ) {
1484  ®case 1~5:®
1485        ...
1486  ®case 10~15:®
1487        ...
1488}
1489\end{lstlisting}
1490&
1491\begin{lstlisting}
1492switch ( i )
1493  case 1 ... 5:
1494        ...
1495  case 10 ... 15:
1496        ...
1497}
1498\end{lstlisting}
1499&
1500\begin{lstlisting}
1501
1502// 1, 2, 3, 4, 5
1503
1504// 10, 11, 12, 13, 14, 15
1505
1506
1507\end{lstlisting}
1508\end{tabular}
1509\end{quote2}
1510
1511
1512\section{Unnamed Structure Fields}
1513
1514C requires each field of a structure to have a name, except for a bit field associated with a basic type, e.g.:
1515\begin{lstlisting}
1516struct {
1517        int f1;                 // named field
1518        int f2 : 4;             // named field with bit field size
1519        int : 3;                // unnamed field for basic type with bit field size
1520        int ;                   // disallowed, unnamed field
1521        int *;                  // disallowed, unnamed field
1522        int (*)(int);   // disallowed, unnamed field
1523};
1524\end{lstlisting}
1525This requirement is relaxed by making the field name optional for all field declarations; therefore, all the field declarations in the example are allowed.
1526As for unnamed bit fields, an unnamed field is used for padding a structure to a particular size.
1527A list of unnamed fields is also supported, e.g.:
1528\begin{lstlisting}
1529struct {
1530        int , , ;               // 3 unnamed fields
1531}
1532\end{lstlisting}
1533
1534
1535\section{Exception Handling}
1536
1537Exception handling provides two mechanim: change of control flow from a raise to a handler, and commumication from the riase to the handler.
1538\begin{lstlisting}
1539exception void h( int i );
1540exception int h( int i, double d );
1541
1542void f(...) {
1543        ... throw h( 3 );
1544        ... i = resume h( 3, 5.1 );
1545}
1546
1547try {
1548        f(...);
1549} catch h( int w ) {
1550        // reset
1551} resume h( int p, double x ) {
1552        return 17;  // recover
1553} finally {
1554}
1555\end{lstlisting}
1556So the type raised would be the mangled name of the exception prototype and that name would be matched at the handler clauses by comparing the strings.
1557The arguments for the call would have to be packed in a message and unpacked at handler clause and then a call made to the handler.
1558
1559
1560\section{Types}
1561
1562\subsection{Type Definitions}
1563
1564\CFA allows users to define new types using the keyword type.
1565
1566\begin{lstlisting}
1567// SensorValue is a distinct type and represented as an int
1568type SensorValue = int;
1569\end{lstlisting}
1570
1571A type definition is different from a typedef in C because a typedef just creates an alias for a type,  while Do.s type definition creates a distinct type.
1572This means that users can define distinct function overloads for the new type (see Overloading for more information).
1573For example:
1574
1575\begin{lstlisting}
1576type SensorValue = int;
1577void printValue(int v) {...}
1578void printValue(SensorValue v) {...}
1579void process(int v) {...}
1580
1581SensorValue s = ...;
1582
1583printValue(s); // calls version with SensorValue argument
1584
1585printValue((int) s); // calls version with int argument
1586
1587process(s); // implicit conversion to int
1588\end{lstlisting}
1589
1590If SensorValue was defined with a typedef, then these two print functions would not have unique signatures.
1591This can be very useful to create a distinct type that has the same representation as another type.
1592
1593The compiler will assume it can safely convert from the old type to the new type, implicitly.
1594Users may override this and define a function that must be called to convert from one type to another.
1595
1596\begin{lstlisting}
1597type SensorValue = int;
1598// ()? is the overloaded conversion operator identifier
1599// This function converts an int to a SensorValue
1600SensorValue ()?(int val) {
1601        ...
1602}
1603void process(int v) {...}
1604
1605SensorValue s = ...;
1606process(s); // implicit call to conversion operator
1607\end{lstlisting}
1608
1609In many cases, it is not desired for the compiler to do this implicit conversion.
1610To avoid that, the user can use the explicit modifier on the conversion operator.
1611Any places where the conversion is needed but not explicit (with a cast), will result in a compile-time error.
1612
1613\begin{lstlisting}
1614type SensorValue = int;
1615
1616// conversion from int to SensorValue; must be explicit
1617explicit SensorValue ()?(int val) {
1618        ...
1619}
1620
1621void process(int v) {...}
1622
1623SensorValue s = ...;
1624process(s); // implicit cast to int: compile-time error
1625process((int) s); // explicit cast to int: calls conversion func
1626\end{lstlisting}
1627
1628The conversion may not require any code, but still need to be explicit; in that case, the syntax can be simplified to:
1629\begin{lstlisting}
1630type SensorValue = int;
1631explicit SensorValue ()?(int);
1632void process(int v) {...}
1633
1634SensorValue s = ...;
1635process(s); // compile-time error
1636process((int) s); // type is converted, no function is called
1637\end{lstlisting}
1638
1639
1640\subsection{Structures}
1641
1642Structures in \CFA are basically the same as structures in C.
1643A structure is defined with the same syntax as in C.
1644When referring to a structure in \CFA, users may omit the struct keyword.
1645\begin{lstlisting}
1646struct Point {
1647        double x;
1648        double y;
1649};
1650
1651Point p = {0.0, 0.0};
1652\end{lstlisting}
1653
1654\CFA does not support inheritance among types, but instead uses composition to enable reuse of structure fields.
1655Composition is achieved by embedding one type into another.
1656When type A is embedded in type B, an object with type B may be used as an object of type A, and the fields of type A are directly accessible.
1657Embedding types is achieved using anonymous members.
1658For example, using Point from above:
1659\begin{lstlisting}
1660void foo(Point p);
1661
1662struct ColoredPoint {
1663        Point; // anonymous member (no identifier)
1664        int Color;
1665};
1666...
1667        ColoredPoint cp = ...;
1668        cp.x = 10.3; // x from Point is accessed directly
1669        cp.color = 0x33aaff; // color is accessed normally
1670        foo(cp); // cp can be used directly as a Point
1671\end{lstlisting}
1672
1673
1674\subsection{Constructors and Destructors}
1675
1676\CFA supports C initialization of structures, but it also adds constructors for more advanced initialization.
1677Additionally, \CFA adds destructors that are called when a variable is de-allocated (variable goes out of scope or object is deleted).
1678These functions take a reference to the structure as a parameter (see
1679References for more information).
1680
1681\begin{figure}
1682\begin{lstlisting}
1683struct Widget {
1684        int id;
1685        float size;
1686        Parts *optionalParts;
1687};
1688
1689// ?{} is the constructor operator identifier
1690// The first argument is a reference to the type to initialize
1691// Subsequent arguments can be specified for initialization
1692
1693void ?{}(Widget &w) { // default constructor
1694        w.id = -1;
1695        w.size = 0.0;
1696        w.optionalParts = 0;
1697}
1698
1699// constructor with values (does not need to include all fields)
1700void ?{}(Widget &w, int id, float size) {
1701        w.id = id;
1702        w.size = size;
1703        w.optionalParts = 0;
1704}
1705
1706// ^? is the destructor operator identifier
1707void ^?(Widget &w) { // destructor
1708        w.id = 0;
1709        w.size = 0.0;
1710        if (w.optionalParts != 0) {
1711        // This is the only pointer to optionalParts, free it
1712        free(w.optionalParts);
1713        w.optionalParts = 0;
1714        }
1715}
1716
1717Widget baz; // reserve space only
1718Widget foo{}; // calls default constructor
1719Widget bar{23, 2.45}; // calls constructor with values
1720baz{24, 0.91}; // calls constructor with values
1721?{}(baz, 24, 0.91}; // explicit call to constructor
1722^bar; // explicit call to destructor
1723^?(bar); // explicit call to destructor
1724\end{lstlisting}
1725\caption{Constructors and Destructors}
1726\end{figure}
1727
1728
1729\begin{comment}
1730\section{References}
1731
1732\CFA supports reference types similar to rvalue references in \CC.
1733A reference is essentially an alias for a variable, allowing multiple names to refer to the same object.
1734A reference can be used as a safer alternative to a pointer, because it can be used to pass a variable by reference, but it must always reference an object.
1735It cannot be NULL, it must be assigned a value at initialization, and the object it references cannot change once it is set.
1736By introducing references in parameter types, users are given an easy way to pass a value by reference, without the need for NULL pointer checks.
1737In structures, a reference can replace a pointer to an object that should always have a valid value.
1738When a structure contains a reference, all of its constructors must initialize the reference and all instances of this structure must initialize it upon definition.
1739
1740The syntax for using references in \CFA is the same as \CC with the exception of reference initialization.
1741Use ©&© to specify a reference, and access references just like regular objects, not like pointers (use dot notation to access fields).
1742When initializing a reference, \CFA uses a different syntax which differentiates reference initialization from assignment to a reference.
1743The ©&© is used on both sides of the expression to clarify that the address of the reference is being set to the address of the variable to which it refers.
1744
1745\begin{figure}
1746\begin{lstlisting}
1747// parameter p is a reference to a Point
1748void movePointUp(Point &p) {
1749        p.y += 1.0; // Uses dot-notation to access fields
1750}
1751
1752Point p1 = ...;
1753ColoredPoint cp1 = ...;
1754movePoint(p1); // reference to p1 passed to movePoint
1755movePoint(cp1); // reference to cp1 passed to movePoint
1756
1757// a ListElement cannot be created without a valid list
1758
1759struct ListElement {
1760        int element;
1761        List &list; // a list element has a reference to the list
1762}
1763
1764// The constructor must initialize the reference
1765void ?{}(ListElement &le, int e, List &l) {
1766        le.element = e;
1767        &le.list = &l; // initialize the reference
1768}
1769
1770ListElement e1{88, numberList}; // uses constructor
1771ListElement e2; // compiler error: uninitialized reference
1772Listing 10: References
1773\end{lstlisting}
1774\end{figure}
1775\end{comment}
1776
1777
1778\section{Overloading}
1779
1780Overloading refers to the capability of a programmer to define and use multiple objects in a program with the same name.
1781In \CFA, a declaration may overload declarations from outer scopes with the same name, instead of hiding them as is the case in C.
1782This may cause identical C and \CFA programs to behave differently.
1783The compiler selects the appropriate object (overload resolution) based on context information at the place where it is used.
1784Overloading allows programmers to give functions with different signatures but similar semantics the same name, simplifying the interface to users.
1785Disadvantages of overloading are that it can be used to give functions with different semantics the same name, causing confusion, or that the compiler may resolve to a different function from what the programmer expected.
1786\CFA allows overloading of functions, operators, variables, and even the constants 0 and 1.
1787
1788The compiler follows some overload resolution rules to determine the best interpretation of all of these overloads.
1789The best valid interpretations are the valid interpretations that use the fewest unsafe conversions.
1790Of these, the best are those where the functions and objects involved are the least polymorphic.
1791Of these, the best have the lowest total conversion cost, including all implicit conversions in the argument expressions.
1792Of these, the best have the highest total conversion cost for the implicit conversions (if any) applied to the argument expressions.
1793If there is no single best valid interpretation, or if the best valid interpretation is ambiguous, then the resulting interpretation is ambiguous.
1794For details about type inference and overload resolution, please see the \CFA Language Specification.
1795\begin{lstlisting}
1796int foo(int a, int b) {
1797        float sum = 0.0;
1798        float special = 1.0;
1799        {
1800                int sum = 0;
1801                // both the float and int versions of sum are available
1802                float special = 4.0;
1803                // this inner special hides the outer version
1804                ...
1805        }
1806        ...
1807}
1808\end{lstlisting}
1809
1810
1811\subsection{Overloaded Constant}
1812
1813The constants 0 and 1 have special meaning.
1814In \CFA, as in C, all scalar types can be incremented and
1815decremented, which is defined in terms of adding or subtracting 1.
1816The operations ©&&©, ©||©, and ©!© can be applied to any scalar arguments and are defined in terms of comparison against 0 (ex. ©(a && b)© becomes ©(a != 0 && b != 0)©).
1817
1818In C, the integer constants 0 and 1 suffice because the integer promotion rules can convert them to any
1819arithmetic type, and the rules for pointer expressions treat constant expressions evaluating to 0 as a
1820special case.
1821However, user-defined arithmetic types often need the equivalent of a 1 or 0 for their
1822functions or operators, polymorphic functions often need 0 and 1 constants of a type matching their
1823polymorphic parameters, and user-defined pointer-like types may need a null value.
1824Defining special
1825constants for a user-defined type is more efficient than defining a conversion to the type from ©_Bool©.
1826
1827Why just 0 and 1? Why not other integers? No other integers have special status in C.
1828A facility that let programmers declare specific constants..const Rational 12., for instance. would not be much of an improvement.
1829Some facility for defining the creation of values of programmer-defined types from arbitrary integer tokens would be needed.
1830The complexity of such a feature doesn.t seem worth the gain.
1831
1832For example, to define the constants for a complex type, the programmer would define the following:
1833
1834\begin{lstlisting}
1835struct Complex {
1836        double real;
1837        double imaginary;
1838}
1839
1840const Complex 0 = {0, 0};
1841const Complex 1 = {1, 0};
1842...
1843
1844        Complex a = 0;
1845...
1846
1847        a++;
1848...
1849        if (a) { // same as if (a == 0)
1850...
1851}
1852\end{lstlisting}
1853
1854
1855\subsection{Variable Overloading}
1856
1857The overload rules of \CFA allow a programmer to define multiple variables with the same name, but different types.
1858Allowing overloading of variable names enables programmers to use the same name across multiple types, simplifying naming conventions and is compatible with the other overloading that is allowed.
1859For example, a developer may want to do the following:
1860\begin{lstlisting}
1861int pi = 3;
1862float pi = 3.14;
1863char pi = .p.;
1864\end{lstlisting}
1865
1866
1867\subsection{Function Overloading}
1868
1869Overloaded functions in \CFA are resolved based on the number and type of arguments, type of return value, and the level of specialization required (specialized functions are preferred over generic).
1870
1871The examples below give some basic intuition about how the resolution works.
1872\begin{lstlisting}
1873// Choose the one with less conversions
1874int doSomething(int value) {...} // option 1
1875int doSomething(short value) {...} // option 2
1876
1877int a, b = 4;
1878short c = 2;
1879
1880a = doSomething(b); // chooses option 1
1881a = doSomething(c); // chooses option 2
1882
1883// Choose the specialized version over the generic
1884
1885generic(type T)
1886T bar(T rhs, T lhs) {...} // option 3
1887float bar(float rhs, float lhs){...} // option 4
1888float a, b, c;
1889double d, e, f;
1890c = bar(a, b); // chooses option 4
1891
1892// specialization is preferred over unsafe conversions
1893
1894f = bar(d, e); // chooses option 5
1895\end{lstlisting}
1896
1897
1898\subsection{Operator Overloading}
1899
1900\CFA also allows operators to be overloaded, to simplify the use of user-defined types.
1901Overloading the operators allows the users to use the same syntax for their custom types that they use for built-in types, increasing readability and improving productivity.
1902\CFA uses the following special identifiers to name overloaded operators:
1903
1904\begin{table}[hbt]
1905\hfil
1906\begin{tabular}[t]{ll}
1907%identifier & operation \\ \hline
1908©?[?]© & subscripting \impl{?[?]}\\
1909©?()© & function call \impl{?()}\\
1910©?++© & postfix increment \impl{?++}\\
1911©?--© & postfix decrement \impl{?--}\\
1912©++?© & prefix increment \impl{++?}\\
1913©--?© & prefix decrement \impl{--?}\\
1914©*?© & dereference \impl{*?}\\
1915©+?© & unary plus \impl{+?}\\
1916©-?© & arithmetic negation \impl{-?}\\
1917©~?© & bitwise negation \impl{~?}\\
1918©!?© & logical complement \impl{"!?}\\
1919©?*?© & multiplication \impl{?*?}\\
1920©?/?© & division \impl{?/?}\\
1921\end{tabular}\hfil
1922\begin{tabular}[t]{ll}
1923%identifier & operation \\ \hline
1924©?%?© & remainder \impl{?%?}\\
1925©?+?© & addition \impl{?+?}\\
1926©?-?© & subtraction \impl{?-?}\\
1927©?<<?© & left shift \impl{?<<?}\\
1928©?>>?© & right shift \impl{?>>?}\\
1929©?<?© & less than \impl{?<?}\\
1930©?<=?© & less than or equal \impl{?<=?}\\
1931©?>=?© & greater than or equal \impl{?>=?}\\
1932©?>?© & greater than \impl{?>?}\\
1933©?==?© & equality \impl{?==?}\\
1934©?!=?© & inequality \impl{?"!=?}\\
1935©?&& bitwise AND \impl{?&?}\\
1936\end{tabular}\hfil
1937\begin{tabular}[t]{ll}
1938%identifier & operation \\ \hline
1939©?^& exclusive OR \impl{?^?}\\
1940©?|?© & inclusive OR \impl{?"|?}\\
1941©?=?© & simple assignment \impl{?=?}\\
1942©?*=?© & multiplication assignment \impl{?*=?}\\
1943©?/=?© & division assignment \impl{?/=?}\\
1944©?%=?© & remainder assignment \impl{?%=?}\\
1945©?+=?© & addition assignment \impl{?+=?}\\
1946©?-=?© & subtraction assignment \impl{?-=?}\\
1947©?<<=?© & left-shift assignment \impl{?<<=?}\\
1948©?>>=?© & right-shift assignment \impl{?>>=?}\\
1949©?&=?© & bitwise AND assignment \impl{?&=?}\\
1950©?^=?© & exclusive OR assignment \impl{?^=?}\\
1951©?|=?© & inclusive OR assignment \impl{?"|=?}\\
1952\end{tabular}
1953\hfil
1954\caption{Operator Identifiers}
1955\label{opids}
1956\end{table}
1957
1958These identifiers are defined such that the question marks in the name identify the location of the operands.
1959These operands represent the parameters to the functions, and define how the operands are mapped to the function call.
1960For example, ©a + b© becomes ©?+?(a, b)©.
1961
1962In the example below, a new type, myComplex, is defined with an overloaded constructor, + operator, and string operator.
1963These operators are called using the normal C syntax.
1964
1965\begin{lstlisting}
1966type Complex = struct { // define a Complex type
1967        double real;
1968        double imag;
1969}
1970
1971// Constructor with default values
1972
1973void ?{}(Complex &c, double real = 0.0, double imag = 0.0) {
1974        c.real = real;
1975        c.imag = imag;
1976}
1977
1978Complex ?+?(Complex lhs, Complex rhs) {
1979        Complex sum;
1980        sum.real = lhs.real + rhs.real;
1981        sum.imag = lhs.imag + rhs.imag;
1982        return sum;
1983}
1984
1985String ()?(const Complex c) {
1986        // use the string conversions for the structure members
1987        return (String)c.real + . + . + (String)c.imag + .i.;
1988}
1989...
1990
1991Complex a, b, c = {1.0}; // constructor for c w/ default imag
1992...
1993c = a + b;
1994print(.sum = . + c);
1995\end{lstlisting}
1996
1997
1998\section{Auto Type-Inferencing}
1999
2000Auto type-inferencing occurs in a declaration where a variable's type is inferred from its initialization expression type.
2001\begin{quote2}
2002\begin{tabular}{@{}l@{\hspace{30pt}}ll@{}}
2003\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CC}}        & \multicolumn{1}{c}{©gcc©}\index{gcc} \\
2004\begin{lstlisting}
2005
2006auto j = 3.0 * 4;
2007int i;
2008auto k = i;
2009\end{lstlisting}
2010&
2011\begin{lstlisting}
2012#define expr 3.0 * i
2013typeof(expr) j = expr;
2014int i;
2015typeof(i) k = i;
2016\end{lstlisting}
2017&
2018\begin{lstlisting}
2019
2020// use type of initialization expression
2021
2022// use type of primary variable
2023\end{lstlisting}
2024\end{tabular}
2025\end{quote2}
2026The two important capabilities are:
2027\begin{itemize}
2028\item
2029preventing having to determine or write out long generic types,
2030\item
2031ensure secondary variables, related to a primary variable, always have the same type.
2032\end{itemize}
2033
2034In \CFA, ©typedef© provides a mechanism to alias long type names with short ones, both globally and locally, but not eliminate the use of the short name.
2035©gcc© provides ©typeof© to declare a secondary variable from a primary variable.
2036\CFA also relies heavily on the specification of the left-hand side of assignment for type inferencing, so in many cases it is crucial to specify the type of the left-hand side to select the correct type of the right-hand expression.
2037Only for overloaded routines with the same return type is variable type-inferencing possible.
2038Finally, ©auto© presents the programming problem of tracking down a type when the type is actually needed.
2039For example, given
2040\begin{lstlisting}
2041auto j = ®...®
2042\end{lstlisting}
2043and the need to write a routine to compute using ©j©
2044\begin{lstlisting}
2045void rtn( ®...® parm );
2046rtn( j );
2047\end{lstlisting}
2048A programmer must work backwards to determine the type of ©j©'s initialization expression, reconstructing the possibly long generic type-name.
2049In this situation, having the type name or a short alias is very useful.
2050
2051There is also the conundrum in type inferencing of when to \emph{\Index{brand}} a type.
2052That is, when is the type of the variable more important than the type of its initialization expression.
2053For example, if a change is made in an initialization expression, it can cause hundreds or thousands of cascading type changes and/or errors.
2054At some point, a programmer wants the type of the variable to remain constant and the expression to be in error when it changes.
2055
2056Given ©typedef© and ©typeof© in \CFA, and the strong need to use the type of left-hand side in inferencing, auto type-inferencing is not supported at this time.
2057Should a significant need arise, this feature can be revisited.
2058
2059
2060\section{Generics}
2061
2062\CFA supports parametric polymorphism to allow users to define generic functions and types.
2063Generics allow programmers to use type variables in place of concrete types so that the code can be reused with multiple types.
2064The type parameters can be restricted to satisfy a set of constraints.
2065This enables \CFA to build fully compiled generic functions and types, unlike other languages like \CC where templates are expanded or must be explicitly instantiated.
2066
2067
2068\subsection{Generic Functions}
2069
2070Generic functions in \CFA are similar to template functions in \CC, and will sometimes be expanded into specialized versions, just like in \CC.
2071The difference, however, is that generic functions in \CFA can also be separately compiled, using function pointers for callers to pass in all needed functionality for the given type.
2072This means that compiled libraries can contain generic functions that can be used by programs linked with them (statically or dynamically).
2073Another advantage over \CC templates is unlike templates, generic functions are statically checked, even without being instantiated.
2074
2075A simple example of using Do.s parametric polymorphism to create a generic swap function would look like this:
2076
2077\begin{lstlisting}
2078generic(type T)
2079void swap(T &a, T &b) {
2080        T tmp = a;
2081        a = b;
2082        b = a;
2083}
2084
2085int a, b;
2086swap(a, b);
2087
2088Point p1, p2;
2089swap(p1, p2);
2090\end{lstlisting}
2091
2092Here, instead of specifying types for the parameters a and b, the function has a generic type parameter, type T.
2093This function can be called with any type, and the compiler will handle generating the proper code for that type, using call site inference to determine the appropriate value for T.
2094
2095
2096\subsection{Bounded Quantification}
2097
2098Some generic functions only work (or make sense) for any type that satisfies a given property.
2099For example, here is a function to pick the minimum of two values of some type.
2100\begin{lstlisting}
2101generic (type T | bool ?<?(T, T) )
2102
2103T min(T a, T b) {
2104        return a < b ? a : b;
2105}
2106\end{lstlisting}
2107
2108It only makes sense to call min with values of a type that has an ordering: a way to decide whether one value is less than another.
2109The ordering function used here is the less than operator, <.
2110The syntax used to reference the operator is discussed in further detail in Operator Overloading.
2111In \CFA, this assertion on the type of a generic is written as the bound, (type T | bool ?<?(T, T)).
2112The \CFA compiler enforces that minis only called with types for which the less than operator is defined, and reports a compile-time error otherwise.
2113
2114Bounds can also involve multiple types, and multiple requirements, as shown below:
2115\begin{lstlisting}
2116generic (type T, type U | { T foo(T, U); U bar(U); })
2117
2118T baz(T t, U u) {
2119        return foo(t, bar(u));
2120}
2121\end{lstlisting}
2122
2123
2124\subsection{Interfaces}
2125
2126Type bounds as shown above are not very informative, merely requiring that a function exists with the right name and type.
2127Suppose you try to call a polymorphic function and \CFA gives you an error that int combine(int, int) is not defined.
2128Can you define it? What is it supposed to do? Perhaps it should compute the sum, or the bitwise and, or the maximum, or the least common multiple; or perhaps it's an operation that can't be defined for integers.
2129The function signature doesn't say.
2130
2131Interfaces gather together a set of function signatures under a common name, which solves two problems.
2132First, an interface name can be used in type bounds instead of function signatures.
2133This avoids repetition when a bound is used in many functions.
2134Second, interfaces explicitly document the existence of a commonly used set of functionality, making programs easier to understand.
2135\begin{lstlisting}
2136generic (type T)
2137interface Orderable {
2138        bool ?<?(T, T);
2139};
2140
2141generic (type T | Orderable(T))
2142T min(T a, T b) {
2143        return a < b ? a : b;
2144}
2145\end{lstlisting}
2146
2147This definition of the interface Orderable makes the generic function min easier to read and understand.
2148Orderable can also be reused for other generic functions, max for example.
2149Interfaces can also build on top of other interfaces.
2150For example:
2151\begin{lstlisting}
2152generic (type T | Orderable(T)
2153interface FooBarable {
2154        int foo(T, T);
2155        int Bar(T, T);
2156};
2157\end{lstlisting}
2158
2159The FooBarable interface specifies all of the bounds of the Orderable interface, plus the additional bounds specified in its definition.
2160A type does not need to specify that it satisfies any interface, the compiler can figure this out at compile time.
2161For example, there is no need to add some special syntax to show that a type implements the Orderable interface, just define a ?<? operator and it is satisfied.
2162
2163
2164\subsection{Generic Typedefs}
2165
2166Type synonyms can be defined generically using the typedef keyword together with a generic type annotation.
2167These can be used to abbreviate complicated type expressions, especially in generic code.
2168\begin{lstlisting}
2169// typedef the generic function pointers for later use
2170
2171generic(type T)
2172typedef int (*predicate)(T);
2173generic(type Captured, type T)
2174typedef void (*callback)(Captured, T);
2175
2176generic(type T)
2177void find(int length, T *array,
2178        predicate(T) p, callback(void *, T)f) {
2179        int i;
2180        for (i = 0; i < length; i++)
2181        if (p(array[i])) f(NULL, array[i]);
2182}
2183\end{lstlisting}
2184
2185
2186\subsection{Generic Types}
2187
2188Generic types are defined using the same mechanisms as those described above for generic functions.
2189This feature allows users to create types that have one or more fields that use generic parameters as types, similar to a template classes in \CC.
2190For example, to make a generic linked list, a placeholder is created for the type of the elements, so that the specific type of the elements in the list need not be specified when defining the list.
2191In C, something like this would have to be done using void pointers and unsafe casting.
2192As in generic functions, Do.s generic types are different from \CC templates in that they can be fully compiled, while \CC templates are more like macro expansions.
2193This means that a \CFA generic type from a compiled library can be used with any type that satisfies the bounds.
2194
2195The syntax for defining a generic type looks very similar to that of a generic function.
2196Generic types support bounds and interfaces, using the same syntax as generic functions.
2197\begin{lstlisting}
2198generic (type T)
2199struct LinkedListElem {
2200        T elem;
2201        LinkedListElem(T) *next;
2202};
2203
2204LinkedListElem *++?(LinkedListElem **elem) {
2205        return *elem = elem->next;
2206}
2207
2208generic (type T)
2209struct LinkedList {
2210        LinkedListElem(T) *head;
2211        unsigned int size;
2212}
2213
2214generic (type T | bool ?==?(T, T))
2215bool contains(LinkedList(T) *list, T elem) {
2216        for(LinkedListElem *iter = list->head; iter != 0; ++iter) {
2217        if (iter->elem == elem) return true;
2218        }
2219        return false;
2220}
2221\end{lstlisting}
2222
2223
2224\section{Safety}
2225
2226Safety, along with productivity, is a key goal of Do.
2227This section discusses the safety features that have been included in \CFA to help programmers create more stable, reliable, and secure code.
2228
2229
2230\subsection{Exceptions}
2231
2232\CFA introduces support for exceptions as an easier way to recover from exceptional conditions that may be detected within a block of code.
2233In C, developers can use error codes and special return values to report to a caller that an error occurred in a function.
2234The major problem with error codes is that they can be easily ignored by the caller.
2235Failure to properly check for errors can result in the caller making incorrect assumptions about the current state or about the return value that are very likely to result in errors later on in the program, making the source of the problem more difficult to find when debugging.
2236An unhandled exception on the other hand will cause a crash, revealing the original source of the erroneous state.
2237
2238Exceptions in \CFA allow a different type of control flow.
2239Throwing an exception terminates execution of the current block, invokes the destructors of variables that are local to the block, and propagates the exception to the parent block.
2240The exception is immediately re-thrown from the parent block unless it is caught as described below.
2241\CFA uses keywords similar to \CC for exception handling.
2242An exception is thrown using a throw statement, which accepts one argument.
2243
2244\begin{lstlisting}
2245        ...
2246
2247        throw 13;
2248
2249        ...
2250\end{lstlisting}
2251
2252An exception can be caught using a catch statement, which specifies the type of the exception it can catch.
2253A catch is specified immediately after a guarded block to signify that it can catch an exception from that block.
2254A guarded block is specified using the try keyword, followed by a block of code inside of curly braces.
2255
2256\begin{lstlisting}
2257        ...
2258
2259        try {
2260                throw 13;
2261        }
2262        catch(int e) {
2263                printf(.caught an exception: %d\n., e);
2264        }
2265\end{lstlisting}
2266
2267
2268\subsection{Memory Management}
2269
2270
2271\subsubsection{Manual Memory Management}
2272
2273Using malloc and free to dynamically allocate memory exposes several potential, and common, errors.
2274First, malloc breaks type safety because it returns a pointer to void.
2275There is no relationship between the type that the returned pointer is cast to, and the amount of memory allocated.
2276This problem is solved with a type-safe malloc.
2277Do.s type-safe malloc does not take any arguments for size.
2278Instead, it infers the type based on the return value, and then allocates space for the inferred type.
2279
2280\begin{lstlisting}
2281float *f = malloc(); // allocates the size of a float
2282
2283struct S {
2284        int i, j, k;
2285};
2286
2287struct S *s = malloc(); // allocates the size of a struct S
2288\end{lstlisting}
2289
2290In addition to the improved malloc, \CFA also provides a technique for combining allocation and initialization into one step, using the new function.
2291For all constructors defined for a given type (see Operator Overloading), a corresponding call to new can be used to allocate and construct that type.
2292
2293\begin{lstlisting}
2294type Complex = struct {
2295        float real;
2296        float imag;
2297};
2298
2299// default constructor
2300
2301void ?{}(Complex &c) {
2302        c.real = 0.0;
2303        c.imag = 0.0;
2304}
2305
2306
2307
2308// 2 parameter constructor
2309
2310void ?{}(Complex &c, float real, float imag) {
2311        c.real = real;
2312        c.imag = imag;
2313}
2314
2315
2316int main() {
2317        Complex c1; // No constructor is called
2318        Complex c2{}; // Default constructor called
2319        Complex c3{1.0, -1.0}; // 2 parameter constructor is called
2320
2321        Complex *p1 = malloc(); // allocate
2322        Complex *p2 = new(); // allocate + default constructor
2323        Complex *p3 = new(0.5, 1.0); // allocate + 2 param constructor
2324}
2325
2326\end{lstlisting}
2327
2328
2329\subsubsection{Automatic Memory Management}
2330
2331\CFA may also support automatic memory management to further improve safety.
2332If the compiler can insert all of the code needed to manage dynamically allocated memory (automatic reference counting), then developers can avoid problems with dangling pointers, double frees, memory leaks, etc.
2333This feature requires further investigation.
2334\CFA will not have a garbage collector, but might use some kind of region-based memory management.
2335
2336
2337\subsection{Unsafe C Constructs}
2338
2339C programmers are able to access all of the low-level tricks that are sometimes needed for close-to-the-hardware programming.
2340Some of these practices however are often error-prone and difficult to read and maintain.
2341Since \CFA is designed to be safer than C, such constructs are disallowed in \CFA code.
2342If a programmer wants to use one of these unsafe C constructs, the unsafe code must be contained in a C linkage block (see Interoperability), which will be compiled like C code.
2343This block means that the user is telling the tools, .I know this is unsafe, but I.m going to do it anyway..
2344
2345The exact set of unsafe C constructs that will be disallowed in \CFA has not yet been decided, but is sure to include pointer arithmetic, pointer casting, etc.
2346Once the full set is decided, the rules will be listed here.
2347
2348
2349\section{Syntactic Anomalies}
2350
2351The number 0 and 1 are treated specially in \CFA, and can be redefined as variables.
2352One syntactic anomaly is when a field in an structure is names 0 or 1:
2353\begin{lstlisting}
2354struct S {
2355        int 0, 1;
2356} s;
2357\end{lstlisting}
2358The problem occurs in accesing these fields using the selection operation ``©.©'':
2359\begin{lstlisting}
2360s.0 = 0;        // ambiguity with floating constant .0
2361s.1 = 1;        // ambiguity with floating constant .1
2362\end{lstlisting}
2363To make this work, a space is required after the field selection:
2364\begin{lstlisting}
2365®s.§\textvisiblespace§0® = 0;
2366®s.§\textvisiblespace§1® = 1;
2367\end{lstlisting}
2368While this sytact is awkward, it is unlikely many programers will name fields of a structure 0 or 1.
2369Like the \CC lexical problem with closing template-syntax, e.g, ©Foo<Bar<int®>>®©, this issue can be solved with a more powerful lexer/parser.
2370
2371There are several ambiguous cases with operator identifiers, e.g., ©int *?*?()©, where the string ©*?*?© can be lexed as ©*©/©?*?© or ©*?©/©*?©.
2372Since it is common practise to put a unary operator juxtaposed to an identifier, e.g., ©*i©, users will be annoyed if they cannot do this with respect to operator identifiers.
2373Even with this special hack, there are 5 general cases that cannot be handled.
2374The first case is for the function-call identifier ©?()©:
2375\begin{lstlisting}
2376int *§\textvisiblespace§?()();  // declaration: space required after '*'
2377\textvisiblespace§?()();              // expression: space required after '*'
2378\end{lstlisting}
2379Without the space, the string ©*?()© is ambiguous without N character look ahead;
2380it requires scanning ahead to determine if there is a ©'('©, which is the start of an argument/parameter list.
2381
2382The 4 remaining cases occur in expressions:
2383\begin{lstlisting}
2384i++§\textvisiblespace§?i:0;             // space required before '?'
2385i--§\textvisiblespace§?i:0;             // space required before '?'
2386\textvisiblespace§?++i:0;             // space required after '?'
2387\textvisiblespace§?--i:0;             // space required after '?'
2388\end{lstlisting}
2389In the first two cases, the string ©i++?© is ambiguous, where this string can be lexed as ©i© / ©++?© or ©i++© / ©?©;
2390it requires scanning ahead to determine if there is a ©'('©, which is the start of an argument list.
2391In the second two cases, the string ©?++x© is ambiguous, where this string can be lexed as ©?++© / ©x© or ©?© / y©++x©;
2392it requires scanning ahead to determine if there is a ©'('©, which is the start of an argument list.
2393
2394
2395\section{Concurrency}
2396
2397Today's processors for nearly all use cases, ranging from embedded systems to large cloud computing servers, are composed of multiple cores, often heterogeneous.
2398As machines grow in complexity, it becomes more difficult for a program to make the most use of the hardware available.
2399\CFA includes built-in concurrency features to enable high performance and improve programmer productivity on these multi-/many-core machines.
2400
2401Concurrency support in \CFA is implemented on top of a highly efficient runtime system of light-weight, M:N, user level threads.
2402The model integrates concurrency features into the language by making the structure type the core unit of concurrency.
2403All communication occurs through method calls, where data is sent via method arguments, and received via the return value.
2404This enables a very familiar interface to all programmers, even those with no parallel programming experience.
2405It also allows the compiler to do static type checking of all communication, a very important safety feature.
2406This controlled communication with type safety has some similarities with channels in Go, and can actually implement
2407channels exactly, as well as create additional communication patterns that channels cannot.
2408Mutex objects, monitors, are used to contain mutual exclusion within an object and synchronization across concurrent threads.
2409
2410Three new keywords are added to support these features:
2411
2412monitor creates a structure with implicit locking when accessing fields
2413
2414mutex implies use of a monitor requiring the implicit locking
2415
2416task creates a type with implicit locking, separate stack, and a thread
2417
2418
2419\subsection{Monitors}
2420
2421A monitor is a structure in \CFA which includes implicit locking of its fields.
2422Users of a monitor interact with it just like any structure, but the compiler handles code as needed to ensure mutual exclusion.
2423An example of the definition of a monitor is shown here:
2424\begin{lstlisting}
2425type Account = monitor {
2426        const unsigned long number; // account number
2427        float balance; // account balance
2428};
2429\end{lstlisting}
2430
2431Since a monitor structure includes an implicit locking mechanism, it does not make sense to copy a monitor;
2432it is always passed by reference.
2433Users can specify to the compiler whether or not a function will require mutual exclusion of the monitor using the mutex modifier on the parameter.
2434When mutex is specified, the compiler inserts locking before executing the body of the function, and unlocking after the body.
2435This means that a function requiring mutual exclusion could block if the lock is already held by another thread.
2436Blocking on a monitor lock does not block the kernel thread, it simply blocks the user thread, which yields its kernel thread while waiting to obtain the lock.
2437If multiple mutex parameters are specified, they will be locked in parameter order (i.e. first parameter is locked first) and unlocked in the
2438reverse order.
2439\begin{lstlisting}
2440// This function accesses a constant field, it does not require
2441// mutual exclusion
2442
2443export unsigned long getAccountNumber(Account &a) {
2444        return a.number;
2445}
2446
2447// This function accesses and modifies a shared field; it
2448// requires mutual exclusion
2449
2450export float withdrawal(mutex Account &a, float amount) {
2451        a.balance -= amount;
2452        return a.balance;
2453}
2454\end{lstlisting}
2455
2456Often, one function using a monitor will call another function using that same monitor.
2457If both require mutual exclusion, then the thread would be waiting for itself to release the lock when it calls the inner function.
2458This situation is resolved by allowing recursive entry (reentrant locks), meaning that if the lock is already held by the caller, it can be locked again.
2459It will still be unlocked the same number of times.
2460An example of this situation is shown below:
2461
2462\begin{lstlisting}
2463// deleting a job from a worker requires mutual exclusion
2464
2465void deleteJob(mutex Worker &w, Job &j) {
2466        ...
2467}
2468
2469// transferring requires mutual exclusion and calls deleteJob
2470
2471void transferJob(mutex Worker &from, Worker &to) {
2472        ...
2473        deleteJob(j);
2474        ...
2475}
2476\end{lstlisting}
2477
2478
2479\subsection{Tasks}
2480
2481\CFA also provides a simple mechanism for creating and utilizing user level threads.
2482A task provides mutual exclusion like a monitor, and also has its own execution state and a thread of control.
2483Similar to a monitor, a task is defined like a structure:
2484\begin{lstlisting}
2485type Adder = task {
2486        int *row;
2487        int size;
2488        int &subtotal;
2489}
2490\end{lstlisting}
2491
2492A task may define a constructor, which will be called upon allocation and run on the caller.s thread.
2493A destructor may also be defined, which is called at de-allocation (when a dynamic object is deleted or when a local object goes out of scope).
2494After a task is allocated and initialized, its thread is spawned implicitly and begins executing in its function call method.
2495All tasks must define this function call method, with a void return value and no additional parameters, or the compiler will report an error.
2496Below are example functions for the above Adder task, and its usage to sum up a matrix on multiple threads.
2497(Note that this example is designed to display the syntax and functionality, not the best method to solve this problem)
2498\begin{lstlisting}
2499void ?{}(Adder &a, int r[], int s, int &st) { // constructor
2500        a.row = r;
2501        a.size = s;
2502        a.subtotal = st;
2503}
2504
2505// implicitly spawn thread and begin execution here
2506
2507void ?()(Adder &a) {
2508        int c;
2509        subtotal = 0;
2510        for (c=0; c<a.size; ++c) {
2511        subtotal += row[c];
2512        }
2513}
2514
2515int main() {
2516        const int rows = 100, cols = 1000000;
2517        int matrix[rows][cols];
2518        int subtotals[rows];
2519        int total = 0;
2520        int r;
2521
2522        { // create a new scope here for our adders
2523        Adder adders[rows];
2524        // read in the matrix
2525        ...
2526        for (r=0; r<rows; ++r) {
2527        // tasks are initialized on this thread
2528        Adders[r] = {matrix[r], cols, subtotals[r]};
2529        Adders[r](); // spawn thread and begin execution
2530        }
2531        } // adders go out of scope; block here until they all finish
2532        total += subtotals[r];
2533        printf(.total is %d\n., total);
2534}
2535\end{lstlisting}
2536
2537
2538\subsection{Cooperative Scheduling}
2539
2540Tasks in \CFA are cooperatively scheduled, meaning that a task will not be interrupted by another task, except at specific yield points.
2541In Listing 31, there are no yield points, so each task runs to completion with no interruptions.
2542Places where a task could yield include waiting for a lock (explicitly or implicitly), waiting for I/O, or waiting for a specific function (or one of a set of functions) to be called.
2543This last option is introduced with the yield function. yield is used to indicate that this task should yield its thread until the specified function is called.
2544For example, the code below defines a monitor that maintains a generic list.
2545When a task tries to pop from the list, but it is empty, the task should yield until another task puts something into the list, with the push function.
2546Similarly, when a task tries to push something onto the list, but it is full, it will yield until another task frees some space with the pop function.
2547
2548\begin{lstlisting}
2549// type T is used as a generic type for all definitions inside
2550// the curly brackets
2551
2552generic(type T) {
2553        type Channel = monitor {
2554        List(T) list; // list is a simple generic list type
2555        };
2556
2557        T pop(mutex &Channel(T) ch) {
2558        if (ch.list.empty()) {
2559        // yield until push is called for this channel
2560        yield(push);
2561        }
2562        return ch.list.pop();
2563        }
2564
2565        void push(mutex &Channel(T)ch, T val) {
2566        if (ch.list.full()) {
2567        // yield until pop is called for this channel
2568        yield(pop);
2569        }
2570        ch.list.push(val);
2571        }
2572}
2573\end{lstlisting}
2574
2575A task can also yield indefinitely by calling yield with no arguments.
2576This will tell the scheduler to yield this task until it is resumed by some other task.
2577A task can resume another task by using its functional call operator.
2578The code below shows a simple ping-pong example, where two tasks yield back and forth to each other using these methods.
2579
2580\begin{lstlisting}
2581type Ping = task {
2582        Pong *partner;
2583};
2584
2585void ?{}(Ping &p, Pong *partner = 0) {
2586        p.partner = partner;
2587}
2588
2589void ?()(Ping &p) {
2590        for(;;) { // loop forever
2591        printf(.ping\n.);
2592        partner(); // resumes the partner task
2593        yield(); // yields this task
2594        }
2595}
2596
2597type Pong = task {
2598        Ping *partner;
2599};
2600
2601void ?{}(Pong &p, Ping *partner = 0) {
2602        p.partner = partner;
2603}
2604
2605void ?()(Pong &p) {
2606        for(;;) { // loop forever
2607        yield(); // yields this task
2608        printf(.pong/n.);
2609        partner(); // resumes the partner task
2610        }
2611}
2612
2613void main() {
2614        Ping ping; // allocate ping
2615        Pong pong{ping}; // allocate, initialize, and start pong
2616        Ping{pong}; // initialize and start ping
2617}
2618\end{lstlisting}
2619
2620The same functionality can be accomplished by providing functions to be called by the partner task.
2621\begin{lstlisting}
2622type Pingpong = task {
2623        String msg;
2624        Pingpong *partner;
2625};
2626
2627void ?{}(Pingpong &p, String msg, Pingpong *partner = 0) {
2628        p.msg = msg;
2629        p.partner = partner;
2630}
2631
2632void ?()(Pingpong &p) {
2633        for(;;) {
2634        yield(go);
2635        }
2636}
2637
2638void go(Pingpong &p) {
2639        print(.%(p.msg)\n.);
2640        go(p.partner);
2641}
2642
2643void main() {
2644        Pingpong ping = {.ping.};
2645        Pingpong pong = {.pong., ping};
2646        ping.partner = pong;
2647        go(ping);
2648}
2649\end{lstlisting}
2650
2651
2652\section{Modules and Packages }
2653
2654\begin{comment}
2655High-level encapsulation is useful for organizing code into reusable units, and accelerating compilation speed.
2656\CFA provides a convenient mechanism for creating, building and sharing groups of functionality that enhances productivity and improves compile time.
2657
2658There are two levels of encapsulation in \CFA, module and package.
2659A module is a logical grouping of functionality that can be easily pulled into another project, much like a module in Python or a package in Go.
2660A module forms a namespace to limit the visibility and prevent naming conflicts of variables.
2661Furthermore, a module is an independent translation unit, which can be compiled separately to accelerate the compilation speed.
2662
2663A package is a physical grouping of one or more modules that is used for code distribution and version management.
2664Package is also the level of granularity at which dependences are managed.
2665A package is similar to the Crate in Rust.
2666
2667
2668\subsection{No Declarations, No Header Files}
2669
2670In C and \CC, it is necessary to declare or define every global variable, global function, and type before it is used in each file.
2671Header files and a preprocessor are normally used to avoid repeating code.
2672Thus, many variables, functions, and types are described twice, which exposes an opportunity for errors and causes additional maintenance work.
2673Instead of following this model, the \CFA tools can extract all of the same information from the code automatically.
2674This information is then stored in the object files for each module, in a format that can quickly be read by the compiler, and stored at the top of the file, for quick access.
2675In addition to the user productivity improvements, this simple change also improves compile time, by saving the information in a simple machine readable format, instead of making the compiler parse the same information over and over from a header file.
2676This seems like a minor change, but according to (Pike, Go at Google: Language Design in the Service of Software Engineering), this simple change can cause massive reductions in compile time.
2677
2678In \CFA, multiple definitions are not necessary.
2679Within a module, all of the module's global definitions are visible throughout the module.
2680For example, the following code compiles, even though isOdd was not declared before being called:
2681\begin{lstlisting}
2682bool isEven(unsigned int x) {
2683        if (x == 0) return true;
2684        else return !isOdd(x);
2685}
2686
2687bool isOdd(unsigned int x) {
2688        if (x == 1) return true;
2689        else return !isEven(x - 2);
2690}
2691\end{lstlisting}
2692
2693Header files in C are used to expose the declarations from a library, so that they can be used externally.
2694With \CFA, this functionality is replaced with module exports, discussed below.
2695When building a \CFA module which needs to be callable from C code, users can use the tools to generate a header file suitable for including in these C files with all of the needed declarations.
2696
2697In order to interoperate with existing C code, \CFA files can still include header files, the contents of which will be enclosed in a C linkage section to indicate C calling conventions (see Interoperability for more information).
2698
2699
2700\subsection{Modules}
2701
2702A module typically contains a set of related types and methods, with some objects accessible from outside the package, and some limited to use inside the module.
2703These modules can then be easily shared and reused in multiple projects.
2704As modules are intended to be distributed for reuse, they should generally have stable, well-defined interfaces.
2705
2706\CFA adds the following keywords to express the module systems: module, export, import, as.
2707
2708
2709\subsubsection{Module Declaration}
2710
2711The syntax to declare a module is module moduleName;.
2712
2713The module declaration must be at the beginning of a file, and each file can only belong to one module.
2714If there is no module declaration at the beginning of a file, the file belongs to the global module.
2715A module can span several files.
2716By convention, a module and the files belonging to the module have additional mapping relationship which is described in the Do-Lang Tooling documentation.
2717
2718The moduleName follows the same rules of a variable name, except that it can use slash "/" to indicate the module/sub-module relationship.
2719For example, container/vector is a valid module name, where container is the parent module name, and vector is the sub-module under container.
2720
2721Only the interfaces of a module are visible from outside, when the module is imported. export is a type decorator to declare a module interface.
2722A method, a global variable or a type can be declared as a module interface.
2723Types defined in a module and referenced by an exported function or a variable must be exported, too.
2724
2725The following code is a simple module declaration example.
2726\begin{lstlisting}
2727module M;
2728
2729//visible outside module M
2730
2731export int f(int i) { return i + 1; }
2732export double aCounter;
2733
2734//not visible outside module M
2735
2736int g(int i) { return i - 1; }
2737
2738double bCounter;
2739\end{lstlisting}
2740
2741export module moduleName; can be use to re-export all the visible (exported) names in moduleName from the current module.
2742
2743
2744\subsubsection{Module Import}
2745
2746The syntax to import a module is import moduleName; or import moduleName as anotherName;.
2747One package cannot be imported with both of the two types of syntax in one file.
2748A package imported in one file will only be visible in this file.
2749For example, two files, A and B belong to the same module.
2750If file A imports another module, M, the exported names in M are not visible in file B.
2751
2752All of the exported names are visible in the file that imports the module.
2753The exported names can be accessed within a namespace based on the module name in the first syntax (ex moduleName.foo).
2754If moduleName has several elements separated by '/' to describe a sub-module (ex. import container/vector;), the last element in the moduleName is used as the namespace to access the visible names in that module (ex vector.add(...);).
2755The as keyword is used to confine the imported names in a unique namespace (ex. anotherName.foo). anotherName must be a valid identifier (same rules as a variable name) which means it cannot have '/' in it.
2756Conflicts in namespaces will be reported by the compiler.
2757The second method can be used to solve conflicting name problems.
2758The following code snippets show the two situations.
2759
2760\begin{lstlisting}
2761module util/counter;
2762export int f(int i) { return i+1; }
2763
2764import util/counter;
2765
2766int main() {
2767        return counter.f(200); // f() from the package counter
2768}
2769
2770import util/counter as ct;
2771int main() {
2772        return ct.f(200); // f() from the package counter
2773}
2774\end{lstlisting}
2775
2776
2777Additionally, using the .as. syntax, a user can force the compiler to add the imported names into the current namespace using .as ..With these module rules, the following module definitions and imports can be achieved without any problem.
2778
2779\begin{lstlisting}
2780module M1;
2781export int f(int i) { return i+1;} // visible outside
2782
2783int g(int i) { return i-1;} // not visible outside
2784
2785module M2;
2786int f(int i) { return i * 2; } // not visible outside
2787export int g(int g) { return i / 2; } // visible outside
2788
2789import M1 as .;
2790
2791import M2 as .;
2792
2793
2794int main() {
2795        return f(3) + g(4); //f() from M1 and g() from M2;
2796}
2797\end{lstlisting}
2798
2799
2800\subsubsection{Sub-Module and Module Aggregation}
2801
2802Several modules can be organized in a parent module and sub-modules relationship.
2803The sub-module names are based on hierarchical naming, and use slash, "/", to indicate the relationship.
2804For example, std/vector and std/io are sub-modules of module std.
2805The exported names in a sub-module are NOT visible if the parent module is imported, which means the exported names in the sub-module are
2806not implicitly exported in the parent module.
2807
2808Aggregation is a mechanism to support components and simplified importing.
2809The mechanism is not based on naming but based on manual declaration.
2810For example, the following is the aggregated sequence module.
2811The export {...} is syntactic sugar for many lines of export module aModule;.
2812If an aggregated module is imported, all the included modules in the aggregation are imported.
2813
2814\begin{lstlisting}
2815module std/sequence;
2816
2817export {
2818        module std/vector;
2819        module std/list;
2820        module std/array;
2821        module std/deque;
2822        module std/forward_list;
2823        module std/queue;
2824        module std/stack;
2825};
2826\end{lstlisting}
2827
2828After importing the aggregated module, each individual name is still contained in the original name space.
2829For example, vector.add() and list.add() should be used to reference the add methods if there are add methods in both the vector module and the list module.
2830
2831
2832\subsubsection{Import from Repository}
2833
2834When a module is imported, the tools locate the module in the one of the accessible package paths (defined by command line flag or environment variable).
2835The tools also support retrieving modules of a package from external repositories.
2836See Listing 40: Package directory structure
2837
2838
2839\subsubsection{Package Import}
2840
2841Because packages are the places where the building tool looks for modules, there is no code required in the \CFA source file to import a package.
2842In order to use modules in a package, the programmer needs to guide the building tool to locate the right package by 1) Adding the package's parent path into \$DOPATH;
2843or 2) Adding the package dependence into the current project's Do.prj.
2844More details about locating a module in a package are explained in the next section.
2845
2846
2847\subsubsection{Package Versioning}
2848
2849A package must have a version number.
2850The version number is a string.
2851For example "1.0", "1.a", "A1", and "1ec5fab753eb979d3886a491845b8ae152d58c8f" are all valid version numbers.
2852By convention, a package is stored in a directory named packageName-packageVersion.
2853For example, the util package with version 1.1 is stored in a directory named util-1.1.
2854
2855The project description file can optionally specify the version of the package used in the current project.
2856If not defined, because the version number is a string, and all the different versions for the same package will be sorted in increasing order, the package with the largest version number will be used in the compilation.
2857The builder tool will record the specific package version used in the build in the project's "Do.lock" file to enable fully repeatable builds.
2858
2859
2860\subsection{Module and Package Organization}
2861
2862\CFA has two level of encapsulations, module and package.
2863This section explains the object model of modules, packages and other language concepts.
2864It also explains how programmers should organize their code, and the method used by the build tools to locate packages, and import modules for compilation.
2865
2866
2867\subsubsection{Object Model}
2868
2869There are several concepts in Do.
2870\begin{itemize}
2871\item
2872File: a \CFA source file
2873\item
2874Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside
2875\item
2876Package: a container to organize modules for distribution; It has attributes like name, author,
2877version, dependences, etc.
2878\item
2879Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
2880\end{itemize}
2881
2882The following rules summarize the object model of all the above concepts:
2883\begin{itemize}
2884\item
2885A module contains one or more files
2886\begin{itemize}
2887\item
2888One file can only belong to one module
2889\item
2890A module has its name and interfaces exported
2891\item
2892A file without a module declaration at the beginning belongs to the global module
2893\item
2894\end{itemize}
2895
2896\item
2897A package contains one or more modules
2898\begin{itemize}
2899\item
2900A package has additional meta info described in Do.prj file
2901\item
2902A package may be dependent on other packages.
2903\end{itemize}
2904
2905\item
2906A project contains one or more modules in its source code
2907\begin{itemize}
2908\item
2909A project has additional meta info described in Do.prj file
2910\item
2911A project may be dependent on other packages
2912\item
2913A project can be transformed into a package for distribution
2914\item
2915A project can generate one or more executable binaries
2916\end{itemize}
2917\end{itemize}
2918
2919
2920\subsubsection{Module File Organization}
2921
2922The rules of this section are the conventions to organize module files in one package.
2923
2924The file location of a module in a package must match the module/submodule naming hierarchy.
2925The names separated by slash "/" must match the directory levels.
2926If only one file is used to implement one module, there is no need to put the module implementation file inside a sub-directory.
2927The file can be put inside its parent module's sub-directory with the sub module's name as the file name.
2928
2929Here is an example of a package, util.
2930\begin{lstlisting}
2931+ util
2932Do.prj #package description file
2933        heap.do #Case 1: module heap;
2934        list.do #Case 1: mdoule list;
2935        ring.do #Case 1: module ring;
2936        + string #Case 2
2937        impl1.do #module string;
2938        + std
2939        vector.do
2940        list.do
2941        + array #Case 3
2942        array1.do #module std/array;
2943        array2.do #module std/array;
2944        sequence.do #Case 4, module std/sequence;
2945        test.do #Case 5
2946\end{lstlisting}
2947
2948\begin{itemize}
2949\item
2950Case 1: Each individual file implements a module
2951\item
2952Case 2: Put the implementation of a module under the sub-directory, but there is only one file
2953\item
2954Case 3: Put the implementation of a module under the sub-directory; There are several files to
2955implement one module
2956\item
2957Case 4: One file to express one aggregation
2958\item
2959Case 5: The file does not belong to any module; It is used for testing purpose
2960\end{itemize}
2961
2962The example only uses source code, ".do" files, to show the module file organization.
2963Other module packaging formats, like binary, must also follow the same rules.
2964
2965
2966\subsection{Module File Format}
2967
2968\CFA supports different types of module file formats.
2969
2970\begin{itemize}
2971\item
2972Pure source code format: The files should be organized following the previous section's definition.
2973\item
2974IR format (TBD): The \CFA compiler IR format, similar to the source code format
2975\item
2976Binary format, including ".a" static library or ".so" dynamic linkage library
2977\begin{itemize}
2978\item
2979The file's name must match the right level's module name defined in the previous section
2980\item
2981E.g. "util.so" includes all modules for the package util.
2982\item
2983E.g. "string.so" under the package directory to include files belonging to "module string;"
2984\end{itemize}
2985\item.
2986Archive format
2987\begin{itemize}
2988\item
2989The archive is named as ".dar", and is a zip archive of the source code or the binary for a package
2990\item
2991E.g. "util.dar" is the whole package for util package including the package direction file
2992\end{itemize}
2993\item
2994Hybrid format
2995\begin{itemize}
2996\item
2997A package can be distributed partly in source code, partly in binary format, and/or packaged in the archive format
2998\item
2999The only limitation is that the names of the files must match the module location names defined in previous section
3000\end{itemize}
3001\end{itemize}
3002Package and Module Locating and the \CFA Language Tooling documentation for more details.
3003
3004
3005\subsection{Packages}
3006
3007A package is synonymous with a library in other languages.
3008The intent of the package level encapsulation is to facilitate code distribution, version control, and dependence management.
3009A package is a physical grouping of one or more modules in a directory (an archive file for a directory).
3010The concept of a package is the convention for grouping code, and the contract between the language and the building tool to search for imported modules.
3011
3012
3013\subsubsection{Package Definition}
3014
3015A package is defined by putting a project description file, Do.prj, with one or more modules into a directory.
3016This project description file contains the package's meta data, including package name, author, version, dependences, etc.
3017It should be in the root of the package directory.
3018
3019The modules in the package could be either source code, or compiled binary format.
3020The location of the module files should follow the module name's path.
3021
3022Here is a simple example of the directory structure of a package, core.
3023It contains a module std and several sub-modules under std.
3024\begin{lstlisting}
3025+ core
3026        Do.prj
3027        + std
3028        + io
3029        file.do # module std/io/file;
3030        network.do #module std/io/network;
3031        + container
3032        vector.do #module std/container/vector;
3033        list.do #module std/container/list;
3034\end{lstlisting}
3035
3036
3037\subsubsection{Package Import}
3038
3039Because packages are the places where the building tool looks for modules, there is no code required in the \CFA source file to import a package.
3040In order to use modules in a package, the programmer needs to guide the building tool to locate the right package by 1) Adding the package's parent path into \$DOPATH; or 2) Adding the package dependence into the current project's Do.prj.
3041More details about locating a module in a package are explained in the next section.
3042
3043
3044\subsubsection{Package Versioning}
3045
3046A package must have a version number.
3047The version number is a string.
3048For example "1.0", "1.a", "A1", and "1ec5fab753eb979d3886a491845b8ae152d58c8f" are all valid version numbers.
3049By convention, a package is stored in a directory named packageName-packageVersion.
3050For example, the util package with version 1.1 is stored in a directory named util-1.1.
3051
3052The project description file can optionally specify the version of the package used in the current project.
3053If not defined, because the version number is a string, and all the different versions for the same package will be sorted in increasing order, the package with the largest version number will be used in the compilation.
3054The builder tool will record the specific package version used in the build in the project's "Do.lock" file to enable fully repeatable builds.
3055
3056
3057\subsection{Module and Package Organization}
3058
3059\CFA has two level of encapsulations, module and package.
3060This section explains the object model of modules, packages and other language concepts.
3061It also explains how programmers should organize their code, and the method used by the build tools to locate packages, and import modules for compilation.
3062
3063
3064\subsubsection{Object Model}
3065
3066There are several concepts in Do.
3067\begin{itemize}
3068\item
3069File: a \CFA source file
3070\item
3071Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside
3072\item
3073Package: a container to organize modules for distribution; It has attributes like name, author, version, dependences, etc.
3074\item
3075Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
3076\end{itemize}
3077
3078The following rules summarize the object model of all the above concepts:
3079\begin{itemize}
3080\item
3081A module contains one or more files
3082\begin{itemize}
3083\item
3084One file can only belong to one module
3085\item
3086A module has its name and interfaces exported
3087\item
3088A file without a module declaration at the beginning belongs to the global module
3089\end{itemize}
3090\item
3091A package contains one or more modules
3092\begin{itemize}
3093\item
3094A package has additional meta info described in Do.prj file
3095\item
3096A package may be dependent on other packages.
3097\end{itemize}
3098\item
3099A project contains one or more modules in its source code
3100\begin{itemize}
3101\item
3102A project has additional meta info described in Do.prj file
3103\item
3104A project may be dependent on other packages
3105\item
3106A project can be transformed into a package for distribution
3107\item
3108A project can generate one or more executable binaries
3109\end{itemize}
3110\end{itemize}
3111
3112
3113\subsubsection{Module File Organization}
3114
3115The rules of this section are the conventions to organize module files in one package.
3116
3117The file location of a module in a package must match the module/submodule naming hierarchy.
3118The names separated by slash "/" must match the directory levels.
3119If only one file is used to implement one module, there is no need to put the module implementation file inside a sub-directory.
3120The file can be put inside its parent module's sub-directory with the sub module's name as the file name.
3121
3122Here is an example of a package, util.
3123\begin{lstlisting}
3124+ util
3125        Do.prj #package description file
3126        heap.do #Case 1: module heap;
3127        list.do #Case 1: mdoule list;
3128        ring.do #Case 1: module ring;
3129        + string #Case 2
3130        impl1.do #module string;
3131        + std
3132        vector.do
3133        list.do
3134        + array #Case 3
3135        array1.do #module std/array;
3136        array2.do #module std/array;
3137        sequence.do #Case 4, module std/sequence;
3138        test.do #Case 5
3139\end{lstlisting}
3140
3141
3142\begin{itemize}
3143\item
3144Case 1: Each individual file implements a module
3145\item
3146Case 2: Put the implementation of a module under the sub-directory, but there is only one file
3147\item
3148Case 3: Put the implementation of a module under the sub-directory; There are several files to implement one module
3149\item
3150Case 4: One file to express one aggregation
3151\item
3152Case 5: The file does not belong to any module; It is used for testing purpose
3153\end{itemize}
3154
3155The example only uses source code, ".do" files, to show the module file organization.
3156Other module packaging formats, like binary, must also follow the same rules.
3157
3158
3159\subsubsection{Module File Format}
3160
3161\CFA supports different types of module file formats.
3162
3163\begin{itemize}
3164\item
3165Pure source code format: The files should be organized following the previous section's definition.
3166\item
3167IR format (TBD): The \CFA compiler IR format, similar to the source code format
3168\item
3169Binary format, including ".a" static library or ".so" dynamic linkage library
3170\begin{itemize}
3171\item
3172The file's name must match the right level's module name defined in the previous section
3173\item
3174E.g. "util.so" includes all modules for the package util.
3175\item
3176E.g. "string.so" under the package directory to include files belonging to "module string;"
3177\end{itemize}
3178\item
3179Archive format
3180\begin{itemize}
3181\item
3182The archive is named as ".dar", and is a zip archive of the source code or the binary for a package
3183\item
3184E.g. "util.dar" is the whole package for util package including the package direction file
3185\end{itemize}
3186\item
3187Hybrid format
3188\begin{itemize}
3189\item
3190A package can be distributed partly in source code, partly in binary format, and/or packaged in the archive format
3191\item
3192The only limitation is that the names of the files must match the module location names defined in previous section
3193\end{itemize}
3194\end{itemize}
3195
3196
3197\subsection{Package and Module Locating}
3198
3199The high-level build tools provided by \CFA will handle finding a package in your local filesystem or retrieving it from a repository if necessary, building it if necessary, and linking with it.
3200If a programmer prefers, one can directly call the compiler, docc to build the source files and create and link to static libraries.
3201
3202When a source file imports a module, the \CFA build tool and docc compiler will locate the module according to the following order:
3203
3204\begin{enumerate}
3205\item
3206This source file's directory tree, which is typically the project's src directory
3207\item
3208All of the dependent packages (in a directory or in an archive file) under the current \CFA project's pkg directory
3209\item
3210The dependent packages (in a directory or in an archive file) inside the paths defined in the DOPATH environment variable
3211\item
3212The dependent packages (in a directory or in an archive file) inside the global \CFA SDK installation's pkg directory
3213\item
3214If one dependent package is still not found, the builder tool will automatically retrieve it from the repository defined in the SDK installation's configuration, and store it in the SDK's pkg directory
3215\end{enumerate}
3216
3217The module found first in a package will shadow the modules with the same name in the later packages in the search sequence.
3218
3219
3220\subsubsection{Dependent Package}
3221
3222Dependent packages are those packages containing modules that the current project's source code will import from.
3223Dependent packages are defined implicitly or explicitly in one \CFA project.
3224All of the packages under the current project's pkg directory are implicitly dependent packages.
3225For others, the dependent packages must be defined in the project's Do.prj file.
3226
3227
3228\subsubsection{Package and Module Locating Example}
3229
3230\begin{lstlisting}
3231# A project's source code tree
3232
3233--------------------------------------
3234
3235+ testProject
3236        Do.prj
3237        + src
3238        main.do
3239        + pkg
3240        + security-1.1
3241        Do.prj
3242        security.do #module security
3243
3244--------------------------------------
3245
3246# Do.prj
3247
3248--------------------------------------
3249
3250[dependences]
3251std
3252util = "0.2"
3253
3254--------------------------------------
3255
3256# main.do
3257
3258---------------------------------------
3259
3260import security;
3261import std/vector;
3262import container;
3263
3264----------------------------------------
3265\end{lstlisting}
3266
3267
3268\begin{lstlisting}
3269# pkg directory's source code tree
3270
3271-----------------------------------------
3272
3273+ pkg
3274        + std-1.0
3275        Do.prj
3276        vector.do #module std/vector;
3277        queue.do #module std/queue;
3278        + std-1.1
3279        Do.prj
3280        vector.do #module std/vector;
3281        queue.do #module std/queue;
3282        list.do #module std/list;
3283        + util-0.1
3284        Do.prj
3285        container.do #module container;
3286        + security-1.0
3287        security.do #module security;
3288------------------------------------------
3289\end{lstlisting}
3290
3291
3292During the compiling of main.do file import security;
3293The security module appears in both the local security-1.1 package, and the global security-1.0 package.
3294According to the locating sequence, the local security module in security-1.1 will be used.
3295And because the security-1.1 package is under local's pkg directory.
3296No dependence description is required in the project Do.prj file.
3297
3298import std/vector;
3299
3300The std/vector package appears in two different versions' packages in the global path and the project dependence doesn't specify the version. std-1.1 is used in this case.
3301
3302import container;
3303
3304The Do.prj specifies the version 0.2 should be used to locate container module from util package but only version 0.1 is available in the local file system.
3305The builder tool then will try to retrieve it from the web and store it in the global pkg directory.
3306After that, the container module from the newly downloaded package will be used in the compilation.
3307\end{comment}
3308
3309
3310\section{Comparison with Other Languages}
3311
3312\CFA is one of many languages that attempts to improve upon C.
3313In developing \CFA, many other languages were consulted for ideas, constructs, and syntax.
3314Therefore, it is important to show how these languages each compare with Do.
3315In this section, \CFA is compared with what the writers of this document consider to be the closest competitors of Do: \CC, Go, Rust, and D.
3316
3317
3318\subsection[Comparing Key Features of CFA]{Comparing Key Features of \CFA}
3319
3320
3321\subsubsection{Constructors and Destructors}
3322
3323\lstset{basicstyle=\sf\relsize{-2}}
3324
3325\begin{flushleft}
3326\begin{tabular}{@{}l|l|l|l@{}}
3327\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3328\hline
3329\begin{lstlisting}
3330struct Line {
3331        float length;
3332}
3333// default constructor
3334void ?{}( Line * l ) {
3335        sout | "default" | endl;
3336        l.length = 0.0;
3337}
3338
3339
3340// constructor with length
3341void ?{}( Line * l, float length ) {
3342        sout | "length" | length | endl;
3343
3344        l.length = length;
3345}
3346
3347// destructor
3348void ^?(Line &l) {
3349        sout | "destroyed" | endl;
3350        l.length = 0.0;
3351}
3352
3353// usage
3354Line line1;
3355Line line2{ 3.4 };
3356\end{lstlisting}
3357&
3358\begin{lstlisting}[language=C++]
3359class Line {
3360        float length;
3361
3362        // default constructor
3363        Line() {
3364                cout << "default" << endl;
3365                length = 0.0;
3366        }
3367
3368
3369        // constructor with length
3370        Line( float l ) {
3371                cout << "length " << length
3372                         << endl;
3373                length = l;
3374        }
3375
3376        // destructor
3377        ~Line() {
3378                cout << "destroyed" << endl;
3379                length = 0.0;
3380        }
3381}
3382// usage
3383Line line1;
3384Line line2( 3.4 );
3385\end{lstlisting}
3386&
3387\begin{lstlisting}[language=Golang]
3388type Line struct {
3389        length float32
3390}
3391// default constructor
3392func makeLine() Line {
3393        fmt.PrintLn( "default" )
3394        return Line{0.0}
3395}
3396
3397
3398// constructor with length
3399func makeLine( length float32 ) Line {
3400        fmt.Printf( "length %v", length )
3401
3402        return Line{length}
3403}
3404
3405// no destructor
3406
3407
3408
3409
3410
3411// usage
3412line1 := makeLine()
3413line2 := makeLine( 3.4 )
3414\end{lstlisting}
3415&
3416\begin{lstlisting}
3417struct Line {
3418        length: f32
3419}
3420// default constructor
3421impl Default for Line {
3422        fn default () -> Line {
3423                println!( "default" );
3424                Line{ length: 0.0 }
3425        }
3426}
3427// constructor with length
3428impl Line {
3429        fn make( len: f32 ) -> Line {
3430                println!( "length: {}", len );
3431                Line{ length: len }
3432        }
3433}
3434// destructor
3435impl Drop for Line {
3436        fn drop( &mut self ) {
3437                self.length = 0.0
3438        }
3439}
3440// usage
3441let line1:Line = Default::default();
3442Line line2( 3.4 );
3443\end{lstlisting}
3444\end{tabular}
3445\end{flushleft}
3446
3447
3448\subsubsection{Operator Overloading}
3449
3450\begin{flushleft}
3451\begin{tabular}{@{}l|l|l|l@{}}
3452\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3453\hline
3454\begin{lstlisting}
3455struct Cpx {
3456        double re, im;
3457};
3458// overload addition operator
3459Cpx ?+?( Cpx l, const Cpx r ) {
3460        return (Cpx){l.re+l.im, l.im+r.im};
3461}
3462Cpx a, b, c;
3463c = a + b;
3464\end{lstlisting}
3465&
3466\begin{lstlisting}
3467struct Cpx {
3468        double re, im;
3469};
3470// overload addition operator
3471Cpx operator+( Cpx l, const Cpx r ) {
3472        return (Cpx){l.re+l.im, l.im+r.im};
3473}
3474Cpx a, b, c;
3475c = a + b;
3476\end{lstlisting}
3477&
3478\begin{lstlisting}
3479// no operator overloading
3480
3481
3482
3483
3484
3485
3486
3487\end{lstlisting}
3488&
3489\begin{lstlisting}
3490struct Cpx {
3491        re: f32,
3492        im: f32
3493}
3494// overload addition operator
3495impl Add for Cpx {
3496        type Output = Cpx
3497        fn add(self, r: Cpx) -> Cpx {
3498                let mut res = Cpx{re: 0.0, im: 0.0};
3499                res.re = self.re + r.re;
3500                res.im = self.im + r.im;
3501                return res
3502        }
3503}
3504let (a, b, mut c) = ...;
3505c = a + b
3506\end{lstlisting}
3507\end{tabular}
3508\end{flushleft}
3509
3510
3511\subsubsection{Calling C Functions}
3512
3513\begin{flushleft}
3514\begin{tabular}{@{}l|l|l@{}}
3515\multicolumn{1}{c|}{\textbf{\CFA/\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}   \\
3516\hline
3517\begin{lstlisting}[boxpos=t]
3518extern "C" {
3519#include <sys/types.h>
3520#include <sys/stat.h>
3521#include <unistd.h>
3522}
3523size_t fileSize( const char *path ) {
3524        struct stat s;
3525        stat(path, &s);
3526        return s.st_size;
3527}
3528\end{lstlisting}
3529&
3530\begin{lstlisting}[boxpos=t]
3531/*
3532#cgo
3533#include <sys/types.h>
3534#include <sys/stat.h>
3535#include <unistd.h>
3536*/
3537import "C"
3538import "unsafe"
3539
3540func fileSize(path string) C.size_t {
3541        var buf C.struct_stat
3542        c_string := C.CString(path)
3543        C.stat(p, &buf)
3544        C.free(unsafe.Pointer(c_string))
3545        return buf._st_size
3546}
3547\end{lstlisting}
3548&
3549\begin{lstlisting}[boxpos=t]
3550use libc::{c_int, size_t};
3551// translated from sys/stat.h
3552#[repr(C)]
3553struct stat_t {
3554        ...
3555        st_size: size_t,
3556        ...
3557}
3558#[link(name = "libc")]
3559extern {
3560        fn stat(path: *const u8,
3561        buf: *mut stat_t) -> c_int;
3562}
3563fn fileSize(path: *const u8) -> size_t
3564{
3565        unsafe {
3566                let mut buf: stat_t = uninit();
3567                stat(path, &mut buf);
3568                buf.st_size
3569        }
3570}
3571\end{lstlisting}
3572\end{tabular}
3573\end{flushleft}
3574
3575
3576\subsubsection{Generic Functions}
3577
3578\begin{flushleft}
3579\begin{tabular}{@{}l|l|l|l@{}}
3580\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3581\hline
3582\begin{lstlisting}
3583generic(type T, type N |
3584        { int ?<?(N, N); })
3585T *maximize(N (*f)(const T&),
3586        int n, T *a) {
3587        T *bestX = NULL;
3588        N bestN;
3589        for (int i = 0; i < n; i++) {
3590        N curN = f(a[i]);
3591        if (bestX == NULL ||
3592        curN > bestN) {
3593        bestX = &a[i]; bestN = curN;
3594        }
3595        }
3596        return bestX;
3597}
3598
3599string *longest(int n, string *p)
3600{
3601        return maximize(length, n, p);
3602}
3603\end{lstlisting}
3604&
3605\begin{lstlisting}
3606template<typename T, typename F>
3607T *maximize(const F &f,
3608        int n, T *a) {
3609        typedef decltype(f(a[0])) N;
3610        T *bestX = NULL;
3611        N bestN;
3612        for (int i = 0; i < n; i++) {
3613        N curN = f(a[i]);
3614        if (bestX == NULL || curN > bestN)
3615        {
3616        bestX = &a[i]; bestN = curN;
3617        }
3618        }
3619        return bestX;
3620}
3621
3622string *longest(int n, string *p) {
3623        return maximize(
3624        [](const string &s) {
3625        return s.length();
3626        }, n, p);
3627}
3628\end{lstlisting}
3629&
3630\begin{lstlisting}
3631// Go does not support generics!
3632func maximize(
3633        gt func(interface{}, interface{}) bool,
3634        f func(interface{}) interface{},
3635        a []interface{}) interface{} {
3636        var bestX interface{} = nil
3637        var bestN interface{} = nil
3638        for _, x := range a {
3639        curN := f(x)
3640        if bestX == nil || gt(curN, bestN)
3641        {
3642        bestN = curN
3643        bestX = x
3644        }
3645        }
3646        return bestX
3647}
3648
3649func longest(
3650        a []interface{}) interface{} {
3651        return maximize(
3652        func(a, b interface{}) bool {
3653        return a.(int) > b.(int) },
3654        func(s interface{}) interface{} {
3655        return len(s.(string)) },
3656        a).(string)
3657}
3658\end{lstlisting}
3659&
3660\begin{lstlisting}
3661use std::cmp::Ordering;
3662
3663fn maximize<N: Ord + Copy, T, F:
3664Fn(&T) -> N>(f: F, a: &Vec<T>) ->
3665Option<&T> {
3666        let mut best_x: Option<&T> = None;
3667        let mut best_n: Option<N> = None;
3668        for x in a {
3669        let n = f(x);
3670        if (match best_n { None => true,
3671        Some(bn) =>
3672        n.cmp(&bn) == Ordering::Greater })
3673        {
3674        best_x = Some(x);
3675        best_n = Some(n);
3676        }
3677        }
3678        return best_x
3679}
3680
3681fn longest(a: &Vec<String>) ->
3682        Option<&String> {
3683        return
3684        maximize(|x: &String| x.len(), a)
3685}
3686\end{lstlisting}
3687\end{tabular}
3688\end{flushleft}
3689
3690
3691\begin{comment}
3692\subsubsection{Modules / Packages}
3693
3694\begin{lstlisting}
3695\CFA
3696\CC
3697
3698
3699module example/M;
3700
3701export int inc(int val) {
3702        return val + 1;
3703}
3704
3705
3706
3707
3708--------------------------------------
3709//Use the module in another file
3710import example/M;
3711int main() {
3712        print(M.inc(100));
3713        return 0;
3714}
3715// Using \CC17 module proposal
3716
3717module example.M;
3718
3719export {
3720        int inc(int val);
3721}
3722
3723int inc(inv val) {
3724        return val + 1;
3725}
3726--------------------------------------
3727// Use the module in another file
3728import example.M;
3729int main() {
3730        cout << inc(100) << endl;
3731        return 0;
3732}
3733
3734Go
3735Rust
3736package example/M;
3737
3738func Inc(val int32) int32 {
3739        // Capitalization indicates exported
3740        return val + 100
3741}
3742
3743
3744--------------------------------------
3745//Use the package in another file
3746package main
3747import .fmt.
3748import "example/M"
3749
3750func main() int32 {
3751        fmt.Printf(.%v., M.Inc(100))
3752}
3753pub mod example {
3754        pub mod M {
3755        pub inc(val i32) -> i32 {
3756        return val + 100;
3757        }
3758        }
3759}
3760
3761--------------------------------------
3762//Use the module in another file
3763use example::M;
3764
3765
3766
3767fn main() {
3768        println!(.{}., M::inc(100));
3769}
3770\end{lstlisting}
3771\end{comment}
3772
3773
3774\subsubsection{Parallel Tasks}
3775
3776\begin{flushleft}
3777\begin{tabular}{@{}l|l|l|l@{}}
3778\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3779\hline
3780\begin{lstlisting}
3781task Nonzero {
3782        int *data;
3783        int start;
3784        int end;
3785        int* res;
3786};
3787
3788void ?{}(Nonzero &a, int d[], int s,
3789        int e, int* subres) {
3790        // constructor
3791        a.data = d;
3792        a.start = s;
3793        a.end = e;
3794        a.res = subres;
3795}
3796
3797// implicitly spawn thread here
3798void ?()(NonzeroCounter &a) {
3799        int i;
3800        int nonzero = 0;
3801        for (i=start; c<end; ++i) {
3802        if(a.data[i]!=0){ nonzero++;}
3803        }
3804        *a.res = nonzero;
3805}
3806
3807int main() {
3808        int sz = ...
3809        int data[sz] = ...;
3810        int r1 = 0, r2=0;
3811        int res;
3812        { // create a scope for Nonzero
3813        Nonzero n1{data, 0, sz/2, &n1};
3814        Nonzero n2{data, sz/2, sz, &n2};
3815        n1();//spawn
3816        n2();//spawn
3817        }
3818        res = r1+r2;
3819        return res;
3820}
3821\end{lstlisting}
3822&
3823\begin{lstlisting}
3824#include <thread>
3825#include <mutex>
3826
3827std::mutex m;
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840void task(const vector<int>&v,
3841        int* res, size_t s,
3842        size_t e) {
3843        int non_zero = 0;
3844        for(size_t i = s; i < e; ++i){
3845        if(v[i]!=0) { non_zero++;}
3846        }
3847        std::unique_lock<mutex> lck {m};
3848        *res += non_zero;
3849}
3850
3851int main() {
3852        vector<int> data = ...; //data
3853        int res = 0;
3854        std::thread t1 {task, ref(data),
3855        &res, 0,
3856        data.size()/2};
3857        std::thread t2 {task, ref(data),
3858        &res, data.size()/2,
3859        data.size()};
3860        t1.join();
3861        t2.join();
3862        return res;
3863}
3864\end{lstlisting}
3865&
3866\begin{lstlisting}
3867package main
3868
3869import "fmt"
3870
3871func nonzero(data []int, c chan int) {
3872        nz := 0
3873        for _, v:=range data {
3874        if(v!=0) { nz := nz+1 }
3875        }
3876        c <- nz
3877}
3878
3879func main() {
3880        sz := ...
3881        data := make([]int, sz)
3882        ... // data init
3883        go nonzero(data[:len(data)/2], c)
3884        go nonzero(data[len(data)/2:], c)
3885        n1, n2 := <-c, <-c
3886        res := n1 + n2
3887        fmt.Println(res)
3888}
3889\end{lstlisting}
3890&
3891\begin{lstlisting}
3892use std::thread;
3893use std::sync:mpsc::channel;
3894
3895fn main() {
3896        let sz = ...;
3897        let mut data:Vec<i32> =
3898        Vec::with_capacity(sz as usize);
3899        ... //init data
3900        let (tx, rx) = channel();
3901        for i in 0..1 {
3902        let tx = tx.clone();
3903        let data = data.clone()
3904        thread::spawn(move|| {
3905        let mut nz := 0;
3906        let mut s = 0;
3907        let mut e = sz / 2;
3908        if i == 1 {
3909        s = sz/2;
3910        e = data.len();
3911        }
3912        for i in s..(e - 1) {
3913        if data[i] != 0 (
3914        nz = nz + 1
3915        }
3916        }
3917        tx.send(nz).unwrap();
3918        });
3919        }
3920        let res = rx.recv().unwrap() +
3921        rx.recv().unwrap();
3922        println!(.{}., res);
3923}
3924\end{lstlisting}
3925\end{tabular}
3926\end{flushleft}
3927
3928\lstset{basicstyle=\sf\relsize{-1}}
3929
3930
3931\subsection{Summary of Language Comparison}
3932
3933
3934\subsubsection[C++]{\CC}
3935
3936\CC is a general-purpose programming language.
3937It has imperative, object-oriented and generic programming features, while also providing facilities for low-level memory manipulation. (Wikipedia)
3938
3939The primary focus of \CC seems to be adding object-oriented programming to C, and this is the primary difference between \CC and Do.
3940\CC uses classes to encapsulate data and the functions that operate on that data, and to hide the internal representation of the data.
3941\CFA uses modules instead to perform these same tasks.
3942Classes in \CC also enable inheritance among types.
3943Instead of inheritance, \CFA embraces composition and interfaces to achieve the same goals with more flexibility.
3944There are many studies and articles comparing inheritance and composition (or is-a versus has-a relationships), so we will not go into more detail here (Venners, 1998) (Pike, Go at Google: Language Design in the Service of Software Engineering , 2012).
3945
3946Overloading in \CFA is very similar to overloading in \CC, with the exception of the additional use, in \CFA, of the return type to differentiate between overloaded functions.
3947References and exceptions in \CFA are heavily based on the same features from \CC.
3948The mechanism for interoperating with C code in \CFA is also borrowed from \CC.
3949
3950Both \CFA and \CC provide generics, and the syntax is quite similar.
3951The key difference between the two, is that in \CC templates are expanded at compile time for each type for which the template is instantiated, while in \CFA, function pointers are used to make the generic fully compilable.
3952This means that a generic function can be defined in a compiled library, and still be used as expected from source.
3953
3954
3955\subsubsection{Go}
3956
3957Go, also commonly referred to as golang, is a programming language developed at Google in 2007 [.].
3958It is a statically typed language with syntax loosely derived from that of C, adding garbage collection, type
3959safety, some structural typing capabilities, additional built-in types such as variable-length arrays and key-value maps, and a large standard library. (Wikipedia)
3960
3961Go and \CFA differ significantly in syntax and implementation, but the underlying core concepts of the two languages are aligned.
3962Both Go and \CFA use composition and interfaces as opposed to inheritance to enable encapsulation and abstraction.
3963Both languages (along with their tooling ecosystem) provide a simple packaging mechanism for building units of code for easy sharing and reuse.
3964Both languages also include built-in light weight, user level threading concurrency features that attempt to simplify the effort and thought process required for writing parallel programs while maintaining high performance.
3965
3966Go has a significant runtime which handles the scheduling of its light weight threads, and performs garbage collection, among other tasks.
3967\CFA uses a cooperative scheduling algorithm for its tasks, and uses automatic reference counting to enable advanced memory management without garbage collection.
3968This results in Go requiring significant overhead to interface with C libraries while \CFA has no overhead.
3969
3970
3971\subsubsection{Rust}
3972
3973Rust is a general-purpose, multi-paradigm, compiled programming language developed by Mozilla Research.
3974It is designed to be a "safe, concurrent, practical language", supporting pure-functional, concurrent-actor[dubious . discuss][citation needed], imperative-procedural, and object-oriented styles.
3975
3976The primary focus of Rust is in safety, especially in concurrent programs.
3977To enforce a high level of safety, Rust has added ownership as a core feature of the language to guarantee memory safety.
3978This safety comes at the cost of a difficult learning curve, a change in the thought model of the program, and often some runtime overhead.
3979
3980Aside from those key differences, Rust and \CFA also have several similarities.
3981Both languages support no overhead interoperability with C and have minimal runtimes.
3982Both languages support inheritance and polymorphism through the use of interfaces (traits).
3983
3984
3985\subsubsection{D}
3986
3987The D programming language is an object-oriented, imperative, multi-paradigm system programming
3988language created by Walter Bright of Digital Mars and released in 2001. [.]
3989Though it originated as a re-engineering of \CC, D is a distinct language, having redesigned some core \CC features while also taking inspiration from other languages, notably Java, Python, Ruby, C\#, and Eiffel.
3990
3991D and \CFA both start with C and add productivity features.
3992The obvious difference is that D uses classes and inheritance while \CFA uses composition and interfaces.
3993D is closer to \CFA than \CC since it is limited to single inheritance and also supports interfaces.
3994Like \CC, and unlike \CFA, D uses garbage collection and has compile-time expanded templates.
3995D does not have any built-in concurrency constructs in the
3996language, though it does have a standard library for concurrency which includes the low-level primitives for concurrency.
3997
3998
3999\appendix
4000
4001
4002\section{Incompatible}
4003
4004The following incompatibles exist between C and \CFA, and are similar to Annex C for \CC~\cite{ANSI14:C++}.
4005
4006\begin{enumerate}
4007\item
4008Change type of character literal ©int© to ©char©.
4009This change allows overloading differentiation argument type matching, e.g.:
4010\begin{lstlisting}
4011int function( int i );
4012int function( char c );
4013function( 'x' );
4014\end{lstlisting}
4015It is preferable that this call match the second version of function rather than the first. \\
4016Effect on original feature: Change to semantics of well-defined feature. ISO C programs which depend on
4017\begin{lstlisting}
4018sizeof('x') == sizeof(int)
4019\end{lstlisting}
4020will not work the same as C++ programs. \\
4021Difficulty of converting: Simple. \\
4022How widely used: Programs which depend upon sizeof('x') are probably rare.
4023
4024\item
4025Change: String literals made ©const© \\
4026The type of a string literal is changed from ©array of char© to ©array of const char©.
4027The type of a wide string literal is changed from ©array of wchar_t© to ©array of const wchar_t©. \\
4028Rationale: This avoids calling an inappropriate overloaded function, which might expect to be able to modify its argument.
4029Effect on original feature: Change to semantics of well-defined feature. \\
4030Difficulty of converting: Simple syntactic transformation, because string literals can be converted to ©char*;© (4.2).
4031The most common cases are handled by a new but deprecated standard conversion:
4032\begin{lstlisting}
4033char* p = "abc"; // valid in C, deprecated in C++
4034char* q = expr ? "abc" : "de"; // valid in C, invalid in C++
4035\end{lstlisting}
4036How widely used: Programs that have a legitimate reason to treat string literals as pointers to potentially modifiable memory are probably rare.
4037
4038\item
4039Change: C++ does not have \emph{tentative definitions} as in C.
4040E.g., at file scope,
4041\begin{lstlisting}
4042int i;
4043int i;
4044\end{lstlisting}
4045is valid in C, invalid in C++.
4046This makes it impossible to define mutually referential file-local static
4047objects, if initializers are restricted to the syntactic forms of C. For example,
4048\begin{lstlisting}
4049struct X { int i; struct X *next; };
4050static struct X a;
4051static struct X b = { 0, &a };
4052static struct X a = { 1, &b };
4053\end{lstlisting}
4054Rationale: This avoids having different initialization rules for builtin types and userdefined types.
4055Effect on original feature: Deletion of semantically welldefined feature. \\
4056Difficulty of converting: Semantic transformation.
4057In C++, the initializer for one of a set of mutuallyreferential filelocal static objects must invoke a function call to achieve the initialization.
4058How widely used: Seldom.
4059
4060\item
4061Change: A struct is a scope in C++, not in C \\
4062Rationale: Class scope is crucial to C++, and a struct is a class. \\
4063Effect on original feature: Change to semantics of well-defined feature. \\
4064Difficulty of converting: Semantic transformation. \\
4065How widely used: C programs use struct extremely frequently, but the change is only noticeable when struct, enumeration, or enumerator names are referred to outside the struct.
4066The latter is probably rare.
4067
4068\CFA is C \emph{incompatible} on this issue, and provides semantics similar to \CC.
4069Nested types are not hoisted and can be referenced using the field selection operator ``©.©'', unlike the \CC scope-resolution operator ``©::©''.
4070Given that nested types in C are equivalent to not using them, i.e., they are essentially useless, it is unlikely there are any realistic usages that break because of this incompatibility.
4071
4072\item
4073Change: In C++, the name of a nested class is local to its enclosing class.
4074In C the name of the nested class belongs to the same scope as the name of the outermost enclosing class
4075Example:
4076\begin{lstlisting}
4077struct X {
4078struct Y { /* ... */ } y;
4079};
4080struct Y yy; // valid C, invalid C++
4081\end{lstlisting}
4082Rationale: C++ classes have member functions which require that classes establish scopes.
4083The C rule would leave classes as an incomplete scope mechanism which would prevent C++ programmers from maintaining locality within a class. A coherent set of scope rules for C++ based on the C rule would be very complicated and C++ programmers would be unable to predict reliably the meanings of nontrivial examples involving nested or local functions.
4084Effect on original feature: Change of semantics of welldefined feature.
4085Difficulty of converting: Semantic transformation. To make the struct type name visible in the scope of the enclosing struct, the struct tag could be declared in the scope of the enclosing struct, before the enclosing struct is defined. Example:
4086\begin{lstlisting}
4087struct Y; // struct Y and struct X are at the same scope
4088struct X {
4089struct Y { /* ... */ } y;
4090};
4091\end{lstlisting}
4092All the definitions of C struct types enclosed in other struct definitions and accessed outside the scope of the enclosing struct could be exported to the scope of the enclosing struct.
4093Note: this is a consequence of the difference in scope rules, which is documented in 3.3.
4094How widely used: Seldom.
4095\end{enumerate}
4096
4097
4098\section{I/O Library}
4099\label{s:IOLibrary}
4100\index{input/output library}
4101
4102The goal for \CFA I/O is to make I/O as simple as possible for the general case, while fully supporting polmorphism and user defined types in a consistent way.
4103The general case is printing out a sequence of variables separated by whitespace.
4104\begin{quote2}
4105\begin{tabular}{@{}l@{\hspace{30pt}}l@{}}
4106\multicolumn{1}{c@{\hspace{30pt}}}{\textbf{\CFA}}       & \multicolumn{1}{c}{\textbf{\CC}}      \\
4107\begin{lstlisting}
4108int x = 0, y = 1, z = 2;
4109®sout® ®|® x ®|® y ®|® z ®| endl®;
4110\end{lstlisting}
4111&
4112\begin{lstlisting}
4113
4114cout << x << " " << y << " " << z << endl;
4115\end{lstlisting}
4116\end{tabular}
4117\end{quote2}
4118The \CFA form is half as many characters, and is similar to \Index{Python} I/O with respect to implicit separators.
4119
4120The logical-or operator is used because it is the lowest-priority overloadable operator, other than assignment.
4121Therefore, fewer output expressions require parenthesis.
4122\begin{quote2}
4123\begin{tabular}{@{}ll@{}}
4124\textbf{\CFA:}
4125&
4126\begin{lstlisting}
4127sout | x * 3 | y + 1 | z << 2 | x == y | (x | y) | (x || y) | (x > z ? 1 : 2) | endl;
4128\end{lstlisting}
4129\\
4130\textbf{\CC:}
4131&
4132\begin{lstlisting}
4133cout << x * 3 << y + 1 << (z << 2) << (x == y) << (x | y) << (x || y) << (x > z ? 1 : 2) << endl;
4134\end{lstlisting}
4135\end{tabular}
4136\end{quote2}
4137Finally, the logical-or operator has a link with the Shell pipe-operator for moving data, although data flows in the opposite direction.
4138
4139The implicit seperator\index{I/O separator} character (space/blank) is a separator not a terminator.
4140The rules for implicitly adding the separator are:
4141\begin{enumerate}
4142\item
4143A seperator does not appear at the start or end of a line.
4144\begin{lstlisting}[belowskip=0pt]
4145sout | 1 | 2 | 3 | endl;
4146\end{lstlisting}
4147\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
41481 2 3
4149\end{lstlisting}
4150\item
4151A seperator does not appear before or after a character literal or variable.
4152\begin{lstlisting}
4153sout | '1' | '2' | '3' | endl;
4154123
4155\end{lstlisting}
4156\item
4157A seperator does not appear before or after a null (empty) C string
4158\begin{lstlisting}
4159sout | 1 | "" | 2 | "" | 3 | endl;
4160123
4161\end{lstlisting}
4162which is a local mechanism to disable insertion of the separator character.
4163\item
4164A seperator does not appear before a C string starting with the (extended) \Index{ASCII}\index{ASCII!extended} characters: \lstinline[mathescape=off]@([{$£¥¡¿«@
4165%$
4166\begin{lstlisting}[mathescape=off]
4167sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x $" | 4 | "x £" | 5 | "x ¥" | 6 | "x ¡" | 7 | "x ¿" | 8 | "x «" | 9 | endl;
4168\end{lstlisting}
4169%$
4170\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4171x (1 x [2 x {3 x $4 x £5 x ¥6 x ¡7 x ¿8 x «9
4172\end{lstlisting}
4173%$
4174\item
4175A seperator does not appear after a C string ending with the (extended) \Index{ASCII}\index{ASCII!extended} characters: ©,.:;!?)]}%¢»©
4176\begin{lstlisting}[belowskip=0pt]
4177sout | 1 | ", x" | 2 | ". x" | 3 | ": x" | 4 | "; x" | 5 | "! x" | 6 | "? x" | 7 | ") x" | 8 | "] x" | 9 | "} x"
4178         | 10 | "% x" | 11 | "¢ x" | 12 | "» x" | endl;
4179\end{lstlisting}
4180\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
41811, x 2. x 3: x 4; x 5! x 6? x 7) x 8] x 9} x 10% x 11¢ 12»
4182\end{lstlisting}
4183\item
4184A seperator does not appear before or after a C string begining/ending with the \Index{ASCII} quote or whitespace characters: \lstinline[showspaces=true]@`'" \t\v\f\r\n@
4185\begin{lstlisting}[belowskip=0pt]
4186sout | "x`" | 1 | "`x'" | 2 | "'x\"" | 3 | "\"x" | "x " | 4 | " x" | "x\t" | 1 | "\tx" | endl;
4187\end{lstlisting}
4188\begin{lstlisting}[mathescape=off,showspaces=true,showtabs=true,aboveskip=0pt,belowskip=0pt]
4189x`1`x'2'x"3"x x 4 x x   1       x
4190\end{lstlisting}
4191\end{enumerate}
4192The following \CC-style \Index{manipulator}s allow further control over implicit seperation.
4193\begin{lstlisting}[mathescape=off,belowskip=0pt]
4194sout | sepOn | 1 | 2 | 3 | sepOn | endl;        // separator at start of line
4195\end{lstlisting}
4196\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4197 1 2 3
4198\end{lstlisting}
4199\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4200sout | 1 | sepOff | 2 | 3 | endl;                       // turn off implicit separator temporarily
4201\end{lstlisting}
4202\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
420312 3
4204\end{lstlisting}
4205\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4206sout | sepDisable | 1 | 2 | 3 | endl;           // turn off implicit separation, affects all subsequent prints
4207\end{lstlisting}
4208\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4209123
4210\end{lstlisting}
4211\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4212sout | 1 | sepOn | 2 | 3 | endl;                        // turn on implicit separator temporarily
4213\end{lstlisting}
4214\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
42151 23
4216\end{lstlisting}
4217\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4218sout | sepEnable | 1 | 2 | 3 | endl;            // turn on implicit separation, affects all subsequent prints
4219\end{lstlisting}
4220\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4221 1 2 3
4222\end{lstlisting}
4223\begin{lstlisting}[mathescape=off,aboveskip=0pt,aboveskip=0pt,belowskip=0pt]
4224sepSet( sout, ", $" );                                          // change separator from " " to ", $"
4225sout | 1 | 2 | 3 | endl;
4226\end{lstlisting}
4227%$
4228\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt]
42291, $2, $3
4230\end{lstlisting}
4231%$
4232\begin{comment}
4233#include <fstream>
4234
4235int main() {
4236        int x = 3, y = 5, z = 7;
4237        sout | x * 3 | y + 1 | z << 2 | x == y | (x | y) | (x || y) | (x > z ? 1 : 2) | endl;
4238        sout | 1 | 2 | 3 | endl;
4239        sout | '1' | '2' | '3' | endl;
4240        sout | 1 | "" | 2 | "" | 3 | endl;
4241        sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x $" | 4 | "x £" | 5 | "x ¥" | 6 | "x ¡" | 7 | "x ¿" | 8 | "x «" | 9 | endl;
4242        sout | 1 | ", x" | 2 | ". x" | 3 | ": x" | 4 | "; x" | 5 | "! x" | 6 | "? x" | 7 | ") x" | 8 | "] x" | 9 | "} x"
4243                 | 10 | "% x" | 11 | "¢ x" | 12 | "» x" | endl;
4244        sout | "x`" | 1 | "`x'" | 2 | "'x\"" | 3 | "\"x" | "x " | 4 | " x" | "x\t" | 1 | "\tx" | endl;
4245        sout | sepOn | 1 | 2 | 3 | sepOn | endl;        // separator at start of line
4246        sout | 1 | sepOff | 2 | 3 | endl;                       // turn off implicit separator temporarily
4247        sout | sepDisable | 1 | 2 | 3 | endl;           // turn off implicit separation, affects all subsequent prints
4248        sout | 1 | sepOn | 2 | 3 | endl;                        // turn on implicit separator temporarily
4249        sout | sepEnable | 1 | 2 | 3 | endl;            // turn on implicit separation, affects all subsequent prints
4250        sepSet( sout, ", $" );                                          // change separator from " " to ", $"
4251        sout | 1 | 2 | 3 | endl;
4252
4253}
4254
4255// Local Variables: //
4256// tab-width: 4 //
4257// End: //
4258\end{comment}
4259%$
4260
4261
4262\section{Standard Library}
4263\label{s:StandardLibrary}
4264
4265The goal of the \CFA standard-library is to wrap many of the existing C library-routines that are explicitly polymorphic into implicitly polymorphic versions.
4266
4267
4268\subsection{malloc}
4269
4270\begin{lstlisting}
4271forall( otype T ) T * malloc( void );§\indexc{malloc}§
4272forall( otype T ) T * malloc( char fill );
4273forall( otype T ) T * malloc( T * ptr, size_t size );
4274forall( otype T ) T * malloc( T * ptr, size_t size, unsigned char fill );
4275forall( otype T ) T * calloc( size_t nmemb );§\indexc{calloc}§
4276forall( otype T ) T * realloc( T * ptr, size_t size );§\indexc{ato}§
4277forall( otype T ) T * realloc( T * ptr, size_t size, unsigned char fill );
4278
4279forall( otype T ) T * aligned_alloc( size_t alignment );§\indexc{ato}§
4280forall( otype T ) T * memalign( size_t alignment );             // deprecated
4281forall( otype T ) int posix_memalign( T ** ptr, size_t alignment );
4282
4283forall( otype T ) T * memset( T * ptr, unsigned char fill ); // use default value '\0' for fill
4284forall( otype T ) T * memset( T * ptr );                                // remove when default value available
4285\end{lstlisting}
4286
4287
4288\subsection{ato / strto}
4289
4290\begin{lstlisting}
4291int ato( const char * ptr );§\indexc{ato}§
4292unsigned int ato( const char * ptr );
4293long int ato( const char * ptr );
4294unsigned long int ato( const char * ptr );
4295long long int ato( const char * ptr );
4296unsigned long long int ato( const char * ptr );
4297float ato( const char * ptr );
4298double ato( const char * ptr );
4299long double ato( const char * ptr );
4300float _Complex ato( const char * ptr );
4301double _Complex ato( const char * ptr );
4302long double _Complex ato( const char * ptr );
4303
4304int strto( const char * sptr, char ** eptr, int base );
4305unsigned int strto( const char * sptr, char ** eptr, int base );
4306long int strto( const char * sptr, char ** eptr, int base );
4307unsigned long int strto( const char * sptr, char ** eptr, int base );
4308long long int strto( const char * sptr, char ** eptr, int base );
4309unsigned long long int strto( const char * sptr, char ** eptr, int base );
4310float strto( const char * sptr, char ** eptr );
4311double strto( const char * sptr, char ** eptr );
4312long double strto( const char * sptr, char ** eptr );
4313float _Complex strto( const char * sptr, char ** eptr );
4314double _Complex strto( const char * sptr, char ** eptr );
4315long double _Complex strto( const char * sptr, char ** eptr );
4316\end{lstlisting}
4317
4318
4319\subsection{bsearch / qsort}
4320
4321\begin{lstlisting}
4322forall( otype T | { int ?<?( T, T ); } )
4323T * bsearch( const T key, const T * arr, size_t dimension );§\indexc{bsearch}§
4324
4325forall( otype T | { int ?<?( T, T ); } )
4326void qsort( const T * arr, size_t dimension );§\indexc{qsort}§
4327\end{lstlisting}
4328
4329
4330\subsection{abs}
4331
4332\begin{lstlisting}
4333char abs( char );§\indexc{abs}§
4334int abs( int );
4335long int abs( long int );
4336long long int abs( long long int );
4337float abs( float );
4338double abs( double );
4339long double abs( long double );
4340float abs( float _Complex );
4341double abs( double _Complex );
4342long double abs( long double _Complex );
4343\end{lstlisting}
4344
4345
4346\subsection{random}
4347
4348\begin{lstlisting}
4349void rand48seed( long int s );§\indexc{rand48seed}§
4350char rand48();§\indexc{rand48}§
4351int rand48();
4352unsigned int rand48();
4353long int rand48();
4354unsigned long int rand48();
4355float rand48();
4356double rand48();
4357float _Complex rand48();
4358double _Complex rand48();
4359long double _Complex rand48();
4360\end{lstlisting}
4361
4362
4363\subsection{min / max / swap}
4364
4365\begin{lstlisting}
4366forall( otype T | { int ?<?( T, T ); } )
4367T min( const T t1, const T t2 );§\indexc{min}§
4368
4369forall( otype T | { int ?>?( T, T ); } )
4370T max( const T t1, const T t2 );§\indexc{max}§
4371
4372forall( otype T )
4373void swap( T * t1, T * t2 );§\indexc{swap}§
4374\end{lstlisting}
4375
4376
4377\section{Math Library}
4378\label{s:Math Library}
4379
4380The goal of the \CFA math-library is to wrap many of the existing C math library-routines that are explicitly polymorphic into implicitly polymorphic versions.
4381
4382
4383\subsection{General}
4384
4385\begin{lstlisting}
4386float fabs( float );§\indexc{fabs}§
4387double fabs( double );
4388long double fabs( long double );
4389float cabs( float _Complex );
4390double cabs( double _Complex );
4391long double cabs( long double _Complex );
4392
4393float ?%?( float, float );§\indexc{fmod}§
4394float fmod( float, float );
4395double ?%?( double, double );
4396double fmod( double, double );
4397long double ?%?( long double, long double );
4398long double fmod( long double, long double );
4399
4400float remainder( float, float );§\indexc{remainder}§
4401double remainder( double, double );
4402long double remainder( long double, long double );
4403
4404[ int, float ] remquo( float, float );§\indexc{remquo}§
4405float remquo( float, float, int * );
4406[ int, double ] remquo( double, double );
4407double remquo( double, double, int * );
4408[ int, long double ] remquo( long double, long double );
4409long double remquo( long double, long double, int * );
4410
4411[ int, float ] div( float, float );                                             // alternative name for remquo
4412float div( float, float, int * );§\indexc{div}§
4413[ int, double ] div( double, double );
4414double div( double, double, int * );
4415[ int, long double ] div( long double, long double );
4416long double div( long double, long double, int * );
4417
4418float fma( float, float, float );§\indexc{fma}§
4419double fma( double, double, double );
4420long double fma( long double, long double, long double );
4421
4422float fdim( float, float );§\indexc{fdim}§
4423double fdim( double, double );
4424long double fdim( long double, long double );
4425
4426float nan( const char * );§\indexc{nan}§
4427double nan( const char * );
4428long double nan( const char * );
4429\end{lstlisting}
4430
4431
4432\subsection{Exponential}
4433
4434\begin{lstlisting}
4435float exp( float );§\indexc{exp}§
4436double exp( double );
4437long double exp( long double );
4438float _Complex exp( float _Complex );
4439double _Complex exp( double _Complex );
4440long double _Complex exp( long double _Complex );
4441
4442float exp2( float );§\indexc{exp2}§
4443double exp2( double );
4444long double exp2( long double );
4445float _Complex exp2( float _Complex );
4446double _Complex exp2( double _Complex );
4447long double _Complex exp2( long double _Complex );
4448
4449float expm1( float );§\indexc{expm1}§
4450double expm1( double );
4451long double expm1( long double );
4452
4453float log( float );§\indexc{log}§
4454double log( double );
4455long double log( long double );
4456float _Complex log( float _Complex );
4457double _Complex log( double _Complex );
4458long double _Complex log( long double _Complex );
4459
4460float log2( float );§\indexc{log2}§
4461double log2( double );
4462long double log2( long double );
4463float _Complex log2( float _Complex );
4464double _Complex log2( double _Complex );
4465long double _Complex log2( long double _Complex );
4466
4467float log10( float );§\indexc{log10}§
4468double log10( double );
4469long double log10( long double );
4470float _Complex log10( float _Complex );
4471double _Complex log10( double _Complex );
4472long double _Complex log10( long double _Complex );
4473
4474float log1p( float );§\indexc{log1p}§
4475double log1p( double );
4476long double log1p( long double );
4477
4478int ilogb( float );§\indexc{ilogb}§
4479int ilogb( double );
4480int ilogb( long double );
4481
4482float logb( float );§\indexc{logb}§
4483double logb( double );
4484long double logb( long double );
4485\end{lstlisting}
4486
4487
4488\subsection{Power}
4489
4490\begin{lstlisting}
4491float sqrt( float );§\indexc{sqrt}§
4492double sqrt( double );
4493long double sqrt( long double );
4494float _Complex sqrt( float _Complex );
4495double _Complex sqrt( double _Complex );
4496long double _Complex sqrt( long double _Complex );
4497
4498float cbrt( float );§\indexc{cbrt}§
4499double cbrt( double );
4500long double cbrt( long double );
4501
4502float hypot( float, float );§\indexc{hypot}§
4503double hypot( double, double );
4504long double hypot( long double, long double );
4505
4506float pow( float, float );§\indexc{pow}§
4507double pow( double, double );
4508long double pow( long double, long double );
4509float _Complex pow( float _Complex, float _Complex );
4510double _Complex pow( double _Complex, double _Complex );
4511long double _Complex pow( long double _Complex, long double _Complex );
4512\end{lstlisting}
4513
4514
4515\subsection{Trigonometric}
4516
4517\begin{lstlisting}
4518float sin( float );§\indexc{sin}§
4519double sin( double );
4520long double sin( long double );
4521float _Complex sin( float _Complex );
4522double _Complex sin( double _Complex );
4523long double _Complex sin( long double _Complex );
4524
4525float cos( float );§\indexc{cos}§
4526double cos( double );
4527long double cos( long double );
4528float _Complex cos( float _Complex );
4529double _Complex cos( double _Complex );
4530long double _Complex cos( long double _Complex );
4531
4532float tan( float );§\indexc{tan}§
4533double tan( double );
4534long double tan( long double );
4535float _Complex tan( float _Complex );
4536double _Complex tan( double _Complex );
4537long double _Complex tan( long double _Complex );
4538
4539float asin( float );§\indexc{asin}§
4540double asin( double );
4541long double asin( long double );
4542float _Complex asin( float _Complex );
4543double _Complex asin( double _Complex );
4544long double _Complex asin( long double _Complex );
4545
4546float acos( float );§\indexc{acos}§
4547double acos( double );
4548long double acos( long double );
4549float _Complex acos( float _Complex );
4550double _Complex acos( double _Complex );
4551long double _Complex acos( long double _Complex );
4552
4553float atan( float );§\indexc{atan}§
4554double atan( double );
4555long double atan( long double );
4556float _Complex atan( float _Complex );
4557double _Complex atan( double _Complex );
4558long double _Complex atan( long double _Complex );
4559
4560float atan2( float, float );§\indexc{atan2}§
4561double atan2( double, double );
4562long double atan2( long double, long double );
4563
4564float atan( float, float );                                                             // alternative name for atan2
4565double atan( double, double );§\indexc{atan}§
4566long double atan( long double, long double );
4567\end{lstlisting}
4568
4569
4570\subsection{Hyperbolic}
4571
4572\begin{lstlisting}
4573float sinh( float );§\indexc{sinh}§
4574double sinh( double );
4575long double sinh( long double );
4576float _Complex sinh( float _Complex );
4577double _Complex sinh( double _Complex );
4578long double _Complex sinh( long double _Complex );
4579
4580float cosh( float );§\indexc{cosh}§
4581double cosh( double );
4582long double cosh( long double );
4583float _Complex cosh( float _Complex );
4584double _Complex cosh( double _Complex );
4585long double _Complex cosh( long double _Complex );
4586
4587float tanh( float );§\indexc{tanh}§
4588double tanh( double );
4589long double tanh( long double );
4590float _Complex tanh( float _Complex );
4591double _Complex tanh( double _Complex );
4592long double _Complex tanh( long double _Complex );
4593
4594float asinh( float );§\indexc{asinh}§
4595double asinh( double );
4596long double asinh( long double );
4597float _Complex asinh( float _Complex );
4598double _Complex asinh( double _Complex );
4599long double _Complex asinh( long double _Complex );
4600
4601float acosh( float );§\indexc{acosh}§
4602double acosh( double );
4603long double acosh( long double );
4604float _Complex acosh( float _Complex );
4605double _Complex acosh( double _Complex );
4606long double _Complex acosh( long double _Complex );
4607
4608float atanh( float );§\indexc{atanh}§
4609double atanh( double );
4610long double atanh( long double );
4611float _Complex atanh( float _Complex );
4612double _Complex atanh( double _Complex );
4613long double _Complex atanh( long double _Complex );
4614\end{lstlisting}
4615
4616
4617\subsection{Error / Gamma}
4618
4619\begin{lstlisting}
4620float erf( float );§\indexc{erf}§
4621double erf( double );
4622long double erf( long double );
4623float _Complex erf( float _Complex );
4624double _Complex erf( double _Complex );
4625long double _Complex erf( long double _Complex );
4626
4627float erfc( float );§\indexc{erfc}§
4628double erfc( double );
4629long double erfc( long double );
4630float _Complex erfc( float _Complex );
4631double _Complex erfc( double _Complex );
4632long double _Complex erfc( long double _Complex );
4633
4634float lgamma( float );§\indexc{lgamma}§
4635double lgamma( double );
4636long double lgamma( long double );
4637float lgamma( float, int * );
4638double lgamma( double, int * );
4639long double lgamma( long double, int * );
4640
4641float tgamma( float );§\indexc{tgamma}§
4642double tgamma( double );
4643long double tgamma( long double );
4644\end{lstlisting}
4645
4646
4647\subsection{Nearest Integer}
4648
4649\begin{lstlisting}
4650float floor( float );§\indexc{floor}§
4651double floor( double );
4652long double floor( long double );
4653
4654float ceil( float );§\indexc{ceil}§
4655double ceil( double );
4656long double ceil( long double );
4657
4658float trunc( float );§\indexc{trunc}§
4659double trunc( double );
4660long double trunc( long double );
4661
4662float rint( float );§\indexc{rint}§
4663long double rint( long double );
4664long int rint( float );
4665long int rint( double );
4666long int rint( long double );
4667long long int rint( float );
4668long long int rint( double );
4669long long int rint( long double );
4670
4671long int lrint( float );§\indexc{lrint}§
4672long int lrint( double );
4673long int lrint( long double );
4674long long int llrint( float );
4675long long int llrint( double );
4676long long int llrint( long double );
4677
4678float nearbyint( float );§\indexc{nearbyint}§
4679double nearbyint( double );
4680long double nearbyint( long double );
4681
4682float round( float );§\indexc{round}§
4683long double round( long double );
4684long int round( float );
4685long int round( double );
4686long int round( long double );
4687long long int round( float );
4688long long int round( double );
4689long long int round( long double );
4690
4691long int lround( float );§\indexc{lround}§
4692long int lround( double );
4693long int lround( long double );
4694long long int llround( float );
4695long long int llround( double );
4696long long int llround( long double );
4697\end{lstlisting}
4698
4699
4700\subsection{Manipulation}
4701
4702\begin{lstlisting}
4703float copysign( float, float );§\indexc{copysign}§
4704double copysign( double, double );
4705long double copysign( long double, long double );
4706
4707float frexp( float, int * );§\indexc{frexp}§
4708double frexp( double, int * );
4709long double frexp( long double, int * );
4710
4711float ldexp( float, int );§\indexc{ldexp}§
4712double ldexp( double, int );
4713long double ldexp( long double, int );
4714
4715[ float, float ] modf( float );§\indexc{modf}§
4716float modf( float, float * );
4717[ double, double ] modf( double );
4718double modf( double, double * );
4719[ long double, long double ] modf( long double );
4720long double modf( long double, long double * );
4721
4722float nextafter( float, float );§\indexc{nextafter}§
4723double nextafter( double, double );
4724long double nextafter( long double, long double );
4725
4726float nexttoward( float, long double );§\indexc{nexttoward}§
4727double nexttoward( double, long double );
4728long double nexttoward( long double, long double );
4729
4730float scalbn( float, int );§\indexc{scalbn}§
4731double scalbn( double, int );
4732long double scalbn( long double, int );
4733
4734float scalbln( float, long int );§\indexc{scalbln}§
4735double scalbln( double, long int );
4736long double scalbln( long double, long int );
4737\end{lstlisting}
4738
4739
4740\section{Rational Numbers}
4741\label{s:RationalNumbers}
4742
4743Rational numbers are numbers written as a ratio, i.e., as a fraction, where the numerator (top number) and the denominator (bottom number) are whole numbers.
4744When creating and computing with rational numbers, results are constantly reduced to keep the numerator and denominator as small as possible.
4745
4746\begin{lstlisting}
4747// implementation
4748struct Rational {§\indexc{Rational}§
4749        long int numerator, denominator;                                        // invariant: denominator > 0
4750}; // Rational
4751
4752// constants
4753extern struct Rational 0;
4754extern struct Rational 1;
4755
4756// constructors
4757Rational rational();
4758Rational rational( long int n );
4759Rational rational( long int n, long int d );
4760
4761// getter/setter for numerator/denominator
4762long int numerator( Rational r );
4763long int numerator( Rational r, long int n );
4764long int denominator( Rational r );
4765long int denominator( Rational r, long int d );
4766
4767// comparison
4768int ?==?( Rational l, Rational r );
4769int ?!=?( Rational l, Rational r );
4770int ?<?( Rational l, Rational r );
4771int ?<=?( Rational l, Rational r );
4772int ?>?( Rational l, Rational r );
4773int ?>=?( Rational l, Rational r );
4774
4775// arithmetic
4776Rational -?( Rational r );
4777Rational ?+?( Rational l, Rational r );
4778Rational ?-?( Rational l, Rational r );
4779Rational ?*?( Rational l, Rational r );
4780Rational ?/?( Rational l, Rational r );
4781
4782// conversion
4783double widen( Rational r );
4784Rational narrow( double f, long int md );
4785
4786// I/O
4787forall( dtype istype | istream( istype ) ) istype * ?|?( istype *, Rational * );
4788forall( dtype ostype | ostream( ostype ) ) ostype * ?|?( ostype *, Rational );
4789\end{lstlisting}
4790
4791
4792\bibliographystyle{plain}
4793\bibliography{cfa}
4794
4795
4796\addcontentsline{toc}{section}{\indexname} % add index name to table of contents
4797\begin{theindex}
4798Italic page numbers give the location of the main entry for the referenced term.
4799Plain page numbers denote uses of the indexed term.
4800Entries for grammar non-terminals are italicized.
4801A typewriter font is used for grammar terminals and program identifiers.
4802\indexspace
4803\input{user.ind}
4804\end{theindex}
4805
4806
4807\end{document}
4808
4809% Local Variables: %
4810% tab-width: 4 %
4811% fill-column: 100 %
4812% compile-command: "make" %
4813% End: %
Note: See TracBrowser for help on using the repository browser.