source: doc/user/user.tex @ 4096de0

aaron-thesisarm-ehcleanup-dtorsctordeferred_resndemanglerjacob/cs343-translationjenkins-sandboxmemorynew-astnew-ast-unique-exprnew-envno_listpersistent-indexerresolv-newwith_gc
Last change on this file since 4096de0 was 4096de0, checked in by Peter A. Buhr <pabuhr@…>, 5 years ago

write pointer/reference section, update LaTeX macros

  • Property mode set to 100644
File size: 198.3 KB
Line 
1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -*- Mode: Latex -*- %%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%%
3%% Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
4%%
5%% The contents of this file are covered under the licence agreement in the
6%% file "LICENCE" distributed with Cforall.
7%%
8%% user.tex --
9%%
10%% Author           : Peter A. Buhr
11%% Created On       : Wed Apr  6 14:53:29 2016
12%% Last Modified By : Peter A. Buhr
13%% Last Modified On : Wed Jul  6 21:08:24 2016
14%% Update Count     : 1070
15%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
17% requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
18
19% inline code ©...© (copyright symbol) emacs: C-q M-)
20% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
21% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
22% green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
23% LaTex escape §...§ (section symbol) emacs: C-q M-'
24% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
25% math escape $...$ (dollar symbol)
26
27\documentclass[twoside,11pt]{article}
28
29%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
30
31% Latex packages used in the document.
32\usepackage[T1]{fontenc}                                % allow Latin1 (extended ASCII) characters
33\usepackage{textcomp}
34\usepackage[latin1]{inputenc}
35\usepackage{fullpage,times,comment}
36\usepackage{epic,eepic}
37\usepackage{upquote}                                                                    % switch curled `'" to straight
38\usepackage{calc}
39\usepackage{xspace}
40\usepackage{graphicx}
41\usepackage{varioref}                                                                   % extended references
42\usepackage{listings}                                                                   % format program code
43\usepackage[flushmargin]{footmisc}                                              % support label/reference in footnote
44\usepackage{latexsym}                                   % \Box glyph
45\usepackage{mathptmx}                                   % better math font with "times"
46\usepackage[usenames]{color}
47\usepackage[pagewise]{lineno}
48\renewcommand{\linenumberfont}{\scriptsize\sffamily}
49\input{common}                                          % bespoke macros used in the document
50\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
51\usepackage{breakurl}
52\renewcommand{\UrlFont}{\small\sf}
53
54\setlength{\topmargin}{-0.45in}                                                 % move running title into header
55\setlength{\headsep}{0.25in}
56
57%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
58
59% Names used in the document.
60
61\newcommand{\Version}{1.0.0}
62\newcommand{\CS}{C\raisebox{-0.9ex}{\large$^\sharp$}\xspace}
63
64\newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}}
65\newcommand{\Emph}[2][red]{{\color{#1}\textbf{\emph{#2}}}}
66\newcommand{\R}[1]{\Textbf{#1}}
67\newcommand{\B}[1]{{\Textbf[blue]{#1}}}
68\newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
69
70\newsavebox{\LstBox}
71
72%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
73
74\setcounter{secnumdepth}{3}                             % number subsubsections
75\setcounter{tocdepth}{3}                                % subsubsections in table of contents
76\makeindex
77
78%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
79
80\title{\Huge
81\vspace*{1in}
82\CFA (\CFL) User Manual                         \\
83Version 1.0                                                     \\
84\vspace*{0.25in}
85\huge``describe not prescribe''
86\vspace*{1in}
87}% title
88
89\author{\huge
90Peter A. Buhr and ...
91}% author
92
93\date{
94DRAFT \\ \today
95}% date
96
97%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
98
99\begin{document}
100\pagestyle{headings}
101% changed after setting pagestyle
102\renewcommand{\sectionmark}[1]{\markboth{\thesection\quad #1}{\thesection\quad #1}}
103\renewcommand{\subsectionmark}[1]{\markboth{\thesubsection\quad #1}{\thesubsection\quad #1}}
104\pagenumbering{roman}
105\linenumbers                                            % comment out to turn off line numbering
106
107\maketitle
108\thispagestyle{empty}
109\vspace*{\fill}
110\noindent
111\copyright\,2016 \CFA Project \\ \\
112\noindent
113This work is licensed under the Creative Commons Attribution 4.0 International License.
114To view a copy of this license, visit {\small\url{http://creativecommons.org/licenses/by/4.0}}.
115\vspace*{1in}
116
117\clearpage
118\thispagestyle{plain}
119\pdfbookmark[1]{Contents}{section}
120\tableofcontents
121
122\clearpage
123\thispagestyle{plain}
124\pagenumbering{arabic}
125
126
127\section{Introduction}
128
129\CFA\footnote{Pronounced ``C-for-all'', and written \CFA, CFA, or \CFL.} is a modern general-purpose programming-language, designed an as evolutionary step forward from the C programming language.
130The syntax of the \CFA language builds from C, and should look immediately familiar to C/\CC programmers.
131% Any language feature that is not described here can be assumed to be using the standard C11 syntax.
132\CFA adds many modern programming-language features that directly lead to increased \emph{safety} and \emph{productivity}, while maintaining interoperability with existing C programs and achieving C performance.
133Like C, \CFA is a statically typed, procedural language with a low-overhead runtime, meaning there is no global garbage-collection.
134The primary new features include parametric-polymorphism routines and types, exceptions, concurrency, and modules.
135
136One of the main design philosophies of \CFA is to ``describe not prescribe'', which means \CFA tries to provide a pathway from low-level C programming to high-level \CFA programming, but it does not force programmers to ``do the right thing''.
137Programmers can cautiously add \CFA extensions to their C programs in any order and at any time to incrementally move towards safer, higher-level programming features.
138A programmer is always free to reach back to C from \CFA for any reason, and in many cases, new \CFA features have a fallback to a C mechanism.
139There is no notion or requirement for rewriting a legacy C program in \CFA;
140instead, a programmer evolves an existing C program into \CFA by incrementally incorporating \CFA features.
141New programs can be written in \CFA using a combination of C and \CFA features.
142\CC had a similar goal 30 years ago, but has struggled over the intervening time to incorporate modern programming-language features because of early design choices.
143\CFA has 30 years of hindsight and a clean starting point.
144
145Like \CC, there may be both an old and new ways to achieve the same effect.
146For example, the following programs compare the \CFA and C I/O mechanisms.
147\begin{quote2}
148\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
149\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
150\begin{lstlisting}
151#include <fstream>
152int main( void ) {
153        int x = 0, y = 1, z = 2;
154        ®sout | x | y | z | endl;®
155}
156\end{lstlisting}
157&
158\begin{lstlisting}
159#include <stdio.h>
160int main( void ) {
161        int x = 0, y = 1, z = 2;
162        ®printf( "%d %d %d\n", x, y, z );®
163}
164\end{lstlisting}
165\end{tabular}
166\end{quote2}
167Both programs output the same result.
168While the \CFA I/O looks similar to the \CC output style, there are important differences, such as automatic spacing between variables as in Python (see also~\VRef{s:IOLibrary}).
169
170This document is a user manual for the \CFA programming language, targeted at \CFA programmers.
171Implementers may refer to the \CFA Programming Language Specification for details about the language syntax and semantics.
172In its current state, this document covers the intended core features of the language.
173Changes to the syntax and additional features are expected to be included in later revisions.
174% For additional information, see \url{http://wiki.do-lang.org}.
175
176
177\section{History}
178
179The \CFA project started with K-W C~\cite{Buhr94a,Till89}, which extended C with new declaration syntax, multiple return values from routines, and extended assignment capabilities using the notion of tuples.
180(See~\cite{Werther96} for some similar work, but for \CC.)
181The original \CFA project~\cite{Ditchfield92} extended the C type system with parametric polymorphism and overloading, as opposed to the \CC approach of object-oriented extensions to the C type-system.
182A first implementation of the core Cforall language was created~\cite{Bilson03,Esteves04}, but at the time there was little interesting in extending C, so work did not continue.
183As the saying goes, ``What goes around, comes around.'', and there is now renewed interest in the C programming language because of legacy code-bases, so the \CFA project has been restarted.
184
185
186\section{Why fix C?}
187
188Even with all its problems, C is a very popular programming language because it allows writing software at virtually any level in a computer system without restriction.
189For system programming, where direct access to hardware and dealing with real-time issues is a requirement, C is usually the language of choice.
190As well, there are millions of lines of C legacy code, forming the base for many software development projects (especially on UNIX systems).
191The TIOBE index (\url{http://www.tiobe.com/tiobe_index}) for March 2016 shows programming-language popularity, with Java 20.5\%, C 14.5\%, \CC 6.7\%, \CS 4.3\%, Python 4.3\%, and all other programming languages below 3\%.
192As well, for 30 years, C has been the number 1 and 2 most popular programming language:
193\begin{center}
194\setlength{\tabcolsep}{1.5ex}
195\begin{tabular}{@{}r|c|c|c|c|c|c|c@{}}
196Ranking & 2016  & 2011  & 2006  & 2001  & 1996  & 1991  & 1986          \\
197\hline
198Java    & 1             & 1             & 1             & 3             & 29    & -             & -                     \\
199\hline
200\R{C}   & \R{2} & \R{2} & \R{2} & \R{1} & \R{1} & \R{1} & \R{1}         \\
201\hline
202\CC             & 3             & 3             & 3             & 2             & 2             & 2             & 7                     \\
203\end{tabular}
204\end{center}
205Hence, C is still an extremely important programming language, with double the usage of \CC, where \CC itself is largely C code.
206Finally, love it or hate it, C has been an important and influential part of computer science for 40 years and it appears it will continue to be for many more years.
207Unfortunately, C has too many problems and omissions to make it an acceptable programming language for modern needs.
208
209The goal of this project is to engineer modern language features into C in an evolutionary rather than revolutionary way.
210\CC~\cite{c++,ANSI14:C++} is an example of a similar project;
211however, it largely extended the language, and did not address many existing problems.\footnote{%
212Two important existing problems addressed were changing the type of character literals from ©int© to ©char© and enumerator from ©int© to the type of its enumerators.}
213Fortran~\cite{Fortran08}, Ada~\cite{Ada12}, and Cobol~\cite{Cobol14} are examples of programming languages that took an evolutionary approach, where modern language features (e.g., objects, concurrency) are added and problems fixed within the framework of the existing language.
214Java~\cite{Java8}, Go~\cite{Go}, Rust~\cite{Rust} and D~\cite{D} are examples of the revolutionary approach for modernizing C/\CC, resulting in a new language rather than an extension of the descendent.
215These languages have different syntax and semantics from C, and do not interoperate directly with C, largely because of garbage collection.
216As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
217These costs can be prohibitive for many companies with a large software base in C/\CC, and a significant number of programmers requiring retraining to a new programming language.
218
219The result of this project is a language that is largely backwards compatible with C11~\cite{C11}, but fixing some of the well known C problems and containing many modern language features.
220Without significant extension to the C programming language, it is becoming unable to cope with the needs of modern programming problems and programmers;
221as a result, it will fade into disuse.
222Considering the large body of existing C code and programmers, there is significant impetus to ensure C is transformed into a modern programming language.
223While C11 made a few simple extensions to the language, nothing was added to address existing problems in the language or to augment the language with modern language features.
224While some may argue that modern language features may make C complex and inefficient, it is clear a language without modern capabilities is insufficient for the advanced programming problems existing today.
225
226
227\section{Interoperability}
228
229\CFA is designed to integrate well with existing C programs and libraries.
230The most important feature of interoperability is to use the same calling conventions, so there is no overhead to call existing C routines.
231This feature allows users of \CFA to take advantage of the existing panoply of C libraries from inside their \CFA code.
232In fact, one of the biggest issues for any new programming language is establishing a minimum level of library code to support a large body of activities.
233Language developers often state that adequate library support takes more work than designing and implementing the language itself.
234Like \CC, \CFA starts with immediate access to all exiting C libraries, and in many cases, can easily wrap library routines with simpler and safer interfaces, at very low cost.
235Hence, \CFA begins by leveraging the large repository of C libraries with little cost.
236
237However, it is necessary to differentiate between C and \CFA code because of name overloading, as for \CC.
238For example, the C math-library provides the following routines for computing the absolute value of the basic types: ©abs©, ©labs©, ©llabs©, ©fabs©, ©fabsf©, ©fabsl©, ©cabsf©, ©cabs©, and ©cabsl©.
239Whereas, \CFA wraps each of these routines into ones with the common name ©abs©:
240\begin{lstlisting}
241char abs( char );
242extern "C" {
243int abs( int );                                 // use default C routine for int
244} // extern "C"
245long int abs( long int );
246long long int abs( long long int );
247float abs( float );
248double abs( double );
249long double abs( long double );
250float _Complex abs( float _Complex );
251double _Complex abs( double _Complex );
252long double _Complex abs( long double _Complex );
253\end{lstlisting}
254The problem is the name clash between the library routine ©abs© and the \CFA names ©abs©.
255Hence, names appearing in an ©extern "C"© block have \newterm{C linkage}.
256Then overloading polymorphism uses a mechanism called \newterm{name mangling} to create unique names that are different from C names, which are not mangled.
257Hence, there is the same need as in \CC, to know if a name is a C or \CFA name, so it can be correctly formed.
258There is no way around this problem, other than C's approach of creating unique names for each pairing of operation and type.
259This example strongly illustrates a core idea in \CFA: \emph{the power of a name}.
260The name ``©abs©'' evokes the notion of absolute value, and many mathematical types provide the notion of absolute value.
261Hence, knowing the name ©abs© should be sufficient to apply it to any type where it is applicable.
262The time savings and safety of using one name uniformly versus $N$ unique names should not be underestimated.
263
264
265\section[Compiling CFA Program]{Compiling \CFA Program}
266
267The command ©cfa© is used to compile \CFA program(s), and is based on the GNU ©gcc©\index{gcc} command, e.g.:
268\begin{lstlisting}
269cfa§\indexc{cfa}\index{compilation!cfa@©cfa©}§ [ gcc-options ] C/§\CFA§-files [ assembler/loader-files ]
270\end{lstlisting}
271By default, \CFA programs having the following ©gcc© flags turned on:
272\begin{description}
273\item\hspace*{-0.6ex}\Indexc{-std=gnu99}\index{compilation option!-std=gnu99@{©-std=gnu99©}}
274The 1999 C standard plus GNU extensions.
275\item\hspace*{-0.6ex}\Indexc{-fgnu89-¶inline¶}\index{compilation option!-fgnu89-inline@{©-fgnu89-¶inline¶©}}
276Use the traditional GNU semantics for inline routines in C99 mode, which allows inline routines in header files.
277\end{description}
278The following new \CFA option is available:
279\begin{description}
280\item\hspace*{-0.6ex}\Indexc{-CFA}\index{compilation option!-CFA@{©-CFA©}}
281Only the C preprocessor and the \CFA translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \CFA translator.
282\end{description}
283
284The following preprocessor variables are available:
285\begin{description}
286\item\hspace*{-0.6ex}\Indexc{__CFA__}\index{preprocessor variables!__CFA__@{©__CFA__©}}
287is always available during preprocessing and its value is the current major \Index{version number} of \CFA.\footnote{
288The C preprocessor allows only integer values in a preprocessor variable so a value like ``\Version'' is not allowed.
289Hence, the need to have three variables for the major, minor and patch version number.}
290
291\item\hspace*{-0.6ex}\Indexc{__CFA_MINOR__}\index{preprocessor variables!__CFA_MINOR__@{©__CFA_MINOR__©}}
292is always available during preprocessing and its value is the current minor \Index{version number} of \CFA.
293
294\item\hspace*{-0.6ex}\Indexc{__CFA_PATCH__}\index{preprocessor variables!__CFA_PATCH____CFA_PATCH__©}
295is always available during preprocessing and its value is the current patch \Index{version number} of \CFA.
296
297\item\hspace*{-0.6ex}\Indexc{__CFA__}\index{preprocessor variables!__CFA____CFA__©} and \Indexc{__CFORALL__}\index{preprocessor variables!__CFORALL____CFORALL__©}
298are always available during preprocessing and have no value.
299\end{description}
300
301These preprocessor variables allow conditional compilation of programs that must work differently in these situations.
302For example, to toggle between C and \CFA extensions, using the following:
303\begin{lstlisting}
304#ifndef __CFORALL__
305#include <stdio.h>                              §\C{// C header file}§
306#else
307#include <fstream>                              §\C{// \CFA header file}§
308#endif
309\end{lstlisting}
310which conditionally includes the correct header file, if the program is compiled using ©gcc© or ©cfa©.
311
312
313\section{Underscores in Constants}
314
315Numeric constants are extended to allow \Index{underscore}s within constants\index{constant!underscore}, e.g.:
316\begin{lstlisting}
317_®147®_®483®_®648;                    §\C{// decimal constant}§
31856_ul;                                                  §\C{// decimal unsigned long constant}§
3190_377;                                                  §\C{// octal constant}§
3200x_ff_ff;                                               §\C{// hexadecimal constant}§
3210x_ef3d_aa5c;                                   §\C{// hexadecimal constant}§
3223.141_592_654;                                  §\C{// floating point constant}§
32310_e_+1_00;                                             §\C{// floating point constant}§
3240x_ff_ff_p_3;                                   §\C{// hexadecimal floating point}§
3250x_1.ffff_ffff_p_128_l;                 §\C{// hexadecimal floating point long constant}§
326L_"\x_ff_ee";                                   §\C{// wide character constant}§
327\end{lstlisting}
328The rules for placement of underscores is as follows:
329\begin{enumerate}
330\item
331A sequence of underscores is disallowed, e.g., ©12__34© is invalid.
332\item
333Underscores may only appear within a sequence of digits (regardless of the digit radix).
334In other words, an underscore cannot start or end a sequence of digits, e.g., ©_1©, ©1_© and ©_1_© are invalid (actually, the 1st and 3rd examples are identifier names).
335\item
336A numeric prefix may end with an underscore;
337a numeric infix may begin and/or end with an underscore;
338a numeric suffix may begin with an underscore.
339For example, the octal ©0© or hexadecimal ©0x© prefix may end with an underscore ©0_377© or ©0x_ff©;
340the exponent infix ©E© may start or end with an underscore ©1.0_E10©, ©1.0E_10© or ©1.0_E_10©;
341the type suffixes ©U©, ©L©, etc. may start with an underscore ©1_U©, ©1_ll© or ©1.0E10_f©.
342\end{enumerate}
343It is significantly easier to read and enter long constants when they are broken up into smaller groupings (most cultures use comma or period among digits for the same purpose).
344This extension is backwards compatible, matches with the use of underscore in variable names, and appears in Ada and Java 8.
345
346
347\section{Declarations}
348\label{s:Declarations}
349
350C declaration syntax is notoriously confusing and error prone.
351For example, many C programmers are confused by a declaration as simple as:
352\begin{quote2}
353\begin{tabular}{@{}ll@{}}
354\begin{lstlisting}
355int *x[5]
356\end{lstlisting}
357&
358\raisebox{-0.75\totalheight}{\input{Cdecl}}
359\end{tabular}
360\end{quote2}
361Is this an array of 5 pointers to integers or a pointer to an array of 5 integers?
362The fact this declaration is unclear to many C programmers means there are \Index{productivity} and \Index{safety} issues even for basic programs.
363Another example of confusion results from the fact that a routine name and its parameters are embedded within the return type, mimicking the way the return value is used at the routine's call site.
364For example, a routine returning a pointer to an array of integers is defined and used in the following way:
365\begin{lstlisting}
366int (*f())[5] {...};                    §\C{}§
367... (*f())[3] += 1;
368\end{lstlisting}
369Essentially, the return type is wrapped around the routine name in successive layers (like an onion).
370While attempting to make the two contexts consistent is a laudable goal, it has not worked out in practice.
371
372\CFA provides its own type, variable and routine declarations, using a different syntax.
373The new declarations place qualifiers to the left of the base type, while C declarations place qualifiers to the right of the base type.
374In the following example, \R{red} is for the base type and \B{blue} is for the qualifiers.
375The \CFA declarations move the qualifiers to the left of the base type, i.e., blue to the left of the red, while the qualifiers have the same meaning but are ordered left to right to specify a variable's type.
376\begin{quote2}
377\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
378\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
379\begin{lstlisting}
380ß[5] *ß ®int® x1;
381ß* [5]ß ®int® x2;
382ß[* [5] int]ß f®( int p )®;
383\end{lstlisting}
384&
385\begin{lstlisting}
386®int® ß*ß x1 ß[5]ß;
387®int® ß(*ßx2ß)[5]ß;
388ßint (*ßf®( int p )®ß)[5]ß;
389\end{lstlisting}
390\end{tabular}
391\end{quote2}
392The only exception is bit field specification, which always appear to the right of the base type.
393% Specifically, the character ©*© is used to indicate a pointer, square brackets ©[©\,©]© are used to represent an array or function return value, and parentheses ©()© are used to indicate a routine parameter.
394However, unlike C, \CFA type declaration tokens are distributed across all variables in the declaration list.
395For instance, variables ©x© and ©y© of type pointer to integer are defined in \CFA as follows:
396\begin{quote2}
397\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
398\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
399\begin{lstlisting}
400®*® int x, y;
401\end{lstlisting}
402&
403\begin{lstlisting}
404int ®*®x, ®*®y;
405\end{lstlisting}
406\end{tabular}
407\end{quote2}
408The downside of this semantics is the need to separate regular and pointer declarations:
409\begin{quote2}
410\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
411\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
412\begin{lstlisting}
413®*® int x;
414int y;
415\end{lstlisting}
416&
417\begin{lstlisting}
418int ®*®x, y;
419
420\end{lstlisting}
421\end{tabular}
422\end{quote2}
423which is \Index{prescribing} a safety benefit.
424Other examples are:
425\begin{quote2}
426\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
427\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
428\begin{lstlisting}
429[ 5 ] int z;
430[ 5 ] * char w;
431* [ 5 ] double v;
432struct s {
433        int f0:3;
434        * int f1;
435        [ 5 ] * int f2;
436};
437\end{lstlisting}
438&
439\begin{lstlisting}
440int z[ 5 ];
441char *w[ 5 ];
442double (*v)[ 5 ];
443struct s {
444        int f0:3;
445        int *f1;
446        int *f2[ 5 ]
447};
448\end{lstlisting}
449&
450\begin{lstlisting}
451// array of 5 integers
452// array of 5 pointers to char
453// pointer to array of 5 doubles
454
455// common bit field syntax
456
457
458
459\end{lstlisting}
460\end{tabular}
461\end{quote2}
462
463All type qualifiers, e.g., ©const©, ©volatile©, etc., are used in the normal way with the new declarations and also appear left to right, e.g.:
464\begin{quote2}
465\begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
466\multicolumn{1}{c@{\hspace{1em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{1em}}}{\textbf{C}} \\
467\begin{lstlisting}
468const * const int x;
469const * [ 5 ] const int y;
470\end{lstlisting}
471&
472\begin{lstlisting}
473int const * const x;
474const int (* const y)[ 5 ]
475\end{lstlisting}
476&
477\begin{lstlisting}
478// const pointer to const integer
479// const pointer to array of 5 const integers
480\end{lstlisting}
481\end{tabular}
482\end{quote2}
483All declaration qualifiers, e.g., ©extern©, ©static©, etc., are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}
484The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature.~\cite[\S~6.11.5(1)]{C11}} e.g.:
485\begin{quote2}
486\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
487\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
488\begin{lstlisting}
489extern [ 5 ] int x;
490static * const int y;
491\end{lstlisting}
492&
493\begin{lstlisting}
494int extern x[ 5 ];
495const int static *y;
496\end{lstlisting}
497&
498\begin{lstlisting}
499// externally visible array of 5 integers
500// internally visible pointer to constant int
501\end{lstlisting}
502\end{tabular}
503\end{quote2}
504
505Unsupported are K\&R C declarations where the base type defaults to ©int©, if no type is specified,\footnote{
506At least one type specifier shall be given in the declaration specifiers in each declaration, and in the specifier-qualifier list in each structure declaration and type name~\cite[\S~6.7.2(2)]{C11}}
507e.g.:
508\begin{lstlisting}
509x;                                                              §\C{// int x}§
510*y;                                                             §\C{// int *y}§
511f( p1, p2 );                                    §\C{// int f( int p1, int p2 );}§
512f( p1, p2 ) {}                                  §\C{// int f( int p1, int p2 ) {}}§
513\end{lstlisting}
514
515Finally, new \CFA declarations may appear together with C declarations in the same program block, but cannot be mixed within a specific declaration.
516Therefore, a programmer has the option of either continuing to use traditional C declarations or take advantage of the new style.
517Clearly, both styles need to be supported for some time due to existing C-style header-files, particularly for UNIX systems.
518
519
520\section{Pointer / Reference}
521
522C provides a \newterm{pointer type};
523\CFA adds a \newterm{reference type}.
524Both types contain an \newterm{address}, which is normally a location in memory.
525Special addresses are used to denote certain states or access co-processor memory.
526By convention, no variable is placed at address 0, so addresses like 0, 1, 2, 3 are often used to denote no-value or other special states.
527Often dereferencing a special state causes a memory fault, so checking is necessary during execution.
528If the programming language assigns addresses, a program's execution is \Index{sound}, i.e., all addresses are to valid memory locations.
529C allows programmers to assign addresses, so there is the potential for incorrect addresses, both inside and outside of the computer address-space.
530
531Program variables are implicit pointers to memory locations generated by the compiler and automatically dereferenced, as in:
532\begin{quote2}
533\begin{tabular}{@{}lll@{}}
534\begin{lstlisting}
535int x;
536x = 3;
537int y;
538y = x;
539\end{lstlisting}
540&
541\raisebox{-0.45\totalheight}{\input{pointer1}}
542&
543\begin{lstlisting}
544int * const x = (int *)100
545*x = 3;                 // implicit dereference
546int * const y = (int *)104;
547*y = *x;                // implicit dereference
548\end{lstlisting}
549\end{tabular}
550\end{quote2}
551where the right example is how the compiler logically interpreters variables.
552Since a variable name only points to one location during its lifetime, it is a \Index{immutable} pointer;
553hence, variables ©x© and ©y© are constant pointers in the compiler interpretation.
554In general, variable addresses are stored in instructions instead of loaded independently, so an instruction fetch implicitly loads a variable's address.
555\begin{quote2}
556\begin{tabular}{@{}l|l@{}}
557\begin{lstlisting}
558lda             r1,100                  // load address of x
559ld              r2,(r1)                   // load value of x
560lda             r3,104                  // load address of y
561st              r2,(r3)                   // store x into y
562\end{lstlisting}
563&
564\begin{lstlisting}
565
566ld              r2,(100)                // load value of x
567
568st              r2,(104)                // store x into y
569\end{lstlisting}
570\end{tabular}
571\end{quote2}
572Finally, the immutable nature of a variable's address and the fact that there is no storage for a variable address means pointer assignment is impossible.
573Therefore, the expression ©x = y© only has one meaning, ©*x = *y©, i.e., manipulate values, which is why explicitly writing the dereferences is unnecessary even though it occurs implicitly as part of instruction decoding.
574
575A pointer/reference is a generalization of a variable name, i.e., a mutable address that can point to more than one memory location during its lifetime.
576(Similarly, an integer variable can contain multiple integer literals during its lifetime versus an integer constant representing a single literal during its lifetime and may not occupy storage as the literal is embedded directly into instructions.)
577Hence, a pointer occupies memory to store its current address, and the pointer's value is loaded by dereferencing, e.g.:
578\begin{quote2}
579\begin{tabular}{@{}ll@{}}
580\begin{lstlisting}
581int x, y, ®*® p1, ®*® p2, ®**® p3;
582p1 = ®&®x;               // p1 points to x
583p2 = p1;                 // p2 also points to x
584p1 = ®&®y;               // p1 points to y
585p1 = p2 + 1;    // p1 points to y, pointer arithmetic
586p3 = &p2;               // p3 points to p2
587\end{lstlisting}
588&
589\raisebox{-0.45\totalheight}{\input{pointer2.pstex_t}}
590\end{tabular}
591\end{quote2}
592
593Notice, an address has a duality\index{address!duality}: a location in memory or the value at that location.
594In many cases, the compiler can infer the meaning:
595\begin{lstlisting}
596p2 = p1 + x;                            §\C{// compiler infers *p2 = *p1 + x;}§
597\end{lstlisting}
598because adding the arbitrary integer value in ©x© to the address of ©p1© and storing the resulting address into ©p2© is an unlikely operation.
599Algol68~\cite{Algol68} inferences pointer dereferencing to select the best meaning for each pointer usage.
600However, there are ambiguous cases, especially when \Index{pointer arithmetic} is possible, as in C:
601\begin{lstlisting}
602p1 = p2;                                        §\C{// p1 = p2 or *p1 = *p2}§
603p1 = p1 + 1;                            §\C{// p1 = p1 + 1 or *p1 = *p1 + 1}§
604\end{lstlisting}
605
606Most languages pick one meaning as the default and the programmer explicitly indicates the other meaning to resolve the address-duality ambiguity\index{address!duality ambiguity}.
607In C, the default meaning for pointers is to manipulate the pointer's address and the pointed-to value is explicitly accessed by the dereference operator ©*©.
608\begin{lstlisting}
609p1 = p2;                                        §\C{// pointer address assignment}§
610*p1 = *p1 + 1;                          §\C{// pointed-to value assignment / operation}§
611\end{lstlisting}
612which works well for situations where manipulation of addresses in the primary meaning and data is rarely accessed, such as storage management (©malloc©/©free©).
613
614However, in most other situations, the pointed-to value is requested more often than the pointer address.
615\begin{lstlisting}
616*p2 = ((*p1 + *p2) * (**p3 - *p1)) / (**p3 - 15);
617\end{lstlisting}
618In this case, it is tedious to explicitly write the dereferencing, and error prone when pointer arithmetic is allowed.
619It is better to have the compiler generate the dereferencing and have no implicit pointer arithmetic:
620\begin{lstlisting}
621p2 = ((p1 + p2) * (p3 - p1)) / (p3 - 15);
622\end{lstlisting}
623
624To switch the default meaning for an address requires a new kind of pointer, called a \newterm{reference} and denoted by ©&©.
625\begin{lstlisting}
626int x, y, ®&® r1, ®&® r2, ®&&® r3;
627®&®r1 = &x;                                     §\C{// r1 points to x}§
628®&®r2 = &r1;                            §\C{// r2 also points to x}§
629®&®r1 = &y;                                     §\C{// r2 also points to x}§
630®&®r1 = &r2 + 1;                        §\C{// r1 points to y, pointer arithmetic}§
631®&®r3 = ®&®&r2;                         §\C{// r3 points to r2}§
632r2 = ((r1 + r2) * (r3 - r1)) / (r3 - 15); §\C{// implicit dereferencing}§
633\end{lstlisting}
634Except for auto-dereferencing by the compiler, this reference example is the same as the pointer example.
635Hence, a reference behaves like the variable name for the current variable it is pointing-to.
636The simplest way to understand a reference is to imagine the compiler inserting a dereference operator before the reference variable for each reference qualifier in a declaration, e.g.:
637\begin{lstlisting}
638r2 = ((r1 + r2) * (r3 - r1)) / (r3 - 15);
639\end{lstlisting}
640is rewritten as:
641\begin{lstlisting}
642®*®r2 = ((®*®r1 + ®*®r2) ®*® (®**®r3 - ®*®r1)) / (®**®r3 - 15);
643\end{lstlisting}
644When a reference appears beside a dereference, e.g., ©&*©, they cancel out.\footnote{
645The unary ©&© operator yields the address of its operand.
646If the operand has type ``type'', the result has type ``pointer to type''.
647If the operand is the result of a unary ©*© operator, neither that operator nor the ©&© operator is evaluated and the result is as if both were omitted, except that the constraints on the operators still apply and the result is not an lvalue.~\cite[\S~6.5.3.2--3]{C11}}
648Hence, assigning to a reference requires the address of the reference variable (\Index{lvalue}):
649\begin{lstlisting}
650(&®*®)r1 = &x;                          §\C{// (\&*) cancel out giving variable r1 not the variable pointed-to by r1}§
651\end{lstlisting}
652Similarly, the address of a reference can be obtained for assignment or computation (\Index{rvalue}):
653\begin{lstlisting}
654(&®*®)r3 = &(&®*®)r2;           §\C{// (\&*) cancel out giving the address of variable r2}§
655\end{lstlisting}
656\Index{Cancellation}\index{pointer!cancellation rule} works to arbitrary depth, and pointer and reference values are interchangeable because both contain addresses.
657\begin{lstlisting}
658int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
659                 &r1 = &x, &&r2 = &&r1, &&&r3 = &&r2;
660***p3 = 3;                                      §\C{// change x}§
661r3 = 3;                                         §\C{// change x, ***r3}§
662**p3 = ...;                                     §\C{// change p1}§
663&r3 = ...;                                      §\C{// change r1, (\&*)**r3, 1 cancellation}§
664*p3 = ...;                                      §\C{// change p2}§
665&&r3 = ...;                                     §\C{// change r2, (\&(\&*)*)*r3, 2 cancellations}§
666&&&r3 = p3;                                     §\C{// change r3 to p3, (\&(\&(\&*)*)*)r3, 3 cancellations}§
667\end{lstlisting}
668Finally, implicit dereferencing and cancellation are a static (compilation) phenomenon not a dynamic one.
669That is, all implicit dereferencing and any cancellation is carried out prior to the start of the program, so reference performance is equivalent to pointer performance.
670A programmer selects a pointer or reference type solely on whether the address is dereferences frequently or infrequently, which dictates the amount of direct aid from the compiler;
671otherwise, everything else is equal.
672
673Interestingly, \CC deals with the address duality by making the pointed-to value the default, and prevent\-ing changes to the reference address, which eliminates half of the duality.
674Java deals with the address duality by making address assignment the default and providing a \Index{clone} mechanism to change the pointed-to value.
675
676As for pointers, references may have qualifiers:
677\begin{lstlisting}
678const int cx = 5;                       §\C{// cannot change cx;}§
679const int & r3 = &cx;           §\C{// cannot change what r3 is pointing to}§
680®&®r3 = &cx;                            §\C{// can change r3}§
681r3 = 7;                                         §\C{// error, cannot change cx}§
682int & const r4 = &x;            §\C{// must be initialized, \CC reference}§
683®&®r4 = &x;                                     §\C{// error, cannot change r4}§
684const int & const r5 = &cx;     §\C{// must be initialized, \CC reference}§
685r5 = 7;                                         §\C{// error, cannot change cx}§
686®&®r5 = &cx;                            §\C{// error, cannot change r5}§
687\end{lstlisting}
688Hence, for type ©& const©, there is no pointer assignment, so ©&r4 = &x© is disallowed, and \emph{the address value cannot be ©0©}.
689in effect, the compiler is managing the addresses not the programmer.
690
691\Index{Initialization} is different than \Index{assignment} because initialization occurs on an empty (uninitialized) storage on an object, while assignment occurs on possible initialized storage for an object.
692There are three initialization contexts in \CFA: declaration initialization, argument/parameter binding, return/temporary binding.
693For reference (like pointer) initialization, the initializing value must be an address (lvalue) not a value (rvalue).
694\begin{lstlisting}
695int * p = &x;                           §\C{// both \&x and x are possible interpretations}§
696int & r = x;                            §\C{// x unlikely interpretation, because of auto-dereferencing}§
697\end{lstlisting}
698Hence, the compiler implicitly inserts a reference, ©&©, before the initialization expression:
699Similarly, when a reference is used for a parameter/return type, the call-site argument does not require a reference.
700\begin{lstlisting}
701int & f( int & ri );            §\C{// reference parameter and return}§
702z = f( x ) + f( y );            §\C{// reference not required}§
703\end{lstlisting}
704Within routine ©f©, it is possible to change the argument by changing the corresponding parameter, and parameter ©ri© can be locally reassigned within ©f©.
705The return reference from ©f© is copied into a compiler generated temporary, which is logically treated as an initialization.
706
707When a pointer/reference parameter has a ©const© value (immutable), it is possible to pass literals and expressions.
708\begin{lstlisting}
709void f( const int & cri );
710void g( const int * cri );
711f( 3 );                  g( &3 );
712f( x + y );             g( &(x + y) );
713\end{lstlisting}
714Here, the compiler passes the address to the literal 3 or the temporary for the expression ©x + y©, knowing the argument cannot be changed through the parameter.
715(The ©&© is necessary for the pointer parameter to make the types match, and is common for a C programmer.)
716\CFA extends this semantics to a mutable pointer/reference parameter, and the compiler implicitly creates the necessary temporary (copying the argument), which is subsequently pointed to by the reference parameter, which can be changed.
717Essentially, there is an implicit rvalue to lvalue conversion in this case.\footnote{
718This conversion attempts to address the \newterm{const Hell} problem, when the innocent addition of a ©const© qualifier causes a cascade of type failures, requiring an unknown number of additional ©const© qualifiers, until it is discovered a ©const© qualifier cannot be added and all the ©const© qualifiers must be removed.}
719The implicit conversion allows seamless calls to any routine without having to explicit name/copy the literal/expression to allow the call.
720
721While \CFA attempts to handle pointers and references in a uniform, symmetric manner, C handles routine pointers in an inconsistent way: a routine pointer is both a reference and a pointer (particle and wave).
722\begin{lstlisting}
723void f( int p ) {...}
724void (*fp)( int ) = &f;         §\C{// pointer initialization}§
725void (*fp)( int ) = f;          §\C{// reference initialization}§
726(*fp)(3);                                       §\C{// pointer invocation}§
727fp(3);                                          §\C{// reference invocation}§
728\end{lstlisting}
729A routine variable is best described by a ©const© reference:
730\begin{lstlisting}
731const void (&fp)( int ) = f;
732fp( 3 );
733fp = ...                                        §\C{// change code not allowed}§
734&fp = ...;                                      §\C{// change routine refernce allowed}§
735\end{lstlisting}
736because the value of the routine variable is a routine literal, i.e., the routine code is normally immutable during execution.\footnote{
737Dynamic code rewriting is possible but only in special circumstances.}
738\CFA allows this additional use of references for routine variables in an attempt to give a more consistent meaning for routine variables.
739
740
741\section{Type Operators}
742
743The new declaration syntax can be used in other contexts where types are required, e.g., casts and the pseudo-routine ©sizeof©:
744\begin{quote2}
745\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
746\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
747\begin{lstlisting}
748y = (®* int®)x;
749i = sizeof(®[ 5 ] * int®);
750\end{lstlisting}
751&
752\begin{lstlisting}
753y = (®int *®)x;
754i = sizeof(®int *[ 5 ]®);
755\end{lstlisting}
756\end{tabular}
757\end{quote2}
758
759
760\section{Routine Definition}
761
762\CFA also supports a new syntax for routine definition, as well as ISO C and K\&R routine syntax.
763The point of the new syntax is to allow returning multiple values from a routine~\cite{Galletly96,CLU}, e.g.:
764\begin{lstlisting}
765®[ int o1, int o2, char o3 ]® f( int i1, char i2, char i3 ) {
766        §\emph{routine body}§
767}
768\end{lstlisting}
769where routine ©f© has three output (return values) and three input parameters.
770Existing C syntax cannot be extended with multiple return types because it is impossible to embed a single routine name within multiple return type specifications.
771
772In detail, the brackets, ©[]©, enclose the result type, where each return value is named and that name is a local variable of the particular return type.\footnote{
773Michael Tiemann, with help from Doug Lea, provided named return values in g++, circa 1989.}
774The value of each local return variable is automatically returned at routine termination.
775Declaration qualifiers can only appear at the start of a routine definition, e.g.:
776\begin{lstlisting}
777®extern® [ int x ] g( int y ) {§\,§}
778\end{lstlisting}
779Lastly, if there are no output parameters or input parameters, the brackets and/or parentheses must still be specified;
780in both cases the type is assumed to be void as opposed to old style C defaults of int return type and unknown parameter types, respectively, as in:
781\begin{lstlisting}
782\,§] g();                                             §\C{// no input or output parameters}§
783[ void ] g( void );                             §\C{// no input or output parameters}§
784\end{lstlisting}
785
786Routine f is called as follows:
787\begin{lstlisting}
788[ i, j, ch ] = f( 3, 'a', ch );
789\end{lstlisting}
790The list of return values from f and the grouping on the left-hand side of the assignment is called a \newterm{return list} and discussed in Section 12.
791
792\CFA style declarations cannot be used to declare parameters for K\&R style routine definitions because of the following ambiguity:
793\begin{lstlisting}
794int (*f(x))[ 5 ] int x; {}
795\end{lstlisting}
796The string ``©int (*f(x))[ 5 ]©'' declares a K\&R style routine of type returning a pointer to an array of 5 integers, while the string ``©[ 5 ] int x©'' declares a \CFA style parameter x of type array of 5 integers.
797Since the strings overlap starting with the open bracket, ©[©, there is an ambiguous interpretation for the string.
798As well, \CFA-style declarations cannot be used to declare parameters for C-style routine-definitions because of the following ambiguity:
799\begin{lstlisting}
800typedef int foo;
801int f( int (* foo) );                   §\C{// foo is redefined as a parameter name}§
802\end{lstlisting}
803The string ``©int (* foo)©'' declares a C-style named-parameter of type pointer to an integer (the parenthesis are superfluous), while the same string declares a \CFA style unnamed parameter of type routine returning integer with unnamed parameter of type pointer to foo.
804The redefinition of a type name in a parameter list is the only context in C where the character ©*© can appear to the left of a type name, and \CFA relies on all type qualifier characters appearing to the right of the type name.
805The inability to use \CFA declarations in these two contexts is probably a blessing because it precludes programmers from arbitrarily switching between declarations forms within a declaration contexts.
806
807C-style declarations can be used to declare parameters for \CFA style routine definitions, e.g.:
808\begin{lstlisting}
809[ int ] f( * int, int * );              §\C{// returns an integer, accepts 2 pointers to integers}§
810[ * int, int * ] f( int );              §\C{// returns 2 pointers to integers, accepts an integer}§
811\end{lstlisting}
812The reason for allowing both declaration styles in the new context is for backwards compatibility with existing preprocessor macros that generate C-style declaration-syntax, as in:
813\begin{lstlisting}
814#define ptoa( n, d ) int (*n)[ d ]
815int f( ptoa( p, 5 ) ) ...               §\C{// expands to int f( int (*p)[ 5 ] )}§
816[ int ] f( ptoa( p, 5 ) ) ...   §\C{// expands to [ int ] f( int (*p)[ 5 ] )}§
817\end{lstlisting}
818Again, programmers are highly encouraged to use one declaration form or the other, rather than mixing the forms.
819
820
821\subsection{Named Return Values}
822
823\Index{Named return values} handle the case where it is necessary to define a local variable whose value is then returned in a ©return© statement, as in:
824\begin{lstlisting}
825int f() {
826        int x;
827        ... x = 0; ... x = y; ...
828        return x;
829}
830\end{lstlisting}
831Because the value in the return variable is automatically returned when a \CFA routine terminates, the ©return© statement \emph{does not} contain an expression, as in:
832\newline
833\begin{minipage}{\linewidth}
834\begin{lstlisting}
835®[ int x, int y ]® f() {
836        int z;
837        ... x = 0; ... y = z; ...
838        ®return;® §\C{// implicitly return x, y}§
839}
840\end{lstlisting}
841\end{minipage}
842\newline
843When the return is encountered, the current values of ©x© and ©y© are returned to the calling routine.
844As well, ``falling off the end'' of a routine without a ©return© statement is permitted, as in:
845\begin{lstlisting}
846[ int x, int y ] f() {
847        ...
848} §\C{// implicitly return x, y}§
849\end{lstlisting}
850In this case, the current values of ©x© and ©y© are returned to the calling routine just as if a ©return© had been encountered.
851
852
853\subsection{Routine Prototype}
854
855The syntax of the new routine prototype declaration follows directly from the new routine definition syntax;
856as well, parameter names are optional, e.g.:
857\begin{lstlisting}
858[ int x ] f ();                                 §\C{// returning int with no parameters}§
859[ * int ] g (int y);                    §\C{// returning pointer to int with int parameter}§
860[ ] h (int,char);                               §\C{// returning no result with int and char parameters}§
861[ * int,int ] j (int);                  §\C{// returning pointer to int and int, with int parameter}§
862\end{lstlisting}
863This syntax allows a prototype declaration to be created by cutting and pasting source text from the routine definition header (or vice versa).
864It is possible to declare multiple routine-prototypes in a single declaration, but the entire type specification is distributed across \emph{all} routine names in the declaration list (see~\VRef{s:Declarations}), e.g.:
865\begin{quote2}
866\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
867\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
868\begin{lstlisting}
869[ int ] f(int), g;
870\end{lstlisting}
871&
872\begin{lstlisting}
873int f(int), g(int);
874\end{lstlisting}
875\end{tabular}
876\end{quote2}
877Declaration qualifiers can only appear at the start of a \CFA routine declaration,\footref{StorageClassSpecifier} e.g.:
878\begin{lstlisting}
879extern [ int ] f (int);
880static [ int ] g (int);
881\end{lstlisting}
882
883
884\section{Routine Pointers}
885
886The syntax for pointers to \CFA routines specifies the pointer name on the right, e.g.:
887\begin{lstlisting}
888* [ int x ] () fp;                      §\C{// pointer to routine returning int with no parameters}§
889* [ * int ] (int y) gp;         §\C{// pointer to routine returning pointer to int with int parameter}§
890* [ ] (int,char) hp;            §\C{// pointer to routine returning no result with int and char parameters}§
891* [ * int,int ] (int) jp;       §\C{// pointer to routine returning pointer to int and int, with int parameter}§
892\end{lstlisting}
893While parameter names are optional, \emph{a routine name cannot be specified};
894for example, the following is incorrect:
895\begin{lstlisting}
896* [ int x ] f () fp;            §\C{// routine name "f" is not allowed}§
897\end{lstlisting}
898
899
900\section{Named and Default Arguments}
901
902Named and default arguments~\cite{Hardgrave76}\footnote{
903Francez~\cite{Francez77} proposed a further extension to the named-parameter passing style, which specifies what type of communication (by value, by reference, by name) the argument is passed to the routine.}
904are two mechanisms to simplify routine call.
905Both mechanisms are discussed with respect to \CFA.
906\begin{description}
907\item[Named (or Keyword) Arguments:]
908provide the ability to specify an argument to a routine call using the parameter name rather than the position of the parameter.
909For example, given the routine:
910\begin{lstlisting}
911void p( int x, int y, int z ) {...}
912\end{lstlisting}
913a positional call is:
914\begin{lstlisting}
915p( 4, 7, 3 );
916\end{lstlisting}
917whereas a named (keyword) call may be:
918\begin{lstlisting}
919p( z : 3, x : 4, y : 7 );       §\C{// rewrite $\Rightarrow$ p( 4, 7, 3 )}§
920\end{lstlisting}
921Here the order of the arguments is unimportant, and the names of the parameters are used to associate argument values with the corresponding parameters.
922The compiler rewrites a named call into a positional call.
923The advantages of named parameters are:
924\begin{itemize}
925\item
926Remembering the names of the parameters may be easier than the order in the routine definition.
927\item
928Parameter names provide documentation at the call site (assuming the names are descriptive).
929\item
930Changes can be made to the order or number of parameters without affecting the call (although the call must still be recompiled).
931\end{itemize}
932
933Unfortunately, named arguments do not work in C-style programming-languages because a routine prototype is not required to specify parameter names, nor do the names in the prototype have to match with the actual definition.
934For example, the following routine prototypes and definition are all valid.
935\begin{lstlisting}
936void p( int, int, int );                        §\C{// equivalent prototypes}§
937void p( int x, int y, int z );
938void p( int y, int x, int z );
939void p( int z, int y, int x );
940void p( int q, int r, int s ) {}        §\C{// match with this definition}§
941\end{lstlisting}
942Forcing matching parameter names in routine prototypes with corresponding routine definitions is possible, but goes against a strong tradition in C programming.
943Alternatively, prototype definitions can be eliminated by using a two-pass compilation, and implicitly creating header files for exports.
944The former is easy to do, while the latter is more complex.
945Currently, \CFA does \emph{not} attempt to support named arguments.
946
947\item[Default Arguments]
948provide the ability to associate a default value with a parameter so it can be optionally specified in the argument list.
949For example, given the routine:
950\begin{lstlisting}
951void p( int x = 1, int y = 2, int z = 3 ) {...}
952\end{lstlisting}
953the allowable positional calls are:
954\begin{lstlisting}
955p();                            §\C{// rewrite $\Rightarrow$ p( 1, 2, 3 )}§
956p( 4 );                         §\C{// rewrite $\Rightarrow$ p( 4, 2, 3 )}§
957p( 4, 4 );                      §\C{// rewrite $\Rightarrow$ p( 4, 4, 3 )}§
958p( 4, 4, 4 );           §\C{// rewrite $\Rightarrow$ p( 4, 4, 4 )}§
959// empty arguments
960p(  , 4, 4 );           §\C{// rewrite $\Rightarrow$ p( 1, 4, 4 )}§
961p( 4,  , 4 );           §\C{// rewrite $\Rightarrow$ p( 4, 2, 4 )}§
962p( 4, 4,   );           §\C{// rewrite $\Rightarrow$ p( 4, 4, 3 )}§
963p( 4,  ,   );           §\C{// rewrite $\Rightarrow$ p( 4, 2, 3 )}§
964p(  , 4,   );           §\C{// rewrite $\Rightarrow$ p( 1, 4, 3 )}§
965p(  ,  , 4 );           §\C{// rewrite $\Rightarrow$ p( 1, 2, 4 )}§
966p(  ,  ,   );           §\C{// rewrite $\Rightarrow$ p( 1, 2, 3 )}§
967\end{lstlisting}
968Here the missing arguments are inserted from the default values in the parameter list.
969The compiler rewrites missing default values into explicit positional arguments.
970The advantages of default values are:
971\begin{itemize}
972\item
973Routines with a large number of parameters are often very generalized, giving a programmer a number of different options on how a computation is performed.
974For many of these kinds of routines, there are standard or default settings that work for the majority of computations.
975Without default values for parameters, a programmer is forced to specify these common values all the time, resulting in long argument lists that are error prone.
976\item
977When a routine's interface is augmented with new parameters, it extends the interface providing generalizability\footnote{
978``It should be possible for the implementor of an abstraction to increase its generality.
979So long as the modified abstraction is a generalization of the original, existing uses of the abstraction will not require change.
980It might be possible to modify an abstraction in a manner which is not a generalization without affecting existing uses, but, without inspecting the modules in which the uses occur, this possibility cannot be determined.
981This criterion precludes the addition of parameters, unless these parameters have default or inferred values that are valid for all possible existing applications.''~\cite[p.~128]{Cormack90}}
982(somewhat like the generalization provided by inheritance for classes).
983That is, all existing calls are still valid, although the call must still be recompiled.
984\end{itemize}
985The only disadvantage of default arguments is that unintentional omission of an argument may not result in a compiler-time error.
986Instead, a default value is used, which may not be the programmer's intent.
987
988Default values may only appear in a prototype versus definition context:
989\begin{lstlisting}
990void p( int x, int y = 2, int z = 3 );          §\C{// prototype: allowed}§
991void p( int, int = 2, int = 3 );                        §\C{// prototype: allowed}§
992void p( int x, int y = 2, int z = 3 ) {}        §\C{// definition: not allowed}§
993\end{lstlisting}
994The reason for this restriction is to allow separate compilation.
995Multiple prototypes with different default values is an error.
996\end{description}
997
998Ellipse (``...'') arguments present problems when used with default arguments.
999The conflict occurs because both named and ellipse arguments must appear after positional arguments, giving two possibilities:
1000\begin{lstlisting}
1001p( /* positional */, . . ., /* named */ );
1002p( /* positional */, /* named */, . . . );
1003\end{lstlisting}
1004While it is possible to implement both approaches, the first possibly is more complex than the second, e.g.:
1005\begin{lstlisting}
1006p( int x, int y, int z, . . . );
1007p( 1, 4, 5, 6, z : 3, y : 2 ); §\C{// assume p( /* positional */, . . ., /* named */ );}§
1008p( 1, z : 3, y : 2, 4, 5, 6 ); §\C{// assume p( /* positional */, /* named */, . . . );}§
1009\end{lstlisting}
1010In the first call, it is necessary for the programmer to conceptually rewrite the call, changing named arguments into positional, before knowing where the ellipse arguments begin.
1011Hence, this approach seems significantly more difficult, and hence, confusing and error prone.
1012In the second call, the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
1013
1014The problem is exacerbated with default arguments, e.g.:
1015\begin{lstlisting}
1016void p( int x, int y = 2, int z = 3. . . );
1017p( 1, 4, 5, 6, z : 3 );         §\C{// assume p( /* positional */, . . ., /* named */ );}§
1018p( 1, z : 3, 4, 5, 6 );         §\C{// assume p( /* positional */, /* named */, . . . );}§
1019\end{lstlisting}
1020The first call is an error because arguments 4 and 5 are actually positional not ellipse arguments;
1021therefore, argument 5 subsequently conflicts with the named argument z : 3.
1022In the second call, the default value for y is implicitly inserted after argument 1 and the named arguments separate the positional and ellipse arguments, making it trivial to read the call.
1023For these reasons, \CFA requires named arguments before ellipse arguments.
1024Finally, while ellipse arguments are needed for a small set of existing C routines, like printf, the extended \CFA type system largely eliminates the need for ellipse arguments (see Section 24), making much of this discussion moot.
1025
1026Default arguments and overloading (see Section 24) are complementary.
1027While in theory default arguments can be simulated with overloading, as in:
1028\begin{quote2}
1029\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
1030\multicolumn{1}{c@{\hspace{3em}}}{\textbf{default arguments}}   & \multicolumn{1}{c}{\textbf{overloading}}      \\
1031\begin{lstlisting}
1032void p( int x, int y = 2, int z = 3 ) {...}
1033
1034
1035\end{lstlisting}
1036&
1037\begin{lstlisting}
1038void p( int x, int y, int z ) {...}
1039void p( int x ) { p( x, 2, 3 ); }
1040void p( int x, int y ) { p( x, y, 3 ); }
1041\end{lstlisting}
1042\end{tabular}
1043\end{quote2}
1044the number of required overloaded routines is linear in the number of default values, which is unacceptable growth.
1045In general, overloading should only be used over default arguments if the body of the routine is significantly different.
1046Furthermore, overloading cannot handle accessing default arguments in the middle of a positional list, via a missing argument, such as:
1047\begin{lstlisting}
1048p( 1, /* default */, 5 );               §\C{// rewrite $\Rightarrow$ p( 1, 2, 5 )}§
1049\end{lstlisting}
1050
1051Given the \CFA restrictions above, both named and default arguments are backwards compatible.
1052\CC only supports default arguments;
1053Ada supports both named and default arguments.
1054
1055
1056\section{Type/Routine Nesting}
1057
1058Nesting of types and routines is useful for controlling name visibility (\newterm{name hiding}).
1059
1060
1061\subsection{Type Nesting}
1062
1063\CFA allows \Index{type nesting}, and type qualification of the nested types (see \VRef[Figure]{f:TypeNestingQualification}), where as C hoists\index{type hoisting} (refactors) nested types into the enclosing scope and has no type qualification.
1064\begin{figure}
1065\begin{tabular}{@{}l@{\hspace{3em}}l|l@{}}
1066\multicolumn{1}{c@{\hspace{3em}}}{\textbf{C Type Nesting}}      & \multicolumn{1}{c}{\textbf{C Implicit Hoisting}}      & \multicolumn{1}{|c}{\textbf{\CFA}}    \\
1067\hline
1068\begin{lstlisting}
1069struct S {
1070        enum C { R, G, B };
1071        struct T {
1072                union U { int i, j; };
1073                enum C c;
1074                short int i, j;
1075        };
1076        struct T t;
1077} s;
1078
1079int fred() {
1080        s.t.c = R;
1081        struct T t = { R, 1, 2 };
1082        enum C c;
1083        union U u;
1084}
1085\end{lstlisting}
1086&
1087\begin{lstlisting}
1088enum C { R, G, B };
1089union U { int i, j; };
1090struct T {
1091        enum C c;
1092        short int i, j;
1093};
1094struct S {
1095        struct T t;
1096} s;
1097       
1098
1099
1100
1101
1102
1103
1104\end{lstlisting}
1105&
1106\begin{lstlisting}
1107struct S {
1108        enum C { R, G, B };
1109        struct T {
1110                union U { int i, j; };
1111                enum C c;
1112                short int i, j;
1113        };
1114        struct T t;
1115} s;
1116
1117int fred() {
1118        s.t.c = ®S.®R;  // type qualification
1119        struct ®S.®T t = { ®S.®R, 1, 2 };
1120        enum ®S.®C c;
1121        union ®S.T.®U u;
1122}
1123\end{lstlisting}
1124\end{tabular}
1125\caption{Type Nesting / Qualification}
1126\label{f:TypeNestingQualification}
1127\end{figure}
1128In the left example in C, types ©C©, ©U© and ©T© are implicitly hoisted outside of type ©S© into the containing block scope.
1129In the right example in \CFA, the types are not hoisted and accessed using the field-selection operator ``©.©'' for type qualification, as does Java, rather than the \CC type-selection operator ``©::©''.
1130
1131
1132\subsection{Routine Nesting}
1133
1134While \CFA does not provide object programming by putting routines into structures, it does rely heavily on locally nested routines to redefine operations at or close to a call site.
1135For example, the C quick-sort is wrapped into the following polymorphic \CFA routine:
1136\begin{lstlisting}
1137forall( otype T | { int ?<?( T, T ); } )
1138void qsort( const T * arr, size_t dimension );
1139\end{lstlisting}
1140which can be used to sort in ascending and descending order by locally redefining the less-than operator into greater-than.
1141\begin{lstlisting}
1142const unsigned int size = 5;
1143int ia[size];
1144...                                             §\C{// assign values to array ia}§
1145qsort( ia, size );              §\C{// sort ascending order using builtin ?<?}§
1146{
1147        ®int ?<?( int x, int y ) { return x > y; }® §\C{// nested routine}§
1148        qsort( ia, size );      §\C{// sort descending order by local redefinition}§
1149}
1150\end{lstlisting}
1151
1152Nested routines are not first-class, meaning a nested routine cannot be returned if it has references to variables in its enclosing blocks;
1153the only exception is references to the external block of the translation unit, as these variables persist for the duration of the program.
1154The following program in undefined in \CFA (and ©gcc©\index{gcc})
1155\begin{lstlisting}
1156[* [int]( int )] foo() {                §\C{// int (*foo())( int )}§
1157        int ®i® = 7;
1158        int bar( int p ) {
1159                ®i® += 1;                               §\C{// dependent on local variable}§
1160                sout | ®i® | endl;
1161        }
1162        return bar;                                     §\C{// undefined because of local dependence}§
1163}
1164int main() {
1165        * [int](int) fp = foo();        §\C{// int (*fp)(int)}§
1166    sout | fp( 3 ) | endl;
1167}
1168\end{lstlisting}
1169because
1170
1171Currently, there are no \Index{lambda} expressions, i.e., unnamed routines because routine names are very important to properly select the correct routine.
1172
1173
1174\section{Lexical List}
1175
1176In C and \CFA, lists of elements appear in several contexts, such as the parameter list for a routine call.
1177(More contexts are added shortly.)
1178A list of such elements is called a \newterm{lexical list}.
1179The general syntax of a lexical list is:
1180\begin{lstlisting}
1181[ §\emph{exprlist}§ ]
1182\end{lstlisting}
1183where ©$\emph{exprlist}$© is a list of one or more expressions separated by commas.
1184The brackets, ©[]©, allow differentiating between lexical lists and expressions containing the C comma operator.
1185The following are examples of lexical lists:
1186\begin{lstlisting}
1187[ x, y, z ]
1188[ 2 ]
1189[ v+w, x*y, 3.14159, f() ]
1190\end{lstlisting}
1191Tuples are permitted to contain sub-tuples (i.e., nesting), such as ©[ [ 14, 21 ], 9 ]©, which is a 2-element tuple whose first element is itself a tuple.
1192Note, a tuple is not a record (structure);
1193a record denotes a single value with substructure, whereas a tuple is multiple values with no substructure (see flattening coercion in Section 12.1).
1194In essence, tuples are largely a compile time phenomenon, having little or no runtime presence.
1195
1196Tuples can be organized into compile-time tuple variables;
1197these variables are of \newterm{tuple type}.
1198Tuple variables and types can be used anywhere lists of conventional variables and types can be used.
1199The general syntax of a tuple type is:
1200\begin{lstlisting}
1201[ §\emph{typelist}§ ]
1202\end{lstlisting}
1203where ©$\emph{typelist}$© is a list of one or more legal \CFA or C type specifications separated by commas, which may include other tuple type specifications.
1204Examples of tuple types include:
1205\begin{lstlisting}
1206[ unsigned int, char ]
1207[ double, double, double ]
1208[ * int, int * ]                §\C{// mix of CFA and ANSI}§
1209[ * [ 5 ] int, * * char, * [ [ int, int ] ] (int, int) ]
1210\end{lstlisting}
1211Like tuples, tuple types may be nested, such as ©[ [ int, int ], int ]©, which is a 2-element tuple type whose first element is itself a tuple type.
1212
1213Examples of declarations using tuple types are:
1214\begin{lstlisting}
1215[ int, int ] x;                 §\C{// 2 element tuple, each element of type int}§
1216* [ char, char ] y;             §\C{// pointer to a 2 element tuple}§
1217[ [ int, int ] ] z ([ int, int ]);
1218\end{lstlisting}
1219The last example declares an external routine that expects a 2 element tuple as an input parameter and returns a 2 element tuple as its result.
1220
1221As mentioned, tuples can appear in contexts requiring a list of value, such as an argument list of a routine call.
1222In unambiguous situations, the tuple brackets may be omitted, e.g., a tuple that appears as an argument may have its
1223square brackets omitted for convenience; therefore, the following routine invocations are equivalent:
1224\begin{lstlisting}
1225f( [ 1, x+2, fred() ] );
1226f( 1, x+2, fred() );
1227\end{lstlisting}
1228Also, a tuple or a tuple variable may be used to supply all or part of an argument list for a routine expecting multiple input parameters or for a routine expecting a tuple as an input parameter.
1229For example, the following are all legal:
1230\begin{lstlisting}
1231[ int, int ] w1;
1232[ int, int, int ] w2;
1233[ void ] f (int, int, int); /* three input parameters of type int */
1234[ void ] g ([ int, int, int ]); /* 3 element tuple as input */
1235f( [ 1, 2, 3 ] );
1236f( w1, 3 );
1237f( 1, w1 );
1238f( w2 );
1239g( [ 1, 2, 3 ] );
1240g( w1, 3 );
1241g( 1, w1 );
1242g( w2 );
1243\end{lstlisting}
1244Note, in all cases 3 arguments are supplied even though the syntax may appear to supply less than 3. As mentioned, a
1245tuple does not have structure like a record; a tuple is simply converted into a list of components.
1246\begin{rationale}
1247The present implementation of \CFA does not support nested routine calls when the inner routine returns multiple values; i.e., a statement such as ©g( f() )© is not supported.
1248Using a temporary variable to store the  results of the inner routine and then passing this variable to the outer routine works, however.
1249\end{rationale}
1250
1251A tuple can contain a C comma expression, provided the expression containing the comma operator is enclosed in parentheses.
1252For instance, the following tuples are equivalent:
1253\begin{lstlisting}
1254[ 1, 3, 5 ]
1255[ 1, (2, 3), 5 ]
1256\end{lstlisting}
1257The second element of the second tuple is the expression (2, 3), which yields the result 3.
1258This requirement is the same as for comma expressions in argument lists.
1259
1260Type qualifiers, i.e., const and volatile, may modify a tuple type.
1261The meaning is the same as for a type qualifier modifying an aggregate type [Int99, x 6.5.2.3(7),x 6.7.3(11)], i.e., the qualifier is distributed across all of the types in the tuple, e.g.:
1262\begin{lstlisting}
1263const volatile [ int, float, const int ] x;
1264\end{lstlisting}
1265is equivalent to:
1266\begin{lstlisting}
1267[ const volatile int, const volatile float, const volatile int ] x;
1268\end{lstlisting}
1269Declaration qualifiers can only appear at the start of a \CFA tuple declaration4, e.g.:
1270\begin{lstlisting}
1271extern [ int, int ] w1;
1272static [ int, int, int ] w2;
1273\end{lstlisting}
1274\begin{rationale}
1275Unfortunately, C's syntax for subscripts precluded treating them as tuples.
1276The C subscript list has the form ©[i][j]...© and not ©[i, j, ...]©.
1277Therefore, there is no syntactic way for a routine returning multiple values to specify the different subscript values, e.g., ©f[g()]© always means a single subscript value because there is only one set of brackets.
1278Fixing this requires a major change to C because the syntactic form ©M[i, j, k]© already has a particular meaning: ©i, j, k© is a comma expression.
1279\end{rationale}
1280
1281
1282\subsection{Tuple Coercions}
1283
1284There are four coercions that can be performed on tuples and tuple variables: closing, opening, flattening and structuring.
1285In addition, the coercion of dereferencing can be performed on a tuple variable to yield its value(s), as for other variables.
1286A \newterm{closing coercion} takes a set of values and converts it into a tuple value, which is a contiguous set of values, as in:
1287\begin{lstlisting}
1288[ int, int, int, int ] w;
1289w = [ 1, 2, 3, 4 ];
1290\end{lstlisting}
1291First the right-hand tuple is closed into a tuple value and then the tuple value is assigned.
1292
1293An \newterm{opening coercion} is the opposite of closing; a tuple value is converted into a tuple of values, as in:
1294\begin{lstlisting}
1295[ a, b, c, d ] = w
1296\end{lstlisting}
1297©w© is implicitly opened to yield a tuple of four values, which are then assigned individually.
1298
1299A \newterm{flattening coercion} coerces a nested tuple, i.e., a tuple with one or more components, which are themselves tuples, into a flattened tuple, which is a tuple whose components are not tuples, as in:
1300\begin{lstlisting}
1301[ a, b, c, d ] = [ 1, [ 2, 3 ], 4 ];
1302\end{lstlisting}
1303First the right-hand tuple is flattened and then the values are assigned individually.
1304Flattening is also performed on tuple types.
1305For example, the type ©[ int, [ int, int ], int ]© can be coerced, using flattening, into the type ©[ int, int, int, int ]©.
1306
1307A \newterm{structuring coercion} is the opposite of flattening;
1308a tuple is structured into a more complex nested tuple.
1309For example, structuring the tuple ©[ 1, 2, 3, 4 ]© into the tuple ©[ 1, [ 2, 3 ], 4 ]© or the tuple type ©[ int, int, int, int ]© into the tuple type ©[ int, [ int, int ], int ]©.
1310In the following example, the last assignment illustrates all the tuple coercions:
1311\begin{lstlisting}
1312[ int, int, int, int ] w = [ 1, 2, 3, 4 ];
1313int x = 5;
1314[ x, w ] = [ w, x ];            §\C{// all four tuple coercions}§
1315\end{lstlisting}
1316Starting on the right-hand tuple in the last assignment statement, w is opened, producing a tuple of four values;
1317therefore, the right-hand tuple is now the tuple ©[ [ 1, 2, 3, 4 ], 5 ]©.
1318This tuple is then flattened, yielding ©[ 1, 2, 3, 4, 5 ]©, which is structured into ©[ 1, [ 2, 3, 4, 5 ] ]© to match the tuple type of the left-hand side.
1319The tuple ©[ 2, 3, 4, 5 ]© is then closed to create a tuple value.
1320Finally, ©x© is assigned ©1© and ©w© is assigned the tuple value using multiple assignment (see Section 14).
1321\begin{rationale}
1322A possible additional language extension is to use the structuring coercion for tuples to initialize a complex record with a tuple.
1323\end{rationale}
1324
1325
1326\section{Mass Assignment}
1327
1328\CFA permits assignment to several variables at once using mass assignment~\cite{CLU}.
1329Mass assignment has the following form:
1330\begin{lstlisting}
1331[ §\emph{lvalue}§, ..., §\emph{lvalue}§ ] = §\emph{expr}§;
1332\end{lstlisting}
1333The left-hand side is a tuple of \emph{lvalues}, which is a list of expressions each yielding an address, i.e., any data object that can appear on the left-hand side of a conventional assignment statement.
1334©$\emph{expr}$© is any standard arithmetic expression.
1335Clearly, the types of the entities being assigned must be type compatible with the value of the expression.
1336
1337Mass assignment has parallel semantics, e.g., the statement:
1338\begin{lstlisting}
1339[ x, y, z ] = 1.5;
1340\end{lstlisting}
1341is equivalent to:
1342\begin{lstlisting}
1343x = 1.5; y = 1.5; z = 1.5;
1344\end{lstlisting}
1345This semantics is not the same as the following in C:
1346\begin{lstlisting}
1347x = y = z = 1.5;
1348\end{lstlisting}
1349as conversions between intermediate assignments may lose information.
1350A more complex example is:
1351\begin{lstlisting}
1352[ i, y[i], z ] = a + b;
1353\end{lstlisting}
1354which is equivalent to:
1355\begin{lstlisting}
1356t = a + b;
1357a1 = &i; a2 = &y[i]; a3 = &z;
1358*a1 = t; *a2 = t; *a3 = t;
1359\end{lstlisting}
1360The temporary ©t© is necessary to store the value of the expression to eliminate conversion issues.
1361The temporaries for the addresses are needed so that locations on the left-hand side do not change as the values are assigned.
1362In this case, ©y[i]© uses the previous value of ©i© and not the new value set at the beginning of the mass assignment.
1363
1364
1365\section{Multiple Assignment}
1366
1367\CFA also supports the assignment of several values at once, known as multiple assignment~\cite{CLU,Galletly96}.
1368Multiple assignment has the following form:
1369\begin{lstlisting}
1370[ §\emph{lvalue}§, . . ., §\emph{lvalue}§ ] = [ §\emph{expr}§, . . ., §\emph{expr}§ ];
1371\end{lstlisting}
1372The left-hand side is a tuple of \emph{lvalues}, and the right-hand side is a tuple of \emph{expr}s.
1373Each \emph{expr} appearing on the righthand side of a multiple assignment statement is assigned to the corresponding \emph{lvalues} on the left-hand side of the statement using parallel semantics for each assignment.
1374An example of multiple assignment is:
1375\begin{lstlisting}
1376[ x, y, z ] = [ 1, 2, 3 ];
1377\end{lstlisting}
1378Here, the values ©1©, ©2© and ©3© are assigned, respectively, to the variables ©x©, ©y© and ©z©.
1379 A more complex example is:
1380\begin{lstlisting}
1381[ i, y[ i ], z ] = [ 1, i, a + b ];
1382\end{lstlisting}
1383Here, the values ©1©, ©i© and ©a + b© are assigned to the variables ©i©, ©y[i]© and ©z©, respectively.
1384 Note, the parallel semantics of
1385multiple assignment ensures:
1386\begin{lstlisting}
1387[ x, y ] = [ y, x ];
1388\end{lstlisting}
1389correctly interchanges (swaps) the values stored in ©x© and ©y©.
1390The following cases are errors:
1391\begin{lstlisting}
1392[ a, b, c ] = [ 1, 2, 3, 4 ];
1393[ a, b, c ] = [ 1, 2 ];
1394\end{lstlisting}
1395because the number of entities in the left-hand tuple is unequal with the right-hand tuple.
1396
1397As for all tuple contexts in C, side effects should not be used because C does not define an ordering for the evaluation of the elements of a tuple;
1398both these examples produce indeterminate results:
1399\begin{lstlisting}
1400f( x++, x++ );                          §\C{// C routine call with side effects in arguments}§
1401[ v1, v2 ] = [ x++, x++ ];      §\C{// side effects in righthand side of multiple assignment}§
1402\end{lstlisting}
1403
1404
1405\section{Cascade Assignment}
1406
1407As in C, \CFA mass and multiple assignments can be cascaded, producing cascade assignment.
1408Cascade assignment has the following form:
1409\begin{lstlisting}
1410§\emph{tuple}§ = §\emph{tuple}§ = ... = §\emph{tuple}§;
1411\end{lstlisting}
1412and it has the same parallel semantics as for mass and multiple assignment.
1413Some examples of cascade assignment are:
1414\begin{lstlisting}
1415x1 = y1 = x2 = y2 = 0;
1416[ x1, y1 ] = [ x2, y2 ] = [ x3, y3 ];
1417[ x1, y1 ] = [ x2, y2 ] = 0;
1418[ x1, y1 ] = z = 0;
1419\end{lstlisting}
1420As in C, the rightmost assignment is performed first, i.e., assignment parses right to left.
1421
1422
1423\section{Unnamed Structure Fields}
1424
1425C requires each field of a structure to have a name, except for a bit field associated with a basic type, e.g.:
1426\begin{lstlisting}
1427struct {
1428        int f1;                 // named field
1429        int f2 : 4;             // named field with bit field size
1430        int : 3;                // unnamed field for basic type with bit field size
1431        int ;                   // disallowed, unnamed field
1432        int *;                  // disallowed, unnamed field
1433        int (*)(int);   // disallowed, unnamed field
1434};
1435\end{lstlisting}
1436This requirement is relaxed by making the field name optional for all field declarations; therefore, all the field declarations in the example are allowed.
1437As for unnamed bit fields, an unnamed field is used for padding a structure to a particular size.
1438A list of unnamed fields is also supported, e.g.:
1439\begin{lstlisting}
1440struct {
1441        int , , ;               // 3 unnamed fields
1442}
1443\end{lstlisting}
1444
1445
1446\section{Field Tuples}
1447
1448Tuples may be used to select multiple fields of a record by field name.
1449Its general form is:
1450\begin{lstlisting}
1451§\emph{expr}§ . [ §\emph{fieldlist}§ ]
1452§\emph{expr}§ -> [ §\emph{fieldlist}§ ]
1453\end{lstlisting}
1454\emph{expr} is any expression yielding a value of type record, e.g., ©struct©, ©union©.
1455Each element of \emph{ fieldlist} is an element of the record specified by \emph{expr}.
1456A record-field tuple may be used anywhere a tuple can be used. An example of the use of a record-field tuple is
1457the following:
1458\begin{lstlisting}
1459struct s {
1460        int f1, f2;
1461        char f3;
1462        double f4;
1463} v;
1464v.[ f3, f1, f2 ] = ['x', 11, 17 ];      §\C{// equivalent to v.f3 = 'x', v.f1 = 11, v.f2 = 17}§
1465f( v.[ f3, f1, f2 ] );                          §\C{// equivalent to f( v.f3, v.f1, v.f2 )}§
1466\end{lstlisting}
1467Note, the fields appearing in a record-field tuple may be specified in any order;
1468also, it is unnecessary to specify all the fields of a struct in a multiple record-field tuple.
1469
1470If a field of a ©struct© is itself another ©struct©, multiple fields of this subrecord can be specified using a nested record-field tuple, as in the following example:
1471\begin{lstlisting}
1472struct inner {
1473        int f2, f3;
1474};
1475struct outer {
1476        int f1;
1477        struct inner i;
1478        double f4;
1479} o;
1480
1481o.[ f1, i.[ f2, f3 ], f4 ] = [ 11, 12, 13, 3.14159 ];
1482\end{lstlisting}
1483
1484
1485\section{Labelled Continue / Break}
1486
1487While C provides ©continue© and ©break© statements for altering control flow, both are restricted to one level of nesting for a particular control structure.
1488Unfortunately, this restriction forces programmers to use ©goto© to achieve the equivalent control-flow for more than one level of nesting.
1489To prevent having to switch to the ©goto©, \CFA extends the ©continue©\index{continue@©continue©}\index{continue@©continue©!labelled}\index{labelled!continue@©continue©} and ©break©\index{break@©break©}\index{break@©break©!labelled}\index{labelled!break@©break©} with a target label to support static multi-level exit\index{multi-level exit}\index{static multi-level exit}~\cite{Buhr85,Java}.
1490For both ©continue© and ©break©, the target label must be directly associated with a ©for©, ©while© or ©do© statement;
1491for ©break©, the target label can also be associated with a ©switch©, ©if© or compound (©{}©) statement.
1492
1493The following example shows the labelled ©continue© specifying which control structure is the target for the next loop iteration:
1494\begin{quote2}
1495\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
1496\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
1497\begin{lstlisting}
1498®L1:® do {
1499        ®L2:® while ( ... ) {
1500                ®L3:® for ( ... ) {
1501                        ... continue ®L1®; ...  // continue do
1502                        ... continue ®L2®; ...  // continue while
1503                        ... continue ®L3®; ...  // continue for
1504                } // for
1505        } // while
1506} while ( ... );
1507\end{lstlisting}
1508&
1509\begin{lstlisting}
1510do {
1511        while ( ... ) {
1512                for ( ... ) {
1513                        ... goto L1; ...
1514                        ... goto L2; ...
1515                        ... goto L3; ...
1516                L3: ; }
1517        L2: ; }
1518L1: ; } while ( ... );
1519\end{lstlisting}
1520\end{tabular}
1521\end{quote2}
1522The innermost loop has three restart points, which cause the next loop iteration to begin.
1523
1524The following example shows the labelled ©break© specifying which control structure is the target for exit:
1525\begin{quote2}
1526\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
1527\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
1528\begin{lstlisting}
1529®L1:® {
1530        ... §declarations§ ...
1531        ®L2:® switch ( ... ) {
1532          case 3:
1533            ®L3:® if ( ... ) {
1534                        ®L4:® for ( ... ) {
1535                                ... break ®L1®; ...     // exit compound statement
1536                                ... break ®L2®; ...     // exit switch
1537                                ... break ®L3®; ...     // exit if
1538                                ... break ®L4®; ...     // exit loop
1539                        } // for
1540                } else {
1541                        ... break ®L3®; ...             // exit if
1542                } // if
1543        } // switch
1544} // compound
1545\end{lstlisting}
1546&
1547\begin{lstlisting}
1548{
1549        ... §declarations§ ...
1550        switch ( ... ) {
1551          case 3:
1552            if ( ... ) {
1553                        for ( ... ) {
1554                                ... goto L1; ...
1555                                ... goto L2; ...
1556                                ... goto L3; ...
1557                                ... goto L4; ...
1558                        } L4: ;
1559                } else {
1560                        ... goto L3; ...
1561                } L3: ;
1562        } L2: ;
1563} L1: ;
1564\end{lstlisting}
1565\end{tabular}
1566\end{quote2}
1567The innermost loop has four exit points, which cause termination of one or more of the four \Index{nested control structure}s.
1568
1569Both ©continue© and ©break© with target labels are simply a ©goto©\index{goto@©goto©!restricted} restricted in the following ways:
1570\begin{itemize}
1571\item
1572They cannot be used to create a loop.
1573This means that only the looping construct can be used to create a loop.
1574This restriction is important since all situations that can result in repeated execution of statements in a program are clearly delineated.
1575\item
1576Since they always transfer out of containing control structures, they cannot be used to branch into a control structure.
1577\end{itemize}
1578The advantage of the labelled ©continue©/©break© is allowing static multi-level exits without having to use the ©goto© statement and tying control flow to the target control structure rather than an arbitrary point in a program.
1579Furthermore, the location of the label at the \emph{beginning} of the target control structure informs the reader that complex control-flow is occurring in the body of the control structure.
1580With ©goto©, the label is at the end of the control structure, which fails to convey this important clue early enough to the reader.
1581Finally, using an explicit target for the transfer instead of an implicit target allows new constructs to be added or removed without affecting existing constructs.
1582The implicit targets of the current ©continue© and ©break©, i.e., the closest enclosing loop or ©switch©, change as certain constructs are added or removed.
1583
1584
1585\section{Switch Statement}
1586
1587C allows a number of questionable forms for the ©switch© statement:
1588\begin{enumerate}
1589\item
1590By default, the end of a ©case© clause\footnote{
1591In this section, the term \emph{case clause} refers to either a ©case© or ©default© clause.}
1592\emph{falls through} to the next ©case© clause in the ©switch© statement;
1593to exit a ©switch© statement from a ©case© clause requires explicitly terminating the clause with a transfer statement, most commonly ©break©:
1594\begin{lstlisting}
1595switch ( i ) {
1596  case 1:
1597        ...
1598        // fall-through
1599  case 2:
1600        ...
1601        break;  // exit switch statement
1602}
1603\end{lstlisting}
1604The ability to fall-through to the next clause \emph{is} a useful form of control flow, specifically when a sequence of case actions compound:
1605\begin{lstlisting}
1606switch ( argc ) {
1607  case 3:
1608        // open output file
1609        // fall-through
1610  case 2:
1611        // open input file
1612        break;  // exit switch statement
1613  default:
1614        // usage message
1615}
1616\end{lstlisting}
1617In this example, case 2 is always done if case 3 is done.
1618This control flow is difficult to simulate with if statements or a ©switch© statement without fall-through as code must be duplicated or placed in a separate routine.
1619C also uses fall-through to handle multiple case-values resulting in the same action:
1620\begin{lstlisting}
1621switch ( i ) {
1622  case 1: case 3: case 5:       // odd values
1623        // same action
1624        break;
1625  case 2: case 4: case 6:       // even values
1626        // same action
1627        break;
1628}
1629\end{lstlisting}
1630However, this situation is handled in other languages without fall-through by allowing a list of case values.
1631While fall-through itself is not a problem, the problem occurs when fall-through is the default, as this semantics is not intuitive to most programmers and is different from virtually all other programming languages with a ©switch© statement.
1632Hence, default fall-through semantics results in a large number of programming errors as programmers often forget the ©break© statement at the end of a ©case© clause, resulting in inadvertent fall-through.
1633
1634\item
1635It is possible to place ©case© clauses on statements nested \emph{within} the body of the ©switch© statement:
1636\begin{lstlisting}
1637switch ( i ) {
1638  case 0:
1639        if ( j < k ) {
1640                ...
1641          case 1:               // transfer into "if" statement
1642                if ( j < m ) {
1643                        ...
1644                  case 2:       // transfer into "if" statement
1645                        ...
1646                }
1647        }
1648  case 3:
1649        while ( j < 5 ) {
1650                ...
1651          case 4:               // transfer into "while" statement
1652                ...
1653        }
1654}
1655\end{lstlisting}
1656The problem with this usage is branching into control structures, which is known to cause both comprehension and technical difficulties.
1657The comprehension problem occurs from the inability to determine how control reaches a particular point due to the number of branches leading to it.
1658The technical problem results from the inability to ensure allocation and initialization of variables when blocks are not entered at the beginning.
1659Often transferring into a block can bypass variable declaration and/or its initialization, which results in subsequent errors.
1660There are virtually no positive arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
1661Nevertheless, C does have an idiom where this capability is used, known as ``Duff's device''~\cite{Duff83}:
1662\begin{lstlisting}
1663register int n = (count + 7) / 8;
1664switch ( count % 8 ) {
1665case 0: do{ *to = *from++;
1666case 7:         *to = *from++;
1667case 6:         *to = *from++;
1668case 5:         *to = *from++;
1669case 4:         *to = *from++;
1670case 3:         *to = *from++;
1671case 2:         *to = *from++;
1672case 1:         *to = *from++;
1673                } while ( --n > 0 );
1674}
1675\end{lstlisting}
1676which unrolls a loop N times (N = 8 above) and uses the ©switch© statement to deal with any iterations not a multiple of N.
1677While efficient, this sort of special purpose usage is questionable:
1678\begin{quote}
1679Disgusting, no? But it compiles and runs just fine. I feel a combination of pride and revulsion at this
1680discovery.~\cite{Duff83}
1681\end{quote}
1682\item
1683It is possible to place the ©default© clause anywhere in the list of labelled clauses for a ©switch© statement, rather than only at the end.
1684Virtually all programming languages with a ©switch© statement require the ©default© clause to appear last in the case-clause list.
1685The logic for this semantics is that after checking all the ©case© clauses without success, the ©default© clause is selected;
1686hence, physically placing the ©default© clause at the end of the ©case© clause list matches with this semantics.
1687This physical placement can be compared to the physical placement of an ©else© clause at the end of a series of connected ©if©/©else© statements.
1688
1689\item
1690It is possible to place unreachable code at the start of a ©switch© statement, as in:
1691\begin{lstlisting}
1692switch ( x ) {
1693        int y = 1;                      §\C{// unreachable initialization}§
1694        x = 7;                          §\C{// unreachable code}§
1695  case 3: ...
1696        y = 3;
1697        ...
1698}
1699\end{lstlisting}
1700While the declaration of the local variable ©y© is useful and its scope is across all ©case© clauses, the initialization for such a variable is defined to never be executed because control always transfers over it.
1701Furthermore, any statements before the first ©case© clause can only be executed if labelled and transfered to using a ©goto©, either from outside or inside of the ©switch©.
1702As mentioned, transfer into control structures should be forbidden.
1703Transfers from within the ©switch© body using a ©goto© are equally unpalatable.
1704\end{enumerate}
1705Before discussing potential language changes to deal with these problems, it is worth observing that in a typical C program:
1706\begin{itemize}
1707\item
1708the number of ©switch© statements is small,
1709\item
1710most ©switch© statements are well formed (i.e., no Duff's device),
1711\item
1712the ©default© clause is usually written as the last case-clause,
1713\item
1714and there is only a medium amount of fall-through from one ©case© clause to the next, and most of these result from a list of case values executing common code, rather than a sequence of case actions that compound.
1715\end{itemize}
1716These observations help to put the effects of suggested changes into perspective.
1717\begin{enumerate}
1718\item
1719Eliminating default fall-through has the greatest potential for affecting existing code.
1720However, even if fall-through is removed, most ©switch© statements would continue to work because of the explicit transfers already present at the end of each ©case© clause, the common placement of the ©default© clause at the end of the case list, and the most common use of fall-through, i.e., a list of ©case© clauses executing common code, e.g.:
1721\begin{lstlisting}
1722case 1:  case 2:  case 3: ...
1723\end{lstlisting}
1724still work.
1725Nevertheless, reversing the default action would have a non-trivial effect on case actions that compound, such as the above example of processing shell arguments.
1726Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called ©choose©, with no fall-through semantics.
1727The ©choose© statement is identical to the new ©switch© statement, except there is no implicit fall-through between case-clauses and the ©break© statement applies to the enclosing loop construct (as for the ©continue© statement in a ©switch© statement).
1728It is still possible to fall-through if a case-clause ends with the new keyword ©fallthru©, e.g.:
1729\begin{lstlisting}
1730®choose® ( i ) {
1731  case 3:
1732        ...
1733        ®fallthru®;             §\C{// explicit fall through}§
1734  case 5:
1735        ...                             §\C{// implicit end of switch}§
1736  case 7:
1737        ...
1738        ®break®                 §\C{// transfer to end of enclosing loop / switch}§
1739  default:
1740        j = 3;
1741}
1742\end{lstlisting}
1743The ability to fall-through is retained because it is a sufficient C-idiom that most C programmers simply expect it, and its absence might discourage these programmers from using the ©choose© statement.
1744\item
1745Eliminating Duff's device is straightforward and only invalidates a small amount of very questionable code.
1746The solution is to allow ©case© clauses to only appear at the same nesting level as the ©switch© body, as is done in most other programming languages with ©switch© statements.
1747\item
1748The issue of ©default© at locations other than at the end of the cause clause can be solved by using good programming style, and there are a few reasonable situations involving fall-through where the ©default© clause needs to appear is locations other than at the end.
1749Therefore, no language change is made for this issue.
1750\item
1751Dealing with unreachable code at the start of a ©switch© statement is solved by defining the declaration-list, including any associated initialization, at the start of a ©switch© statement body to be executed \emph{before} the transfer to the appropriate ©case© clause.
1752This semantics is the same as for declarations at the start of a loop body, which are executed before each iteration of the loop body.
1753As well, statements cannot appear before the first ©case© clause.
1754The change is compatible for declarations with initialization in this context because existing code cannot assume the initialization has occurred.
1755The change is incompatible for statements, but any existing code using it is highly questionable, as in:
1756\begin{lstlisting}
1757switch ( i ) {
1758        L: x = 5;               §\C{// questionable code}§
1759  case 0:
1760        ...
1761}
1762\end{lstlisting}
1763The statement after the ©switch© can never be executed unless it is labelled.
1764If it is labelled, it must be transfered to from outside or inside the ©switch© statement, neither of which is acceptable control flow.
1765\end{enumerate}
1766
1767
1768\section{Case Clause}
1769
1770C restricts the ©case© clause of a ©switch© statement to a single value.
1771For multiple ©case© clauses associated with the same statement, it is necessary to have multiple ©case© clauses rather than multiple values.
1772Requiring a ©case© clause for each value does not seem to be in the spirit of brevity normally associated with C.
1773Therefore, the ©case© clause is extended with a list of values, as in:
1774\begin{quote2}
1775\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
1776\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
1777\begin{lstlisting}
1778switch ( i ) {
1779  ®case 1, 3, 5®:
1780        ...
1781  ®case 2, 4, 6®:
1782        ...
1783}
1784\end{lstlisting}
1785&
1786\begin{lstlisting}
1787switch ( i ) {
1788  case 1: case 3 : case 5:
1789        ...
1790  case 2: case 4 : case 6:
1791        ...
1792}
1793\end{lstlisting}
1794&
1795\begin{lstlisting}
1796
1797// odd values
1798
1799// even values
1800
1801
1802\end{lstlisting}
1803\end{tabular}
1804\end{quote2}
1805In addition, two forms of subranges are allowed to specify case values: a new \CFA form and an existing GNU C form.\footnote{
1806The GNU C form \emph{requires} spaces around the ellipse.}
1807\begin{quote2}
1808\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
1809\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{GNU C}}     \\
1810\begin{lstlisting}
1811switch ( i ) {
1812  ®case 1~5:®
1813        ...
1814  ®case 10~15:®
1815        ...
1816}
1817\end{lstlisting}
1818&
1819\begin{lstlisting}
1820switch ( i )
1821  case 1 ... 5:
1822        ...
1823  case 10 ... 15:
1824        ...
1825}
1826\end{lstlisting}
1827&
1828\begin{lstlisting}
1829
1830// 1, 2, 3, 4, 5
1831
1832// 10, 11, 12, 13, 14, 15
1833
1834
1835\end{lstlisting}
1836\end{tabular}
1837\end{quote2}
1838Lists of subranges are also allowed.
1839\begin{lstlisting}
1840case ®1~5, 12~21, 35~42®:
1841\end{lstlisting}
1842
1843
1844\section{Exception Handling}
1845
1846Exception handling provides two mechanim: change of control flow from a raise to a handler, and commumication from the riase to the handler.
1847\begin{lstlisting}
1848exception void h( int i );
1849exception int h( int i, double d );
1850
1851void f(...) {
1852        ... throw h( 3 );
1853        ... i = resume h( 3, 5.1 );
1854}
1855
1856try {
1857        f(...);
1858} catch h( int w ) {
1859        // reset
1860} resume h( int p, double x ) {
1861        return 17;  // recover
1862} finally {
1863}
1864\end{lstlisting}
1865So the type raised would be the mangled name of the exception prototype and that name would be matched at the handler clauses by comparing the strings.
1866The arguments for the call would have to be packed in a message and unpacked at handler clause and then a call made to the handler.
1867
1868
1869\section{Types}
1870
1871\subsection{Type Definitions}
1872
1873\CFA allows users to define new types using the keyword type.
1874
1875\begin{lstlisting}
1876// SensorValue is a distinct type and represented as an int
1877type SensorValue = int;
1878\end{lstlisting}
1879
1880A type definition is different from a typedef in C because a typedef just creates an alias for a type,  while Do.s type definition creates a distinct type.
1881This means that users can define distinct function overloads for the new type (see Overloading for more information).
1882For example:
1883
1884\begin{lstlisting}
1885type SensorValue = int;
1886void printValue(int v) {...}
1887void printValue(SensorValue v) {...}
1888void process(int v) {...}
1889
1890SensorValue s = ...;
1891
1892printValue(s); // calls version with SensorValue argument
1893
1894printValue((int) s); // calls version with int argument
1895
1896process(s); // implicit conversion to int
1897\end{lstlisting}
1898
1899If SensorValue was defined with a typedef, then these two print functions would not have unique signatures.
1900This can be very useful to create a distinct type that has the same representation as another type.
1901
1902The compiler will assume it can safely convert from the old type to the new type, implicitly.
1903Users may override this and define a function that must be called to convert from one type to another.
1904
1905\begin{lstlisting}
1906type SensorValue = int;
1907// ()? is the overloaded conversion operator identifier
1908// This function converts an int to a SensorValue
1909SensorValue ()?(int val) {
1910        ...
1911}
1912void process(int v) {...}
1913
1914SensorValue s = ...;
1915process(s); // implicit call to conversion operator
1916\end{lstlisting}
1917
1918In many cases, it is not desired for the compiler to do this implicit conversion.
1919To avoid that, the user can use the explicit modifier on the conversion operator.
1920Any places where the conversion is needed but not explicit (with a cast), will result in a compile-time error.
1921
1922\begin{lstlisting}
1923type SensorValue = int;
1924
1925// conversion from int to SensorValue; must be explicit
1926explicit SensorValue ()?(int val) {
1927        ...
1928}
1929
1930void process(int v) {...}
1931
1932SensorValue s = ...;
1933process(s); // implicit cast to int: compile-time error
1934process((int) s); // explicit cast to int: calls conversion func
1935\end{lstlisting}
1936
1937The conversion may not require any code, but still need to be explicit; in that case, the syntax can be simplified to:
1938\begin{lstlisting}
1939type SensorValue = int;
1940explicit SensorValue ()?(int);
1941void process(int v) {...}
1942
1943SensorValue s = ...;
1944process(s); // compile-time error
1945process((int) s); // type is converted, no function is called
1946\end{lstlisting}
1947
1948
1949\subsection{Structures}
1950
1951Structures in \CFA are basically the same as structures in C.
1952A structure is defined with the same syntax as in C.
1953When referring to a structure in \CFA, users may omit the struct keyword.
1954\begin{lstlisting}
1955struct Point {
1956        double x;
1957        double y;
1958};
1959
1960Point p = {0.0, 0.0};
1961\end{lstlisting}
1962
1963\CFA does not support inheritance among types, but instead uses composition to enable reuse of structure fields.
1964Composition is achieved by embedding one type into another.
1965When type A is embedded in type B, an object with type B may be used as an object of type A, and the fields of type A are directly accessible.
1966Embedding types is achieved using anonymous members.
1967For example, using Point from above:
1968\begin{lstlisting}
1969void foo(Point p);
1970
1971struct ColoredPoint {
1972        Point; // anonymous member (no identifier)
1973        int Color;
1974};
1975...
1976        ColoredPoint cp = ...;
1977        cp.x = 10.3; // x from Point is accessed directly
1978        cp.color = 0x33aaff; // color is accessed normally
1979        foo(cp); // cp can be used directly as a Point
1980\end{lstlisting}
1981
1982
1983\subsection{Constructors and Destructors}
1984
1985\CFA supports C initialization of structures, but it also adds constructors for more advanced initialization.
1986Additionally, \CFA adds destructors that are called when a variable is de-allocated (variable goes out of scope or object is deleted).
1987These functions take a reference to the structure as a parameter (see
1988References for more information).
1989
1990\begin{figure}
1991\begin{lstlisting}
1992struct Widget {
1993        int id;
1994        float size;
1995        Parts *optionalParts;
1996};
1997
1998// ?{} is the constructor operator identifier
1999// The first argument is a reference to the type to initialize
2000// Subsequent arguments can be specified for initialization
2001
2002void ?{}(Widget &w) { // default constructor
2003        w.id = -1;
2004        w.size = 0.0;
2005        w.optionalParts = 0;
2006}
2007
2008// constructor with values (does not need to include all fields)
2009void ?{}(Widget &w, int id, float size) {
2010        w.id = id;
2011        w.size = size;
2012        w.optionalParts = 0;
2013}
2014
2015// ^? is the destructor operator identifier
2016void ^?(Widget &w) { // destructor
2017        w.id = 0;
2018        w.size = 0.0;
2019        if (w.optionalParts != 0) {
2020        // This is the only pointer to optionalParts, free it
2021        free(w.optionalParts);
2022        w.optionalParts = 0;
2023        }
2024}
2025
2026Widget baz; // reserve space only
2027Widget foo{}; // calls default constructor
2028Widget bar{23, 2.45}; // calls constructor with values
2029baz{24, 0.91}; // calls constructor with values
2030?{}(baz, 24, 0.91}; // explicit call to constructor
2031^bar; // explicit call to destructor
2032^?(bar); // explicit call to destructor
2033\end{lstlisting}
2034\caption{Constructors and Destructors}
2035\end{figure}
2036
2037
2038\begin{comment}
2039\section{References}
2040
2041\CFA supports reference types similar to rvalue references in \CC.
2042A reference is essentially an alias for a variable, allowing multiple names to refer to the same object.
2043A reference can be used as a safer alternative to a pointer, because it can be used to pass a variable by reference, but it must always reference an object.
2044It cannot be NULL, it must be assigned a value at initialization, and the object it references cannot change once it is set.
2045By introducing references in parameter types, users are given an easy way to pass a value by reference, without the need for NULL pointer checks.
2046In structures, a reference can replace a pointer to an object that should always have a valid value.
2047When a structure contains a reference, all of its constructors must initialize the reference and all instances of this structure must initialize it upon definition.
2048
2049The syntax for using references in \CFA is the same as \CC with the exception of reference initialization.
2050Use ©&© to specify a reference, and access references just like regular objects, not like pointers (use dot notation to access fields).
2051When initializing a reference, \CFA uses a different syntax which differentiates reference initialization from assignment to a reference.
2052The ©&© is used on both sides of the expression to clarify that the address of the reference is being set to the address of the variable to which it refers.
2053
2054\begin{figure}
2055\begin{lstlisting}
2056// parameter p is a reference to a Point
2057void movePointUp(Point &p) {
2058        p.y += 1.0; // Uses dot-notation to access fields
2059}
2060
2061Point p1 = ...;
2062ColoredPoint cp1 = ...;
2063movePoint(p1); // reference to p1 passed to movePoint
2064movePoint(cp1); // reference to cp1 passed to movePoint
2065
2066// a ListElement cannot be created without a valid list
2067
2068struct ListElement {
2069        int element;
2070        List &list; // a list element has a reference to the list
2071}
2072
2073// The constructor must initialize the reference
2074void ?{}(ListElement &le, int e, List &l) {
2075        le.element = e;
2076        &le.list = &l; // initialize the reference
2077}
2078
2079ListElement e1{88, numberList}; // uses constructor
2080ListElement e2; // compiler error: uninitialized reference
2081Listing 10: References
2082\end{lstlisting}
2083\end{figure}
2084\end{comment}
2085
2086
2087\section{Overloading}
2088
2089Overloading refers to the capability of a programmer to define and use multiple objects in a program with the same name.
2090In \CFA, a declaration may overload declarations from outer scopes with the same name, instead of hiding them as is the case in C.
2091This may cause identical C and \CFA programs to behave differently.
2092The compiler selects the appropriate object (overload resolution) based on context information at the place where it is used.
2093Overloading allows programmers to give functions with different signatures but similar semantics the same name, simplifying the interface to users.
2094Disadvantages of overloading are that it can be used to give functions with different semantics the same name, causing confusion, or that the compiler may resolve to a different function from what the programmer expected.
2095\CFA allows overloading of functions, operators, variables, and even the constants 0 and 1.
2096
2097The compiler follows some overload resolution rules to determine the best interpretation of all of these overloads.
2098The best valid interpretations are the valid interpretations that use the fewest unsafe conversions.
2099Of these, the best are those where the functions and objects involved are the least polymorphic.
2100Of these, the best have the lowest total conversion cost, including all implicit conversions in the argument expressions.
2101Of these, the best have the highest total conversion cost for the implicit conversions (if any) applied to the argument expressions.
2102If there is no single best valid interpretation, or if the best valid interpretation is ambiguous, then the resulting interpretation is ambiguous.
2103For details about type inference and overload resolution, please see the \CFA Language Specification.
2104\begin{lstlisting}
2105int foo(int a, int b) {
2106        float sum = 0.0;
2107        float special = 1.0;
2108        {
2109                int sum = 0;
2110                // both the float and int versions of sum are available
2111                float special = 4.0;
2112                // this inner special hides the outer version
2113                ...
2114        }
2115        ...
2116}
2117\end{lstlisting}
2118
2119
2120\subsection{Overloaded Constant}
2121
2122The constants 0 and 1 have special meaning.
2123In \CFA, as in C, all scalar types can be incremented and
2124decremented, which is defined in terms of adding or subtracting 1.
2125The operations ©&&©, ©||©, and ©!© can be applied to any scalar arguments and are defined in terms of comparison against 0 (ex. ©(a && b)© becomes ©(a != 0 && b != 0)©).
2126
2127In C, the integer constants 0 and 1 suffice because the integer promotion rules can convert them to any
2128arithmetic type, and the rules for pointer expressions treat constant expressions evaluating to 0 as a
2129special case.
2130However, user-defined arithmetic types often need the equivalent of a 1 or 0 for their
2131functions or operators, polymorphic functions often need 0 and 1 constants of a type matching their
2132polymorphic parameters, and user-defined pointer-like types may need a null value.
2133Defining special
2134constants for a user-defined type is more efficient than defining a conversion to the type from ©_Bool©.
2135
2136Why just 0 and 1? Why not other integers? No other integers have special status in C.
2137A facility that let programmers declare specific constants..const Rational 12., for instance. would not be much of an improvement.
2138Some facility for defining the creation of values of programmer-defined types from arbitrary integer tokens would be needed.
2139The complexity of such a feature doesn.t seem worth the gain.
2140
2141For example, to define the constants for a complex type, the programmer would define the following:
2142
2143\begin{lstlisting}
2144struct Complex {
2145        double real;
2146        double imaginary;
2147}
2148
2149const Complex 0 = {0, 0};
2150const Complex 1 = {1, 0};
2151...
2152
2153        Complex a = 0;
2154...
2155
2156        a++;
2157...
2158        if (a) { // same as if (a == 0)
2159...
2160}
2161\end{lstlisting}
2162
2163
2164\subsection{Variable Overloading}
2165
2166The overload rules of \CFA allow a programmer to define multiple variables with the same name, but different types.
2167Allowing overloading of variable names enables programmers to use the same name across multiple types, simplifying naming conventions and is compatible with the other overloading that is allowed.
2168For example, a developer may want to do the following:
2169\begin{lstlisting}
2170int pi = 3;
2171float pi = 3.14;
2172char pi = .p.;
2173\end{lstlisting}
2174
2175
2176\subsection{Function Overloading}
2177
2178Overloaded functions in \CFA are resolved based on the number and type of arguments, type of return value, and the level of specialization required (specialized functions are preferred over generic).
2179
2180The examples below give some basic intuition about how the resolution works.
2181\begin{lstlisting}
2182// Choose the one with less conversions
2183int doSomething(int value) {...} // option 1
2184int doSomething(short value) {...} // option 2
2185
2186int a, b = 4;
2187short c = 2;
2188
2189a = doSomething(b); // chooses option 1
2190a = doSomething(c); // chooses option 2
2191
2192// Choose the specialized version over the generic
2193
2194generic(type T)
2195T bar(T rhs, T lhs) {...} // option 3
2196float bar(float rhs, float lhs){...} // option 4
2197float a, b, c;
2198double d, e, f;
2199c = bar(a, b); // chooses option 4
2200
2201// specialization is preferred over unsafe conversions
2202
2203f = bar(d, e); // chooses option 5
2204\end{lstlisting}
2205
2206
2207\subsection{Operator Overloading}
2208
2209\CFA also allows operators to be overloaded, to simplify the use of user-defined types.
2210Overloading the operators allows the users to use the same syntax for their custom types that they use for built-in types, increasing readability and improving productivity.
2211\CFA uses the following special identifiers to name overloaded operators:
2212
2213\begin{table}[hbt]
2214\hfil
2215\begin{tabular}[t]{ll}
2216%identifier & operation \\ \hline
2217©?[?]© & subscripting \impl{?[?]}\\
2218©?()© & function call \impl{?()}\\
2219©?++© & postfix increment \impl{?++}\\
2220©?--© & postfix decrement \impl{?--}\\
2221©++?© & prefix increment \impl{++?}\\
2222©--?© & prefix decrement \impl{--?}\\
2223©*?© & dereference \impl{*?}\\
2224©+?© & unary plus \impl{+?}\\
2225©-?© & arithmetic negation \impl{-?}\\
2226©~?© & bitwise negation \impl{~?}\\
2227©!?© & logical complement \impl{"!?}\\
2228©?*?© & multiplication \impl{?*?}\\
2229©?/?© & division \impl{?/?}\\
2230\end{tabular}\hfil
2231\begin{tabular}[t]{ll}
2232%identifier & operation \\ \hline
2233©?%?© & remainder \impl{?%?}\\
2234©?+?© & addition \impl{?+?}\\
2235©?-?© & subtraction \impl{?-?}\\
2236©?<<?© & left shift \impl{?<<?}\\
2237©?>>?© & right shift \impl{?>>?}\\
2238©?<?© & less than \impl{?<?}\\
2239©?<=?© & less than or equal \impl{?<=?}\\
2240©?>=?© & greater than or equal \impl{?>=?}\\
2241©?>?© & greater than \impl{?>?}\\
2242©?==?© & equality \impl{?==?}\\
2243©?!=?© & inequality \impl{?"!=?}\\
2244©?&& bitwise AND \impl{?&?}\\
2245\end{tabular}\hfil
2246\begin{tabular}[t]{ll}
2247%identifier & operation \\ \hline
2248©?^& exclusive OR \impl{?^?}\\
2249©?|?© & inclusive OR \impl{?"|?}\\
2250©?=?© & simple assignment \impl{?=?}\\
2251©?*=?© & multiplication assignment \impl{?*=?}\\
2252©?/=?© & division assignment \impl{?/=?}\\
2253©?%=?© & remainder assignment \impl{?%=?}\\
2254©?+=?© & addition assignment \impl{?+=?}\\
2255©?-=?© & subtraction assignment \impl{?-=?}\\
2256©?<<=?© & left-shift assignment \impl{?<<=?}\\
2257©?>>=?© & right-shift assignment \impl{?>>=?}\\
2258©?&=?© & bitwise AND assignment \impl{?&=?}\\
2259©?^=?© & exclusive OR assignment \impl{?^=?}\\
2260©?|=?© & inclusive OR assignment \impl{?"|=?}\\
2261\end{tabular}
2262\hfil
2263\caption{Operator Identifiers}
2264\label{opids}
2265\end{table}
2266
2267These identifiers are defined such that the question marks in the name identify the location of the operands.
2268These operands represent the parameters to the functions, and define how the operands are mapped to the function call.
2269For example, ©a + b© becomes ©?+?(a, b)©.
2270
2271In the example below, a new type, myComplex, is defined with an overloaded constructor, + operator, and string operator.
2272These operators are called using the normal C syntax.
2273
2274\begin{lstlisting}
2275type Complex = struct { // define a Complex type
2276        double real;
2277        double imag;
2278}
2279
2280// Constructor with default values
2281
2282void ?{}(Complex &c, double real = 0.0, double imag = 0.0) {
2283        c.real = real;
2284        c.imag = imag;
2285}
2286
2287Complex ?+?(Complex lhs, Complex rhs) {
2288        Complex sum;
2289        sum.real = lhs.real + rhs.real;
2290        sum.imag = lhs.imag + rhs.imag;
2291        return sum;
2292}
2293
2294String ()?(const Complex c) {
2295        // use the string conversions for the structure members
2296        return (String)c.real + . + . + (String)c.imag + .i.;
2297}
2298...
2299
2300Complex a, b, c = {1.0}; // constructor for c w/ default imag
2301...
2302c = a + b;
2303print(.sum = . + c);
2304\end{lstlisting}
2305
2306
2307\section{Auto Type-Inferencing}
2308
2309Auto type-inferencing occurs in a declaration where a variable's type is inferred from its initialization expression type.
2310\begin{quote2}
2311\begin{tabular}{@{}l@{\hspace{3em}}ll@{}}
2312\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CC}} & \multicolumn{1}{c}{©gcc©}\index{gcc} \\
2313\begin{lstlisting}
2314
2315auto j = 3.0 * 4;
2316int i;
2317auto k = i;
2318\end{lstlisting}
2319&
2320\begin{lstlisting}
2321#define expr 3.0 * i
2322typeof(expr) j = expr;
2323int i;
2324typeof(i) k = i;
2325\end{lstlisting}
2326&
2327\begin{lstlisting}
2328
2329// use type of initialization expression
2330
2331// use type of primary variable
2332\end{lstlisting}
2333\end{tabular}
2334\end{quote2}
2335The two important capabilities are:
2336\begin{itemize}
2337\item
2338preventing having to determine or write out long generic types,
2339\item
2340ensure secondary variables, related to a primary variable, always have the same type.
2341\end{itemize}
2342
2343In \CFA, ©typedef© provides a mechanism to alias long type names with short ones, both globally and locally, but not eliminate the use of the short name.
2344©gcc© provides ©typeof© to declare a secondary variable from a primary variable.
2345\CFA also relies heavily on the specification of the left-hand side of assignment for type inferencing, so in many cases it is crucial to specify the type of the left-hand side to select the correct type of the right-hand expression.
2346Only for overloaded routines with the same return type is variable type-inferencing possible.
2347Finally, ©auto© presents the programming problem of tracking down a type when the type is actually needed.
2348For example, given
2349\begin{lstlisting}
2350auto j = ®...®
2351\end{lstlisting}
2352and the need to write a routine to compute using ©j©
2353\begin{lstlisting}
2354void rtn( ®...® parm );
2355rtn( j );
2356\end{lstlisting}
2357A programmer must work backwards to determine the type of ©j©'s initialization expression, reconstructing the possibly long generic type-name.
2358In this situation, having the type name or a short alias is very useful.
2359
2360There is also the conundrum in type inferencing of when to \emph{\Index{brand}} a type.
2361That is, when is the type of the variable more important than the type of its initialization expression.
2362For example, if a change is made in an initialization expression, it can cause hundreds or thousands of cascading type changes and/or errors.
2363At some point, a programmer wants the type of the variable to remain constant and the expression to be in error when it changes.
2364
2365Given ©typedef© and ©typeof© in \CFA, and the strong need to use the type of left-hand side in inferencing, auto type-inferencing is not supported at this time.
2366Should a significant need arise, this feature can be revisited.
2367
2368
2369\section{Generics}
2370
2371\CFA supports parametric polymorphism to allow users to define generic functions and types.
2372Generics allow programmers to use type variables in place of concrete types so that the code can be reused with multiple types.
2373The type parameters can be restricted to satisfy a set of constraints.
2374This enables \CFA to build fully compiled generic functions and types, unlike other languages like \CC where templates are expanded or must be explicitly instantiated.
2375
2376
2377\subsection{Generic Functions}
2378
2379Generic functions in \CFA are similar to template functions in \CC, and will sometimes be expanded into specialized versions, just like in \CC.
2380The difference, however, is that generic functions in \CFA can also be separately compiled, using function pointers for callers to pass in all needed functionality for the given type.
2381This means that compiled libraries can contain generic functions that can be used by programs linked with them (statically or dynamically).
2382Another advantage over \CC templates is unlike templates, generic functions are statically checked, even without being instantiated.
2383
2384A simple example of using Do.s parametric polymorphism to create a generic swap function would look like this:
2385
2386\begin{lstlisting}
2387generic(type T)
2388void swap(T &a, T &b) {
2389        T tmp = a;
2390        a = b;
2391        b = a;
2392}
2393
2394int a, b;
2395swap(a, b);
2396
2397Point p1, p2;
2398swap(p1, p2);
2399\end{lstlisting}
2400
2401Here, instead of specifying types for the parameters a and b, the function has a generic type parameter, type T.
2402This function can be called with any type, and the compiler will handle generating the proper code for that type, using call site inference to determine the appropriate value for T.
2403
2404
2405\subsection{Bounded Quantification}
2406
2407Some generic functions only work (or make sense) for any type that satisfies a given property.
2408For example, here is a function to pick the minimum of two values of some type.
2409\begin{lstlisting}
2410generic (type T | bool ?<?(T, T) )
2411
2412T min(T a, T b) {
2413        return a < b ? a : b;
2414}
2415\end{lstlisting}
2416
2417It only makes sense to call min with values of a type that has an ordering: a way to decide whether one value is less than another.
2418The ordering function used here is the less than operator, <.
2419The syntax used to reference the operator is discussed in further detail in Operator Overloading.
2420In \CFA, this assertion on the type of a generic is written as the bound, (type T | bool ?<?(T, T)).
2421The \CFA compiler enforces that minis only called with types for which the less than operator is defined, and reports a compile-time error otherwise.
2422
2423Bounds can also involve multiple types, and multiple requirements, as shown below:
2424\begin{lstlisting}
2425generic (type T, type U | { T foo(T, U); U bar(U); })
2426
2427T baz(T t, U u) {
2428        return foo(t, bar(u));
2429}
2430\end{lstlisting}
2431
2432
2433\subsection{Interfaces}
2434
2435Type bounds as shown above are not very informative, merely requiring that a function exists with the right name and type.
2436Suppose you try to call a polymorphic function and \CFA gives you an error that int combine(int, int) is not defined.
2437Can you define it? What is it supposed to do? Perhaps it should compute the sum, or the bitwise and, or the maximum, or the least common multiple; or perhaps it's an operation that can't be defined for integers.
2438The function signature doesn't say.
2439
2440Interfaces gather together a set of function signatures under a common name, which solves two problems.
2441First, an interface name can be used in type bounds instead of function signatures.
2442This avoids repetition when a bound is used in many functions.
2443Second, interfaces explicitly document the existence of a commonly used set of functionality, making programs easier to understand.
2444\begin{lstlisting}
2445generic (type T)
2446interface Orderable {
2447        bool ?<?(T, T);
2448};
2449
2450generic (type T | Orderable(T))
2451T min(T a, T b) {
2452        return a < b ? a : b;
2453}
2454\end{lstlisting}
2455
2456This definition of the interface Orderable makes the generic function min easier to read and understand.
2457Orderable can also be reused for other generic functions, max for example.
2458Interfaces can also build on top of other interfaces.
2459For example:
2460\begin{lstlisting}
2461generic (type T | Orderable(T)
2462interface FooBarable {
2463        int foo(T, T);
2464        int Bar(T, T);
2465};
2466\end{lstlisting}
2467
2468The FooBarable interface specifies all of the bounds of the Orderable interface, plus the additional bounds specified in its definition.
2469A type does not need to specify that it satisfies any interface, the compiler can figure this out at compile time.
2470For example, there is no need to add some special syntax to show that a type implements the Orderable interface, just define a ?<? operator and it is satisfied.
2471
2472
2473\subsection{Generic Typedefs}
2474
2475Type synonyms can be defined generically using the typedef keyword together with a generic type annotation.
2476These can be used to abbreviate complicated type expressions, especially in generic code.
2477\begin{lstlisting}
2478// typedef the generic function pointers for later use
2479
2480generic(type T)
2481typedef int (*predicate)(T);
2482generic(type Captured, type T)
2483typedef void (*callback)(Captured, T);
2484
2485generic(type T)
2486void find(int length, T *array,
2487        predicate(T) p, callback(void *, T)f) {
2488        int i;
2489        for (i = 0; i < length; i++)
2490        if (p(array[i])) f(NULL, array[i]);
2491}
2492\end{lstlisting}
2493
2494
2495\subsection{Generic Types}
2496
2497Generic types are defined using the same mechanisms as those described above for generic functions.
2498This feature allows users to create types that have one or more fields that use generic parameters as types, similar to a template classes in \CC.
2499For example, to make a generic linked list, a placeholder is created for the type of the elements, so that the specific type of the elements in the list need not be specified when defining the list.
2500In C, something like this would have to be done using void pointers and unsafe casting.
2501As in generic functions, Do.s generic types are different from \CC templates in that they can be fully compiled, while \CC templates are more like macro expansions.
2502This means that a \CFA generic type from a compiled library can be used with any type that satisfies the bounds.
2503
2504The syntax for defining a generic type looks very similar to that of a generic function.
2505Generic types support bounds and interfaces, using the same syntax as generic functions.
2506\begin{lstlisting}
2507generic (type T)
2508struct LinkedListElem {
2509        T elem;
2510        LinkedListElem(T) *next;
2511};
2512
2513LinkedListElem *++?(LinkedListElem **elem) {
2514        return *elem = elem->next;
2515}
2516
2517generic (type T)
2518struct LinkedList {
2519        LinkedListElem(T) *head;
2520        unsigned int size;
2521}
2522
2523generic (type T | bool ?==?(T, T))
2524bool contains(LinkedList(T) *list, T elem) {
2525        for(LinkedListElem *iter = list->head; iter != 0; ++iter) {
2526        if (iter->elem == elem) return true;
2527        }
2528        return false;
2529}
2530\end{lstlisting}
2531
2532
2533\section{Safety}
2534
2535Safety, along with productivity, is a key goal of Do.
2536This section discusses the safety features that have been included in \CFA to help programmers create more stable, reliable, and secure code.
2537
2538
2539\subsection{Exceptions}
2540
2541\CFA introduces support for exceptions as an easier way to recover from exceptional conditions that may be detected within a block of code.
2542In C, developers can use error codes and special return values to report to a caller that an error occurred in a function.
2543The major problem with error codes is that they can be easily ignored by the caller.
2544Failure to properly check for errors can result in the caller making incorrect assumptions about the current state or about the return value that are very likely to result in errors later on in the program, making the source of the problem more difficult to find when debugging.
2545An unhandled exception on the other hand will cause a crash, revealing the original source of the erroneous state.
2546
2547Exceptions in \CFA allow a different type of control flow.
2548Throwing an exception terminates execution of the current block, invokes the destructors of variables that are local to the block, and propagates the exception to the parent block.
2549The exception is immediately re-thrown from the parent block unless it is caught as described below.
2550\CFA uses keywords similar to \CC for exception handling.
2551An exception is thrown using a throw statement, which accepts one argument.
2552
2553\begin{lstlisting}
2554        ...
2555
2556        throw 13;
2557
2558        ...
2559\end{lstlisting}
2560
2561An exception can be caught using a catch statement, which specifies the type of the exception it can catch.
2562A catch is specified immediately after a guarded block to signify that it can catch an exception from that block.
2563A guarded block is specified using the try keyword, followed by a block of code inside of curly braces.
2564
2565\begin{lstlisting}
2566        ...
2567
2568        try {
2569                throw 13;
2570        }
2571        catch(int e) {
2572                printf(.caught an exception: %d\n., e);
2573        }
2574\end{lstlisting}
2575
2576
2577\subsection{Memory Management}
2578
2579
2580\subsubsection{Manual Memory Management}
2581
2582Using malloc and free to dynamically allocate memory exposes several potential, and common, errors.
2583First, malloc breaks type safety because it returns a pointer to void.
2584There is no relationship between the type that the returned pointer is cast to, and the amount of memory allocated.
2585This problem is solved with a type-safe malloc.
2586Do.s type-safe malloc does not take any arguments for size.
2587Instead, it infers the type based on the return value, and then allocates space for the inferred type.
2588
2589\begin{lstlisting}
2590float *f = malloc(); // allocates the size of a float
2591
2592struct S {
2593        int i, j, k;
2594};
2595
2596struct S *s = malloc(); // allocates the size of a struct S
2597\end{lstlisting}
2598
2599In addition to the improved malloc, \CFA also provides a technique for combining allocation and initialization into one step, using the new function.
2600For all constructors defined for a given type (see Operator Overloading), a corresponding call to new can be used to allocate and construct that type.
2601
2602\begin{lstlisting}
2603type Complex = struct {
2604        float real;
2605        float imag;
2606};
2607
2608// default constructor
2609
2610void ?{}(Complex &c) {
2611        c.real = 0.0;
2612        c.imag = 0.0;
2613}
2614
2615
2616
2617// 2 parameter constructor
2618
2619void ?{}(Complex &c, float real, float imag) {
2620        c.real = real;
2621        c.imag = imag;
2622}
2623
2624
2625int main() {
2626        Complex c1; // No constructor is called
2627        Complex c2{}; // Default constructor called
2628        Complex c3{1.0, -1.0}; // 2 parameter constructor is called
2629
2630        Complex *p1 = malloc(); // allocate
2631        Complex *p2 = new(); // allocate + default constructor
2632        Complex *p3 = new(0.5, 1.0); // allocate + 2 param constructor
2633}
2634
2635\end{lstlisting}
2636
2637
2638\subsubsection{Automatic Memory Management}
2639
2640\CFA may also support automatic memory management to further improve safety.
2641If the compiler can insert all of the code needed to manage dynamically allocated memory (automatic reference counting), then developers can avoid problems with dangling pointers, double frees, memory leaks, etc.
2642This feature requires further investigation.
2643\CFA will not have a garbage collector, but might use some kind of region-based memory management.
2644
2645
2646\subsection{Unsafe C Constructs}
2647
2648C programmers are able to access all of the low-level tricks that are sometimes needed for close-to-the-hardware programming.
2649Some of these practices however are often error-prone and difficult to read and maintain.
2650Since \CFA is designed to be safer than C, such constructs are disallowed in \CFA code.
2651If a programmer wants to use one of these unsafe C constructs, the unsafe code must be contained in a C linkage block (see Interoperability), which will be compiled like C code.
2652This block means that the user is telling the tools, .I know this is unsafe, but I.m going to do it anyway..
2653
2654The exact set of unsafe C constructs that will be disallowed in \CFA has not yet been decided, but is sure to include pointer arithmetic, pointer casting, etc.
2655Once the full set is decided, the rules will be listed here.
2656
2657
2658\section{Syntactic Anomalies}
2659
2660The number 0 and 1 are treated specially in \CFA, and can be redefined as variables.
2661One syntactic anomaly is when a field in an structure is names 0 or 1:
2662\begin{lstlisting}
2663struct S {
2664        int 0, 1;
2665} s;
2666\end{lstlisting}
2667The problem occurs in accesing these fields using the selection operation ``©.©'':
2668\begin{lstlisting}
2669s.0 = 0;        // ambiguity with floating constant .0
2670s.1 = 1;        // ambiguity with floating constant .1
2671\end{lstlisting}
2672To make this work, a space is required after the field selection:
2673\begin{lstlisting}
2674®s.§\textvisiblespace§0® = 0;
2675®s.§\textvisiblespace§1® = 1;
2676\end{lstlisting}
2677While this sytact is awkward, it is unlikely many programers will name fields of a structure 0 or 1.
2678Like the \CC lexical problem with closing template-syntax, e.g, ©Foo<Bar<int®>>®©, this issue can be solved with a more powerful lexer/parser.
2679
2680There are several ambiguous cases with operator identifiers, e.g., ©int *?*?()©, where the string ©*?*?© can be lexed as ©*©/©?*?© or ©*?©/©*?©.
2681Since it is common practise to put a unary operator juxtaposed to an identifier, e.g., ©*i©, users will be annoyed if they cannot do this with respect to operator identifiers.
2682Even with this special hack, there are 5 general cases that cannot be handled.
2683The first case is for the function-call identifier ©?()©:
2684\begin{lstlisting}
2685int *§\textvisiblespace§?()();  // declaration: space required after '*'
2686\textvisiblespace§?()();              // expression: space required after '*'
2687\end{lstlisting}
2688Without the space, the string ©*?()© is ambiguous without N character look ahead;
2689it requires scanning ahead to determine if there is a ©'('©, which is the start of an argument/parameter list.
2690
2691The 4 remaining cases occur in expressions:
2692\begin{lstlisting}
2693i++§\textvisiblespace§?i:0;             // space required before '?'
2694i--§\textvisiblespace§?i:0;             // space required before '?'
2695\textvisiblespace§?++i:0;             // space required after '?'
2696\textvisiblespace§?--i:0;             // space required after '?'
2697\end{lstlisting}
2698In the first two cases, the string ©i++?© is ambiguous, where this string can be lexed as ©i© / ©++?© or ©i++© / ©?©;
2699it requires scanning ahead to determine if there is a ©'('©, which is the start of an argument list.
2700In the second two cases, the string ©?++x© is ambiguous, where this string can be lexed as ©?++© / ©x© or ©?© / y©++x©;
2701it requires scanning ahead to determine if there is a ©'('©, which is the start of an argument list.
2702
2703
2704\section{Concurrency}
2705
2706Today's processors for nearly all use cases, ranging from embedded systems to large cloud computing servers, are composed of multiple cores, often heterogeneous.
2707As machines grow in complexity, it becomes more difficult for a program to make the most use of the hardware available.
2708\CFA includes built-in concurrency features to enable high performance and improve programmer productivity on these multi-/many-core machines.
2709
2710Concurrency support in \CFA is implemented on top of a highly efficient runtime system of light-weight, M:N, user level threads.
2711The model integrates concurrency features into the language by making the structure type the core unit of concurrency.
2712All communication occurs through method calls, where data is sent via method arguments, and received via the return value.
2713This enables a very familiar interface to all programmers, even those with no parallel programming experience.
2714It also allows the compiler to do static type checking of all communication, a very important safety feature.
2715This controlled communication with type safety has some similarities with channels in Go, and can actually implement
2716channels exactly, as well as create additional communication patterns that channels cannot.
2717Mutex objects, monitors, are used to contain mutual exclusion within an object and synchronization across concurrent threads.
2718
2719Three new keywords are added to support these features:
2720
2721monitor creates a structure with implicit locking when accessing fields
2722
2723mutex implies use of a monitor requiring the implicit locking
2724
2725task creates a type with implicit locking, separate stack, and a thread
2726
2727
2728\subsection{Monitors}
2729
2730A monitor is a structure in \CFA which includes implicit locking of its fields.
2731Users of a monitor interact with it just like any structure, but the compiler handles code as needed to ensure mutual exclusion.
2732An example of the definition of a monitor is shown here:
2733\begin{lstlisting}
2734type Account = monitor {
2735        const unsigned long number; // account number
2736        float balance; // account balance
2737};
2738\end{lstlisting}
2739
2740Since a monitor structure includes an implicit locking mechanism, it does not make sense to copy a monitor;
2741it is always passed by reference.
2742Users can specify to the compiler whether or not a function will require mutual exclusion of the monitor using the mutex modifier on the parameter.
2743When mutex is specified, the compiler inserts locking before executing the body of the function, and unlocking after the body.
2744This means that a function requiring mutual exclusion could block if the lock is already held by another thread.
2745Blocking on a monitor lock does not block the kernel thread, it simply blocks the user thread, which yields its kernel thread while waiting to obtain the lock.
2746If multiple mutex parameters are specified, they will be locked in parameter order (i.e. first parameter is locked first) and unlocked in the
2747reverse order.
2748\begin{lstlisting}
2749// This function accesses a constant field, it does not require
2750// mutual exclusion
2751
2752export unsigned long getAccountNumber(Account &a) {
2753        return a.number;
2754}
2755
2756// This function accesses and modifies a shared field; it
2757// requires mutual exclusion
2758
2759export float withdrawal(mutex Account &a, float amount) {
2760        a.balance -= amount;
2761        return a.balance;
2762}
2763\end{lstlisting}
2764
2765Often, one function using a monitor will call another function using that same monitor.
2766If both require mutual exclusion, then the thread would be waiting for itself to release the lock when it calls the inner function.
2767This situation is resolved by allowing recursive entry (reentrant locks), meaning that if the lock is already held by the caller, it can be locked again.
2768It will still be unlocked the same number of times.
2769An example of this situation is shown below:
2770
2771\begin{lstlisting}
2772// deleting a job from a worker requires mutual exclusion
2773
2774void deleteJob(mutex Worker &w, Job &j) {
2775        ...
2776}
2777
2778// transferring requires mutual exclusion and calls deleteJob
2779
2780void transferJob(mutex Worker &from, Worker &to) {
2781        ...
2782        deleteJob(j);
2783        ...
2784}
2785\end{lstlisting}
2786
2787
2788\subsection{Tasks}
2789
2790\CFA also provides a simple mechanism for creating and utilizing user level threads.
2791A task provides mutual exclusion like a monitor, and also has its own execution state and a thread of control.
2792Similar to a monitor, a task is defined like a structure:
2793\begin{lstlisting}
2794type Adder = task {
2795        int *row;
2796        int size;
2797        int &subtotal;
2798}
2799\end{lstlisting}
2800
2801A task may define a constructor, which will be called upon allocation and run on the caller.s thread.
2802A destructor may also be defined, which is called at de-allocation (when a dynamic object is deleted or when a local object goes out of scope).
2803After a task is allocated and initialized, its thread is spawned implicitly and begins executing in its function call method.
2804All tasks must define this function call method, with a void return value and no additional parameters, or the compiler will report an error.
2805Below are example functions for the above Adder task, and its usage to sum up a matrix on multiple threads.
2806(Note that this example is designed to display the syntax and functionality, not the best method to solve this problem)
2807\begin{lstlisting}
2808void ?{}(Adder &a, int r[], int s, int &st) { // constructor
2809        a.row = r;
2810        a.size = s;
2811        a.subtotal = st;
2812}
2813
2814// implicitly spawn thread and begin execution here
2815
2816void ?()(Adder &a) {
2817        int c;
2818        subtotal = 0;
2819        for (c=0; c<a.size; ++c) {
2820        subtotal += row[c];
2821        }
2822}
2823
2824int main() {
2825        const int rows = 100, cols = 1000000;
2826        int matrix[rows][cols];
2827        int subtotals[rows];
2828        int total = 0;
2829        int r;
2830
2831        { // create a new scope here for our adders
2832        Adder adders[rows];
2833        // read in the matrix
2834        ...
2835        for (r=0; r<rows; ++r) {
2836        // tasks are initialized on this thread
2837        Adders[r] = {matrix[r], cols, subtotals[r]};
2838        Adders[r](); // spawn thread and begin execution
2839        }
2840        } // adders go out of scope; block here until they all finish
2841        total += subtotals[r];
2842        printf(.total is %d\n., total);
2843}
2844\end{lstlisting}
2845
2846
2847\subsection{Cooperative Scheduling}
2848
2849Tasks in \CFA are cooperatively scheduled, meaning that a task will not be interrupted by another task, except at specific yield points.
2850In Listing 31, there are no yield points, so each task runs to completion with no interruptions.
2851Places where a task could yield include waiting for a lock (explicitly or implicitly), waiting for I/O, or waiting for a specific function (or one of a set of functions) to be called.
2852This last option is introduced with the yield function. yield is used to indicate that this task should yield its thread until the specified function is called.
2853For example, the code below defines a monitor that maintains a generic list.
2854When a task tries to pop from the list, but it is empty, the task should yield until another task puts something into the list, with the push function.
2855Similarly, when a task tries to push something onto the list, but it is full, it will yield until another task frees some space with the pop function.
2856
2857\begin{lstlisting}
2858// type T is used as a generic type for all definitions inside
2859// the curly brackets
2860
2861generic(type T) {
2862        type Channel = monitor {
2863        List(T) list; // list is a simple generic list type
2864        };
2865
2866        T pop(mutex &Channel(T) ch) {
2867        if (ch.list.empty()) {
2868        // yield until push is called for this channel
2869        yield(push);
2870        }
2871        return ch.list.pop();
2872        }
2873
2874        void push(mutex &Channel(T)ch, T val) {
2875        if (ch.list.full()) {
2876        // yield until pop is called for this channel
2877        yield(pop);
2878        }
2879        ch.list.push(val);
2880        }
2881}
2882\end{lstlisting}
2883
2884A task can also yield indefinitely by calling yield with no arguments.
2885This will tell the scheduler to yield this task until it is resumed by some other task.
2886A task can resume another task by using its functional call operator.
2887The code below shows a simple ping-pong example, where two tasks yield back and forth to each other using these methods.
2888
2889\begin{lstlisting}
2890type Ping = task {
2891        Pong *partner;
2892};
2893
2894void ?{}(Ping &p, Pong *partner = 0) {
2895        p.partner = partner;
2896}
2897
2898void ?()(Ping &p) {
2899        for(;;) { // loop forever
2900        printf(.ping\n.);
2901        partner(); // resumes the partner task
2902        yield(); // yields this task
2903        }
2904}
2905
2906type Pong = task {
2907        Ping *partner;
2908};
2909
2910void ?{}(Pong &p, Ping *partner = 0) {
2911        p.partner = partner;
2912}
2913
2914void ?()(Pong &p) {
2915        for(;;) { // loop forever
2916        yield(); // yields this task
2917        printf(.pong/n.);
2918        partner(); // resumes the partner task
2919        }
2920}
2921
2922void main() {
2923        Ping ping; // allocate ping
2924        Pong pong{ping}; // allocate, initialize, and start pong
2925        Ping{pong}; // initialize and start ping
2926}
2927\end{lstlisting}
2928
2929The same functionality can be accomplished by providing functions to be called by the partner task.
2930\begin{lstlisting}
2931type Pingpong = task {
2932        String msg;
2933        Pingpong *partner;
2934};
2935
2936void ?{}(Pingpong &p, String msg, Pingpong *partner = 0) {
2937        p.msg = msg;
2938        p.partner = partner;
2939}
2940
2941void ?()(Pingpong &p) {
2942        for(;;) {
2943        yield(go);
2944        }
2945}
2946
2947void go(Pingpong &p) {
2948        print(.%(p.msg)\n.);
2949        go(p.partner);
2950}
2951
2952void main() {
2953        Pingpong ping = {.ping.};
2954        Pingpong pong = {.pong., ping};
2955        ping.partner = pong;
2956        go(ping);
2957}
2958\end{lstlisting}
2959
2960
2961\section{Modules and Packages }
2962
2963\begin{comment}
2964High-level encapsulation is useful for organizing code into reusable units, and accelerating compilation speed.
2965\CFA provides a convenient mechanism for creating, building and sharing groups of functionality that enhances productivity and improves compile time.
2966
2967There are two levels of encapsulation in \CFA, module and package.
2968A module is a logical grouping of functionality that can be easily pulled into another project, much like a module in Python or a package in Go.
2969A module forms a namespace to limit the visibility and prevent naming conflicts of variables.
2970Furthermore, a module is an independent translation unit, which can be compiled separately to accelerate the compilation speed.
2971
2972A package is a physical grouping of one or more modules that is used for code distribution and version management.
2973Package is also the level of granularity at which dependences are managed.
2974A package is similar to the Crate in Rust.
2975
2976
2977\subsection{No Declarations, No Header Files}
2978
2979In C and \CC, it is necessary to declare or define every global variable, global function, and type before it is used in each file.
2980Header files and a preprocessor are normally used to avoid repeating code.
2981Thus, many variables, functions, and types are described twice, which exposes an opportunity for errors and causes additional maintenance work.
2982Instead of following this model, the \CFA tools can extract all of the same information from the code automatically.
2983This information is then stored in the object files for each module, in a format that can quickly be read by the compiler, and stored at the top of the file, for quick access.
2984In addition to the user productivity improvements, this simple change also improves compile time, by saving the information in a simple machine readable format, instead of making the compiler parse the same information over and over from a header file.
2985This seems like a minor change, but according to (Pike, Go at Google: Language Design in the Service of Software Engineering), this simple change can cause massive reductions in compile time.
2986
2987In \CFA, multiple definitions are not necessary.
2988Within a module, all of the module's global definitions are visible throughout the module.
2989For example, the following code compiles, even though isOdd was not declared before being called:
2990\begin{lstlisting}
2991bool isEven(unsigned int x) {
2992        if (x == 0) return true;
2993        else return !isOdd(x);
2994}
2995
2996bool isOdd(unsigned int x) {
2997        if (x == 1) return true;
2998        else return !isEven(x - 2);
2999}
3000\end{lstlisting}
3001
3002Header files in C are used to expose the declarations from a library, so that they can be used externally.
3003With \CFA, this functionality is replaced with module exports, discussed below.
3004When building a \CFA module which needs to be callable from C code, users can use the tools to generate a header file suitable for including in these C files with all of the needed declarations.
3005
3006In order to interoperate with existing C code, \CFA files can still include header files, the contents of which will be enclosed in a C linkage section to indicate C calling conventions (see Interoperability for more information).
3007
3008
3009\subsection{Modules}
3010
3011A module typically contains a set of related types and methods, with some objects accessible from outside the package, and some limited to use inside the module.
3012These modules can then be easily shared and reused in multiple projects.
3013As modules are intended to be distributed for reuse, they should generally have stable, well-defined interfaces.
3014
3015\CFA adds the following keywords to express the module systems: module, export, import, as.
3016
3017
3018\subsubsection{Module Declaration}
3019
3020The syntax to declare a module is module moduleName;.
3021
3022The module declaration must be at the beginning of a file, and each file can only belong to one module.
3023If there is no module declaration at the beginning of a file, the file belongs to the global module.
3024A module can span several files.
3025By convention, a module and the files belonging to the module have additional mapping relationship which is described in the Do-Lang Tooling documentation.
3026
3027The moduleName follows the same rules of a variable name, except that it can use slash "/" to indicate the module/sub-module relationship.
3028For example, container/vector is a valid module name, where container is the parent module name, and vector is the sub-module under container.
3029
3030Only the interfaces of a module are visible from outside, when the module is imported. export is a type decorator to declare a module interface.
3031A method, a global variable or a type can be declared as a module interface.
3032Types defined in a module and referenced by an exported function or a variable must be exported, too.
3033
3034The following code is a simple module declaration example.
3035\begin{lstlisting}
3036module M;
3037
3038//visible outside module M
3039
3040export int f(int i) { return i + 1; }
3041export double aCounter;
3042
3043//not visible outside module M
3044
3045int g(int i) { return i - 1; }
3046
3047double bCounter;
3048\end{lstlisting}
3049
3050export module moduleName; can be use to re-export all the visible (exported) names in moduleName from the current module.
3051
3052
3053\subsubsection{Module Import}
3054
3055The syntax to import a module is import moduleName; or import moduleName as anotherName;.
3056One package cannot be imported with both of the two types of syntax in one file.
3057A package imported in one file will only be visible in this file.
3058For example, two files, A and B belong to the same module.
3059If file A imports another module, M, the exported names in M are not visible in file B.
3060
3061All of the exported names are visible in the file that imports the module.
3062The exported names can be accessed within a namespace based on the module name in the first syntax (ex moduleName.foo).
3063If moduleName has several elements separated by '/' to describe a sub-module (ex. import container/vector;), the last element in the moduleName is used as the namespace to access the visible names in that module (ex vector.add(...);).
3064The as keyword is used to confine the imported names in a unique namespace (ex. anotherName.foo). anotherName must be a valid identifier (same rules as a variable name) which means it cannot have '/' in it.
3065Conflicts in namespaces will be reported by the compiler.
3066The second method can be used to solve conflicting name problems.
3067The following code snippets show the two situations.
3068
3069\begin{lstlisting}
3070module util/counter;
3071export int f(int i) { return i+1; }
3072
3073import util/counter;
3074
3075int main() {
3076        return counter.f(200); // f() from the package counter
3077}
3078
3079import util/counter as ct;
3080int main() {
3081        return ct.f(200); // f() from the package counter
3082}
3083\end{lstlisting}
3084
3085
3086Additionally, using the .as. syntax, a user can force the compiler to add the imported names into the current namespace using .as ..With these module rules, the following module definitions and imports can be achieved without any problem.
3087
3088\begin{lstlisting}
3089module M1;
3090export int f(int i) { return i+1;} // visible outside
3091
3092int g(int i) { return i-1;} // not visible outside
3093
3094module M2;
3095int f(int i) { return i * 2; } // not visible outside
3096export int g(int g) { return i / 2; } // visible outside
3097
3098import M1 as .;
3099
3100import M2 as .;
3101
3102
3103int main() {
3104        return f(3) + g(4); //f() from M1 and g() from M2;
3105}
3106\end{lstlisting}
3107
3108
3109\subsubsection{Sub-Module and Module Aggregation}
3110
3111Several modules can be organized in a parent module and sub-modules relationship.
3112The sub-module names are based on hierarchical naming, and use slash, "/", to indicate the relationship.
3113For example, std/vector and std/io are sub-modules of module std.
3114The exported names in a sub-module are NOT visible if the parent module is imported, which means the exported names in the sub-module are
3115not implicitly exported in the parent module.
3116
3117Aggregation is a mechanism to support components and simplified importing.
3118The mechanism is not based on naming but based on manual declaration.
3119For example, the following is the aggregated sequence module.
3120The export {...} is syntactic sugar for many lines of export module aModule;.
3121If an aggregated module is imported, all the included modules in the aggregation are imported.
3122
3123\begin{lstlisting}
3124module std/sequence;
3125
3126export {
3127        module std/vector;
3128        module std/list;
3129        module std/array;
3130        module std/deque;
3131        module std/forward_list;
3132        module std/queue;
3133        module std/stack;
3134};
3135\end{lstlisting}
3136
3137After importing the aggregated module, each individual name is still contained in the original name space.
3138For example, vector.add() and list.add() should be used to reference the add methods if there are add methods in both the vector module and the list module.
3139
3140
3141\subsubsection{Import from Repository}
3142
3143When a module is imported, the tools locate the module in the one of the accessible package paths (defined by command line flag or environment variable).
3144The tools also support retrieving modules of a package from external repositories.
3145See Listing 40: Package directory structure
3146
3147
3148\subsubsection{Package Import}
3149
3150Because packages are the places where the building tool looks for modules, there is no code required in the \CFA source file to import a package.
3151In order to use modules in a package, the programmer needs to guide the building tool to locate the right package by 1) Adding the package's parent path into \$DOPATH;
3152or 2) Adding the package dependence into the current project's Do.prj.
3153More details about locating a module in a package are explained in the next section.
3154
3155
3156\subsubsection{Package Versioning}
3157
3158A package must have a version number.
3159The version number is a string.
3160For example "1.0", "1.a", "A1", and "1ec5fab753eb979d3886a491845b8ae152d58c8f" are all valid version numbers.
3161By convention, a package is stored in a directory named packageName-packageVersion.
3162For example, the util package with version 1.1 is stored in a directory named util-1.1.
3163
3164The project description file can optionally specify the version of the package used in the current project.
3165If not defined, because the version number is a string, and all the different versions for the same package will be sorted in increasing order, the package with the largest version number will be used in the compilation.
3166The builder tool will record the specific package version used in the build in the project's "Do.lock" file to enable fully repeatable builds.
3167
3168
3169\subsection{Module and Package Organization}
3170
3171\CFA has two level of encapsulations, module and package.
3172This section explains the object model of modules, packages and other language concepts.
3173It also explains how programmers should organize their code, and the method used by the build tools to locate packages, and import modules for compilation.
3174
3175
3176\subsubsection{Object Model}
3177
3178There are several concepts in Do.
3179\begin{itemize}
3180\item
3181File: a \CFA source file
3182\item
3183Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside
3184\item
3185Package: a container to organize modules for distribution; It has attributes like name, author,
3186version, dependences, etc.
3187\item
3188Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
3189\end{itemize}
3190
3191The following rules summarize the object model of all the above concepts:
3192\begin{itemize}
3193\item
3194A module contains one or more files
3195\begin{itemize}
3196\item
3197One file can only belong to one module
3198\item
3199A module has its name and interfaces exported
3200\item
3201A file without a module declaration at the beginning belongs to the global module
3202\item
3203\end{itemize}
3204
3205\item
3206A package contains one or more modules
3207\begin{itemize}
3208\item
3209A package has additional meta info described in Do.prj file
3210\item
3211A package may be dependent on other packages.
3212\end{itemize}
3213
3214\item
3215A project contains one or more modules in its source code
3216\begin{itemize}
3217\item
3218A project has additional meta info described in Do.prj file
3219\item
3220A project may be dependent on other packages
3221\item
3222A project can be transformed into a package for distribution
3223\item
3224A project can generate one or more executable binaries
3225\end{itemize}
3226\end{itemize}
3227
3228
3229\subsubsection{Module File Organization}
3230
3231The rules of this section are the conventions to organize module files in one package.
3232
3233The file location of a module in a package must match the module/submodule naming hierarchy.
3234The names separated by slash "/" must match the directory levels.
3235If only one file is used to implement one module, there is no need to put the module implementation file inside a sub-directory.
3236The file can be put inside its parent module's sub-directory with the sub module's name as the file name.
3237
3238Here is an example of a package, util.
3239\begin{lstlisting}
3240+ util
3241Do.prj #package description file
3242        heap.do #Case 1: module heap;
3243        list.do #Case 1: mdoule list;
3244        ring.do #Case 1: module ring;
3245        + string #Case 2
3246        impl1.do #module string;
3247        + std
3248        vector.do
3249        list.do
3250        + array #Case 3
3251        array1.do #module std/array;
3252        array2.do #module std/array;
3253        sequence.do #Case 4, module std/sequence;
3254        test.do #Case 5
3255\end{lstlisting}
3256
3257\begin{itemize}
3258\item
3259Case 1: Each individual file implements a module
3260\item
3261Case 2: Put the implementation of a module under the sub-directory, but there is only one file
3262\item
3263Case 3: Put the implementation of a module under the sub-directory; There are several files to
3264implement one module
3265\item
3266Case 4: One file to express one aggregation
3267\item
3268Case 5: The file does not belong to any module; It is used for testing purpose
3269\end{itemize}
3270
3271The example only uses source code, ".do" files, to show the module file organization.
3272Other module packaging formats, like binary, must also follow the same rules.
3273
3274
3275\subsection{Module File Format}
3276
3277\CFA supports different types of module file formats.
3278
3279\begin{itemize}
3280\item
3281Pure source code format: The files should be organized following the previous section's definition.
3282\item
3283IR format (TBD): The \CFA compiler IR format, similar to the source code format
3284\item
3285Binary format, including ".a" static library or ".so" dynamic linkage library
3286\begin{itemize}
3287\item
3288The file's name must match the right level's module name defined in the previous section
3289\item
3290E.g. "util.so" includes all modules for the package util.
3291\item
3292E.g. "string.so" under the package directory to include files belonging to "module string;"
3293\end{itemize}
3294\item.
3295Archive format
3296\begin{itemize}
3297\item
3298The archive is named as ".dar", and is a zip archive of the source code or the binary for a package
3299\item
3300E.g. "util.dar" is the whole package for util package including the package direction file
3301\end{itemize}
3302\item
3303Hybrid format
3304\begin{itemize}
3305\item
3306A package can be distributed partly in source code, partly in binary format, and/or packaged in the archive format
3307\item
3308The only limitation is that the names of the files must match the module location names defined in previous section
3309\end{itemize}
3310\end{itemize}
3311Package and Module Locating and the \CFA Language Tooling documentation for more details.
3312
3313
3314\subsection{Packages}
3315
3316A package is synonymous with a library in other languages.
3317The intent of the package level encapsulation is to facilitate code distribution, version control, and dependence management.
3318A package is a physical grouping of one or more modules in a directory (an archive file for a directory).
3319The concept of a package is the convention for grouping code, and the contract between the language and the building tool to search for imported modules.
3320
3321
3322\subsubsection{Package Definition}
3323
3324A package is defined by putting a project description file, Do.prj, with one or more modules into a directory.
3325This project description file contains the package's meta data, including package name, author, version, dependences, etc.
3326It should be in the root of the package directory.
3327
3328The modules in the package could be either source code, or compiled binary format.
3329The location of the module files should follow the module name's path.
3330
3331Here is a simple example of the directory structure of a package, core.
3332It contains a module std and several sub-modules under std.
3333\begin{lstlisting}
3334+ core
3335        Do.prj
3336        + std
3337        + io
3338        file.do # module std/io/file;
3339        network.do #module std/io/network;
3340        + container
3341        vector.do #module std/container/vector;
3342        list.do #module std/container/list;
3343\end{lstlisting}
3344
3345
3346\subsubsection{Package Import}
3347
3348Because packages are the places where the building tool looks for modules, there is no code required in the \CFA source file to import a package.
3349In order to use modules in a package, the programmer needs to guide the building tool to locate the right package by 1) Adding the package's parent path into \$DOPATH; or 2) Adding the package dependence into the current project's Do.prj.
3350More details about locating a module in a package are explained in the next section.
3351
3352
3353\subsubsection{Package Versioning}
3354
3355A package must have a version number.
3356The version number is a string.
3357For example "1.0", "1.a", "A1", and "1ec5fab753eb979d3886a491845b8ae152d58c8f" are all valid version numbers.
3358By convention, a package is stored in a directory named packageName-packageVersion.
3359For example, the util package with version 1.1 is stored in a directory named util-1.1.
3360
3361The project description file can optionally specify the version of the package used in the current project.
3362If not defined, because the version number is a string, and all the different versions for the same package will be sorted in increasing order, the package with the largest version number will be used in the compilation.
3363The builder tool will record the specific package version used in the build in the project's "Do.lock" file to enable fully repeatable builds.
3364
3365
3366\subsection{Module and Package Organization}
3367
3368\CFA has two level of encapsulations, module and package.
3369This section explains the object model of modules, packages and other language concepts.
3370It also explains how programmers should organize their code, and the method used by the build tools to locate packages, and import modules for compilation.
3371
3372
3373\subsubsection{Object Model}
3374
3375There are several concepts in Do.
3376\begin{itemize}
3377\item
3378File: a \CFA source file
3379\item
3380Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside
3381\item
3382Package: a container to organize modules for distribution; It has attributes like name, author, version, dependences, etc.
3383\item
3384Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
3385\end{itemize}
3386
3387The following rules summarize the object model of all the above concepts:
3388\begin{itemize}
3389\item
3390A module contains one or more files
3391\begin{itemize}
3392\item
3393One file can only belong to one module
3394\item
3395A module has its name and interfaces exported
3396\item
3397A file without a module declaration at the beginning belongs to the global module
3398\end{itemize}
3399\item
3400A package contains one or more modules
3401\begin{itemize}
3402\item
3403A package has additional meta info described in Do.prj file
3404\item
3405A package may be dependent on other packages.
3406\end{itemize}
3407\item
3408A project contains one or more modules in its source code
3409\begin{itemize}
3410\item
3411A project has additional meta info described in Do.prj file
3412\item
3413A project may be dependent on other packages
3414\item
3415A project can be transformed into a package for distribution
3416\item
3417A project can generate one or more executable binaries
3418\end{itemize}
3419\end{itemize}
3420
3421
3422\subsubsection{Module File Organization}
3423
3424The rules of this section are the conventions to organize module files in one package.
3425
3426The file location of a module in a package must match the module/submodule naming hierarchy.
3427The names separated by slash "/" must match the directory levels.
3428If only one file is used to implement one module, there is no need to put the module implementation file inside a sub-directory.
3429The file can be put inside its parent module's sub-directory with the sub module's name as the file name.
3430
3431Here is an example of a package, util.
3432\begin{lstlisting}
3433+ util
3434        Do.prj #package description file
3435        heap.do #Case 1: module heap;
3436        list.do #Case 1: mdoule list;
3437        ring.do #Case 1: module ring;
3438        + string #Case 2
3439        impl1.do #module string;
3440        + std
3441        vector.do
3442        list.do
3443        + array #Case 3
3444        array1.do #module std/array;
3445        array2.do #module std/array;
3446        sequence.do #Case 4, module std/sequence;
3447        test.do #Case 5
3448\end{lstlisting}
3449
3450
3451\begin{itemize}
3452\item
3453Case 1: Each individual file implements a module
3454\item
3455Case 2: Put the implementation of a module under the sub-directory, but there is only one file
3456\item
3457Case 3: Put the implementation of a module under the sub-directory; There are several files to implement one module
3458\item
3459Case 4: One file to express one aggregation
3460\item
3461Case 5: The file does not belong to any module; It is used for testing purpose
3462\end{itemize}
3463
3464The example only uses source code, ".do" files, to show the module file organization.
3465Other module packaging formats, like binary, must also follow the same rules.
3466
3467
3468\subsubsection{Module File Format}
3469
3470\CFA supports different types of module file formats.
3471
3472\begin{itemize}
3473\item
3474Pure source code format: The files should be organized following the previous section's definition.
3475\item
3476IR format (TBD): The \CFA compiler IR format, similar to the source code format
3477\item
3478Binary format, including ".a" static library or ".so" dynamic linkage library
3479\begin{itemize}
3480\item
3481The file's name must match the right level's module name defined in the previous section
3482\item
3483E.g. "util.so" includes all modules for the package util.
3484\item
3485E.g. "string.so" under the package directory to include files belonging to "module string;"
3486\end{itemize}
3487\item
3488Archive format
3489\begin{itemize}
3490\item
3491The archive is named as ".dar", and is a zip archive of the source code or the binary for a package
3492\item
3493E.g. "util.dar" is the whole package for util package including the package direction file
3494\end{itemize}
3495\item
3496Hybrid format
3497\begin{itemize}
3498\item
3499A package can be distributed partly in source code, partly in binary format, and/or packaged in the archive format
3500\item
3501The only limitation is that the names of the files must match the module location names defined in previous section
3502\end{itemize}
3503\end{itemize}
3504
3505
3506\subsection{Package and Module Locating}
3507
3508The high-level build tools provided by \CFA will handle finding a package in your local filesystem or retrieving it from a repository if necessary, building it if necessary, and linking with it.
3509If a programmer prefers, one can directly call the compiler, docc to build the source files and create and link to static libraries.
3510
3511When a source file imports a module, the \CFA build tool and docc compiler will locate the module according to the following order:
3512
3513\begin{enumerate}
3514\item
3515This source file's directory tree, which is typically the project's src directory
3516\item
3517All of the dependent packages (in a directory or in an archive file) under the current \CFA project's pkg directory
3518\item
3519The dependent packages (in a directory or in an archive file) inside the paths defined in the DOPATH environment variable
3520\item
3521The dependent packages (in a directory or in an archive file) inside the global \CFA SDK installation's pkg directory
3522\item
3523If one dependent package is still not found, the builder tool will automatically retrieve it from the repository defined in the SDK installation's configuration, and store it in the SDK's pkg directory
3524\end{enumerate}
3525
3526The module found first in a package will shadow the modules with the same name in the later packages in the search sequence.
3527
3528
3529\subsubsection{Dependent Package}
3530
3531Dependent packages are those packages containing modules that the current project's source code will import from.
3532Dependent packages are defined implicitly or explicitly in one \CFA project.
3533All of the packages under the current project's pkg directory are implicitly dependent packages.
3534For others, the dependent packages must be defined in the project's Do.prj file.
3535
3536
3537\subsubsection{Package and Module Locating Example}
3538
3539\begin{lstlisting}
3540# A project's source code tree
3541
3542--------------------------------------
3543
3544+ testProject
3545        Do.prj
3546        + src
3547        main.do
3548        + pkg
3549        + security-1.1
3550        Do.prj
3551        security.do #module security
3552
3553--------------------------------------
3554
3555# Do.prj
3556
3557--------------------------------------
3558
3559[dependences]
3560std
3561util = "0.2"
3562
3563--------------------------------------
3564
3565# main.do
3566
3567---------------------------------------
3568
3569import security;
3570import std/vector;
3571import container;
3572
3573----------------------------------------
3574\end{lstlisting}
3575
3576
3577\begin{lstlisting}
3578# pkg directory's source code tree
3579
3580-----------------------------------------
3581
3582+ pkg
3583        + std-1.0
3584        Do.prj
3585        vector.do #module std/vector;
3586        queue.do #module std/queue;
3587        + std-1.1
3588        Do.prj
3589        vector.do #module std/vector;
3590        queue.do #module std/queue;
3591        list.do #module std/list;
3592        + util-0.1
3593        Do.prj
3594        container.do #module container;
3595        + security-1.0
3596        security.do #module security;
3597------------------------------------------
3598\end{lstlisting}
3599
3600
3601During the compiling of main.do file import security;
3602The security module appears in both the local security-1.1 package, and the global security-1.0 package.
3603According to the locating sequence, the local security module in security-1.1 will be used.
3604And because the security-1.1 package is under local's pkg directory.
3605No dependence description is required in the project Do.prj file.
3606
3607import std/vector;
3608
3609The std/vector package appears in two different versions' packages in the global path and the project dependence doesn't specify the version. std-1.1 is used in this case.
3610
3611import container;
3612
3613The Do.prj specifies the version 0.2 should be used to locate container module from util package but only version 0.1 is available in the local file system.
3614The builder tool then will try to retrieve it from the web and store it in the global pkg directory.
3615After that, the container module from the newly downloaded package will be used in the compilation.
3616\end{comment}
3617
3618
3619\section{Comparison with Other Languages}
3620
3621\CFA is one of many languages that attempts to improve upon C.
3622In developing \CFA, many other languages were consulted for ideas, constructs, and syntax.
3623Therefore, it is important to show how these languages each compare with Do.
3624In this section, \CFA is compared with what the writers of this document consider to be the closest competitors of Do: \CC, Go, Rust, and D.
3625
3626
3627\subsection[Comparing Key Features of CFA]{Comparing Key Features of \CFA}
3628
3629
3630\subsubsection{Constructors and Destructors}
3631
3632\lstset{basicstyle=\sf\relsize{-2}}
3633
3634\begin{flushleft}
3635\begin{tabular}{@{}l|l|l|l@{}}
3636\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3637\hline
3638\begin{lstlisting}
3639struct Line {
3640        float lnth;
3641}
3642// default constructor
3643void ?{}( Line * l ) {
3644        l->lnth = 0.0;
3645        sout | "default" | endl;
3646}
3647
3648
3649// constructor with length
3650void ?{}( Line * l, float lnth ) {
3651        l->lnth = lnth;
3652        sout | "lnth" | l->lnth | endl;
3653
3654}
3655
3656// destructor
3657void ^?() {
3658        sout | "destroyed" | endl;
3659        l.lnth = 0.0;
3660}
3661
3662// usage
3663Line line1;
3664Line line2 = { 3.4 };
3665\end{lstlisting}
3666&
3667\begin{lstlisting}[language=C++]
3668class Line {
3669        float lnth;
3670
3671        // default constructor
3672        Line() {
3673                cout << "default" << endl;
3674                lnth = 0.0;
3675        }
3676
3677
3678        // constructor with lnth
3679        Line( float l ) {
3680                cout << "length " << length
3681                         << endl;
3682                length = l;
3683        }
3684
3685        // destructor
3686        ~Line() {
3687                cout << "destroyed" << endl;
3688                length = 0.0;
3689        }
3690}
3691// usage
3692Line line1;
3693Line line2( 3.4 );
3694\end{lstlisting}
3695&
3696\begin{lstlisting}[language=Golang]
3697type Line struct {
3698        length float32
3699}
3700// default constructor
3701func makeLine() Line {
3702        fmt.PrintLn( "default" )
3703        return Line{0.0}
3704}
3705
3706
3707// constructor with length
3708func makeLine( length float32 ) Line {
3709        fmt.Printf( "length %v", length )
3710
3711        return Line{length}
3712}
3713
3714// no destructor
3715
3716
3717
3718
3719
3720// usage
3721line1 := makeLine()
3722line2 := makeLine( 3.4 )
3723\end{lstlisting}
3724&
3725\begin{lstlisting}
3726struct Line {
3727        length: f32
3728}
3729// default constructor
3730impl Default for Line {
3731        fn default () -> Line {
3732                println!( "default" );
3733                Line{ length: 0.0 }
3734        }
3735}
3736// constructor with length
3737impl Line {
3738        fn make( len: f32 ) -> Line {
3739                println!( "length: {}", len );
3740                Line{ length: len }
3741        }
3742}
3743// destructor
3744impl Drop for Line {
3745        fn drop( &mut self ) {
3746                self.length = 0.0
3747        }
3748}
3749// usage
3750let line1:Line = Default::default();
3751Line line2( 3.4 );
3752\end{lstlisting}
3753\end{tabular}
3754\end{flushleft}
3755
3756
3757\subsubsection{Operator Overloading}
3758
3759\begin{flushleft}
3760\begin{tabular}{@{}l|l|l|l@{}}
3761\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3762\hline
3763\begin{lstlisting}
3764struct Cpx {
3765        double re, im;
3766};
3767// overload addition operator
3768Cpx ?+?( Cpx l, const Cpx r ) {
3769        return (Cpx){l.re+l.im, l.im+r.im};
3770}
3771Cpx a, b, c;
3772c = a + b;
3773\end{lstlisting}
3774&
3775\begin{lstlisting}
3776struct Cpx {
3777        double re, im;
3778};
3779// overload addition operator
3780Cpx operator+( Cpx l, const Cpx r ) {
3781        return (Cpx){l.re+l.im, l.im+r.im};
3782}
3783Cpx a, b, c;
3784c = a + b;
3785\end{lstlisting}
3786&
3787\begin{lstlisting}
3788// no operator overloading
3789
3790
3791
3792
3793
3794
3795
3796\end{lstlisting}
3797&
3798\begin{lstlisting}
3799struct Cpx {
3800        re: f32,
3801        im: f32
3802}
3803// overload addition operator
3804impl Add for Cpx {
3805        type Output = Cpx
3806        fn add(self, r: Cpx) -> Cpx {
3807                let mut res = Cpx{re: 0.0, im: 0.0};
3808                res.re = self.re + r.re;
3809                res.im = self.im + r.im;
3810                return res
3811        }
3812}
3813let (a, b, mut c) = ...;
3814c = a + b
3815\end{lstlisting}
3816\end{tabular}
3817\end{flushleft}
3818
3819
3820\subsubsection{Calling C Functions}
3821
3822\begin{flushleft}
3823\begin{tabular}{@{}l|l|l@{}}
3824\multicolumn{1}{c|}{\textbf{\CFA/\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}   \\
3825\hline
3826\begin{lstlisting}[boxpos=t]
3827extern "C" {
3828#include <sys/types.h>
3829#include <sys/stat.h>
3830#include <unistd.h>
3831}
3832size_t fileSize( const char *path ) {
3833        struct stat s;
3834        stat(path, &s);
3835        return s.st_size;
3836}
3837\end{lstlisting}
3838&
3839\begin{lstlisting}[boxpos=t]
3840/*
3841#cgo
3842#include <sys/types.h>
3843#include <sys/stat.h>
3844#include <unistd.h>
3845*/
3846import "C"
3847import "unsafe"
3848
3849func fileSize(path string) C.size_t {
3850        var buf C.struct_stat
3851        c_string := C.CString(path)
3852        C.stat(p, &buf)
3853        C.free(unsafe.Pointer(c_string))
3854        return buf._st_size
3855}
3856\end{lstlisting}
3857&
3858\begin{lstlisting}[boxpos=t]
3859use libc::{c_int, size_t};
3860// translated from sys/stat.h
3861#[repr(C)]
3862struct stat_t {
3863        ...
3864        st_size: size_t,
3865        ...
3866}
3867#[link(name = "libc")]
3868extern {
3869        fn stat(path: *const u8,
3870        buf: *mut stat_t) -> c_int;
3871}
3872fn fileSize(path: *const u8) -> size_t
3873{
3874        unsafe {
3875                let mut buf: stat_t = uninit();
3876                stat(path, &mut buf);
3877                buf.st_size
3878        }
3879}
3880\end{lstlisting}
3881\end{tabular}
3882\end{flushleft}
3883
3884
3885\subsubsection{Generic Functions}
3886
3887\begin{flushleft}
3888\begin{tabular}{@{}l|l|l|l@{}}
3889\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
3890\hline
3891\begin{lstlisting}
3892generic(type T, type N |
3893        { int ?<?(N, N); })
3894T *maximize(N (*f)(const T&),
3895        int n, T *a) {
3896        T *bestX = NULL;
3897        N bestN;
3898        for (int i = 0; i < n; i++) {
3899        N curN = f(a[i]);
3900        if (bestX == NULL ||
3901        curN > bestN) {
3902        bestX = &a[i]; bestN = curN;
3903        }
3904        }
3905        return bestX;
3906}
3907
3908string *longest(int n, string *p)
3909{
3910        return maximize(length, n, p);
3911}
3912\end{lstlisting}
3913&
3914\begin{lstlisting}
3915template<typename T, typename F>
3916T *maximize(const F &f,
3917        int n, T *a) {
3918        typedef decltype(f(a[0])) N;
3919        T *bestX = NULL;
3920        N bestN;
3921        for (int i = 0; i < n; i++) {
3922        N curN = f(a[i]);
3923        if (bestX == NULL || curN > bestN)
3924        {
3925        bestX = &a[i]; bestN = curN;
3926        }
3927        }
3928        return bestX;
3929}
3930
3931string *longest(int n, string *p) {
3932        return maximize(
3933        [](const string &s) {
3934        return s.length();
3935        }, n, p);
3936}
3937\end{lstlisting}
3938&
3939\begin{lstlisting}
3940// Go does not support generics!
3941func maximize(
3942        gt func(interface{}, interface{}) bool,
3943        f func(interface{}) interface{},
3944        a []interface{}) interface{} {
3945        var bestX interface{} = nil
3946        var bestN interface{} = nil
3947        for _, x := range a {
3948        curN := f(x)
3949        if bestX == nil || gt(curN, bestN)
3950        {
3951        bestN = curN
3952        bestX = x
3953        }
3954        }
3955        return bestX
3956}
3957
3958func longest(
3959        a []interface{}) interface{} {
3960        return maximize(
3961        func(a, b interface{}) bool {
3962        return a.(int) > b.(int) },
3963        func(s interface{}) interface{} {
3964        return len(s.(string)) },
3965        a).(string)
3966}
3967\end{lstlisting}
3968&
3969\begin{lstlisting}
3970use std::cmp::Ordering;
3971
3972fn maximize<N: Ord + Copy, T, F:
3973Fn(&T) -> N>(f: F, a: &Vec<T>) ->
3974Option<&T> {
3975        let mut best_x: Option<&T> = None;
3976        let mut best_n: Option<N> = None;
3977        for x in a {
3978        let n = f(x);
3979        if (match best_n { None => true,
3980        Some(bn) =>
3981        n.cmp(&bn) == Ordering::Greater })
3982        {
3983        best_x = Some(x);
3984        best_n = Some(n);
3985        }
3986        }
3987        return best_x
3988}
3989
3990fn longest(a: &Vec<String>) ->
3991        Option<&String> {
3992        return
3993        maximize(|x: &String| x.len(), a)
3994}
3995\end{lstlisting}
3996\end{tabular}
3997\end{flushleft}
3998
3999
4000\begin{comment}
4001\subsubsection{Modules / Packages}
4002
4003\begin{lstlisting}
4004\CFA
4005\CC
4006
4007
4008module example/M;
4009
4010export int inc(int val) {
4011        return val + 1;
4012}
4013
4014
4015
4016
4017--------------------------------------
4018//Use the module in another file
4019import example/M;
4020int main() {
4021        print(M.inc(100));
4022        return 0;
4023}
4024// Using \CC17 module proposal
4025
4026module example.M;
4027
4028export {
4029        int inc(int val);
4030}
4031
4032int inc(inv val) {
4033        return val + 1;
4034}
4035--------------------------------------
4036// Use the module in another file
4037import example.M;
4038int main() {
4039        cout << inc(100) << endl;
4040        return 0;
4041}
4042
4043Go
4044Rust
4045package example/M;
4046
4047func Inc(val int32) int32 {
4048        // Capitalization indicates exported
4049        return val + 100
4050}
4051
4052
4053--------------------------------------
4054//Use the package in another file
4055package main
4056import .fmt.
4057import "example/M"
4058
4059func main() int32 {
4060        fmt.Printf(.%v., M.Inc(100))
4061}
4062pub mod example {
4063        pub mod M {
4064        pub inc(val i32) -> i32 {
4065        return val + 100;
4066        }
4067        }
4068}
4069
4070--------------------------------------
4071//Use the module in another file
4072use example::M;
4073
4074
4075
4076fn main() {
4077        println!(.{}., M::inc(100));
4078}
4079\end{lstlisting}
4080\end{comment}
4081
4082
4083\subsubsection{Parallel Tasks}
4084
4085\begin{flushleft}
4086\begin{tabular}{@{}l|l|l|l@{}}
4087\multicolumn{1}{c|}{\textbf{\CFA}}      & \multicolumn{1}{c|}{\textbf{\CC}} & \multicolumn{1}{c|}{\textbf{Go}} & \multicolumn{1}{c}{\textbf{Rust}}      \\
4088\hline
4089\begin{lstlisting}
4090task Nonzero {
4091        int *data;
4092        int start;
4093        int end;
4094        int* res;
4095};
4096
4097void ?{}(Nonzero &a, int d[], int s,
4098        int e, int* subres) {
4099        // constructor
4100        a.data = d;
4101        a.start = s;
4102        a.end = e;
4103        a.res = subres;
4104}
4105
4106// implicitly spawn thread here
4107void ?()(NonzeroCounter &a) {
4108        int i;
4109        int nonzero = 0;
4110        for (i=start; c<end; ++i) {
4111        if(a.data[i]!=0){ nonzero++;}
4112        }
4113        *a.res = nonzero;
4114}
4115
4116int main() {
4117        int sz = ...
4118        int data[sz] = ...;
4119        int r1 = 0, r2=0;
4120        int res;
4121        { // create a scope for Nonzero
4122        Nonzero n1{data, 0, sz/2, &n1};
4123        Nonzero n2{data, sz/2, sz, &n2};
4124        n1();//spawn
4125        n2();//spawn
4126        }
4127        res = r1+r2;
4128        return res;
4129}
4130\end{lstlisting}
4131&
4132\begin{lstlisting}
4133#include <thread>
4134#include <mutex>
4135
4136std::mutex m;
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149void task(const vector<int>&v,
4150        int* res, size_t s,
4151        size_t e) {
4152        int non_zero = 0;
4153        for(size_t i = s; i < e; ++i){
4154        if(v[i]!=0) { non_zero++;}
4155        }
4156        std::unique_lock<mutex> lck {m};
4157        *res += non_zero;
4158}
4159
4160int main() {
4161        vector<int> data = ...; //data
4162        int res = 0;
4163        std::thread t1 {task, ref(data),
4164        &res, 0,
4165        data.size()/2};
4166        std::thread t2 {task, ref(data),
4167        &res, data.size()/2,
4168        data.size()};
4169        t1.join();
4170        t2.join();
4171        return res;
4172}
4173\end{lstlisting}
4174&
4175\begin{lstlisting}
4176package main
4177
4178import "fmt"
4179
4180func nonzero(data []int, c chan int) {
4181        nz := 0
4182        for _, v:=range data {
4183        if(v!=0) { nz := nz+1 }
4184        }
4185        c <- nz
4186}
4187
4188func main() {
4189        sz := ...
4190        data := make([]int, sz)
4191        ... // data init
4192        go nonzero(data[:len(data)/2], c)
4193        go nonzero(data[len(data)/2:], c)
4194        n1, n2 := <-c, <-c
4195        res := n1 + n2
4196        fmt.Println(res)
4197}
4198\end{lstlisting}
4199&
4200\begin{lstlisting}
4201use std::thread;
4202use std::sync:mpsc::channel;
4203
4204fn main() {
4205        let sz = ...;
4206        let mut data:Vec<i32> =
4207        Vec::with_capacity(sz as usize);
4208        ... //init data
4209        let (tx, rx) = channel();
4210        for i in 0..1 {
4211        let tx = tx.clone();
4212        let data = data.clone()
4213        thread::spawn(move|| {
4214        let mut nz := 0;
4215        let mut s = 0;
4216        let mut e = sz / 2;
4217        if i == 1 {
4218        s = sz/2;
4219        e = data.len();
4220        }
4221        for i in s..(e - 1) {
4222        if data[i] != 0 (
4223        nz = nz + 1
4224        }
4225        }
4226        tx.send(nz).unwrap();
4227        });
4228        }
4229        let res = rx.recv().unwrap() +
4230        rx.recv().unwrap();
4231        println!(.{}., res);
4232}
4233\end{lstlisting}
4234\end{tabular}
4235\end{flushleft}
4236
4237\lstset{basicstyle=\sf\relsize{-1}}
4238
4239
4240\subsection{Summary of Language Comparison}
4241
4242
4243\subsubsection[C++]{\CC}
4244
4245\CC is a general-purpose programming language.
4246It has imperative, object-oriented and generic programming features, while also providing facilities for low-level memory manipulation. (Wikipedia)
4247
4248The primary focus of \CC seems to be adding object-oriented programming to C, and this is the primary difference between \CC and Do.
4249\CC uses classes to encapsulate data and the functions that operate on that data, and to hide the internal representation of the data.
4250\CFA uses modules instead to perform these same tasks.
4251Classes in \CC also enable inheritance among types.
4252Instead of inheritance, \CFA embraces composition and interfaces to achieve the same goals with more flexibility.
4253There are many studies and articles comparing inheritance and composition (or is-a versus has-a relationships), so we will not go into more detail here (Venners, 1998) (Pike, Go at Google: Language Design in the Service of Software Engineering , 2012).
4254
4255Overloading in \CFA is very similar to overloading in \CC, with the exception of the additional use, in \CFA, of the return type to differentiate between overloaded functions.
4256References and exceptions in \CFA are heavily based on the same features from \CC.
4257The mechanism for interoperating with C code in \CFA is also borrowed from \CC.
4258
4259Both \CFA and \CC provide generics, and the syntax is quite similar.
4260The key difference between the two, is that in \CC templates are expanded at compile time for each type for which the template is instantiated, while in \CFA, function pointers are used to make the generic fully compilable.
4261This means that a generic function can be defined in a compiled library, and still be used as expected from source.
4262
4263
4264\subsubsection{Go}
4265
4266Go, also commonly referred to as golang, is a programming language developed at Google in 2007 [.].
4267It is a statically typed language with syntax loosely derived from that of C, adding garbage collection, type
4268safety, some structural typing capabilities, additional built-in types such as variable-length arrays and key-value maps, and a large standard library. (Wikipedia)
4269
4270Go and \CFA differ significantly in syntax and implementation, but the underlying core concepts of the two languages are aligned.
4271Both Go and \CFA use composition and interfaces as opposed to inheritance to enable encapsulation and abstraction.
4272Both languages (along with their tooling ecosystem) provide a simple packaging mechanism for building units of code for easy sharing and reuse.
4273Both languages also include built-in light weight, user level threading concurrency features that attempt to simplify the effort and thought process required for writing parallel programs while maintaining high performance.
4274
4275Go has a significant runtime which handles the scheduling of its light weight threads, and performs garbage collection, among other tasks.
4276\CFA uses a cooperative scheduling algorithm for its tasks, and uses automatic reference counting to enable advanced memory management without garbage collection.
4277This results in Go requiring significant overhead to interface with C libraries while \CFA has no overhead.
4278
4279
4280\subsubsection{Rust}
4281
4282Rust is a general-purpose, multi-paradigm, compiled programming language developed by Mozilla Research.
4283It is designed to be a "safe, concurrent, practical language", supporting pure-functional, concurrent-actor[dubious . discuss][citation needed], imperative-procedural, and object-oriented styles.
4284
4285The primary focus of Rust is in safety, especially in concurrent programs.
4286To enforce a high level of safety, Rust has added ownership as a core feature of the language to guarantee memory safety.
4287This safety comes at the cost of a difficult learning curve, a change in the thought model of the program, and often some runtime overhead.
4288
4289Aside from those key differences, Rust and \CFA also have several similarities.
4290Both languages support no overhead interoperability with C and have minimal runtimes.
4291Both languages support inheritance and polymorphism through the use of interfaces (traits).
4292
4293
4294\subsubsection{D}
4295
4296The D programming language is an object-oriented, imperative, multi-paradigm system programming
4297language created by Walter Bright of Digital Mars and released in 2001. [.]
4298Though it originated as a re-engineering of \CC, D is a distinct language, having redesigned some core \CC features while also taking inspiration from other languages, notably Java, Python, Ruby, C\#, and Eiffel.
4299
4300D and \CFA both start with C and add productivity features.
4301The obvious difference is that D uses classes and inheritance while \CFA uses composition and interfaces.
4302D is closer to \CFA than \CC since it is limited to single inheritance and also supports interfaces.
4303Like \CC, and unlike \CFA, D uses garbage collection and has compile-time expanded templates.
4304D does not have any built-in concurrency constructs in the
4305language, though it does have a standard library for concurrency which includes the low-level primitives for concurrency.
4306
4307
4308\appendix
4309
4310
4311\section{Incompatible}
4312
4313The following incompatibles exist between \CFA and C, and are similar to Annex C for \CC~\cite{ANSI14:C++}.
4314
4315\begin{enumerate}
4316\item
4317\begin{description}
4318\item[Change:] add new keywords \\
4319New keywords are added to \CFA.
4320\item[Rationale:] keywords added to implement new semantics of \CFA.
4321\item[Effect on original feature:] change to semantics of well-defined feature. \\
4322Any ISO C programs using these keywords as identifiers are invalid \CFA programs.
4323\item[Difficulty of converting:] keyword clashes are accommodated by syntactic transformations using the \CFA backquote escape-mechanism:
4324\begin{lstlisting}
4325int `otype` = 3;                                // make keyword an indentifier
4326double `choose` = 3.5;
4327\end{lstlisting}
4328Programs can be converted automatically by enclosing keyword identifiers in backquotes, and the backquotes can be remove later when the identifier name is changed.
4329Clashes in C system libraries (include files) can be handled automatically using preprocessor, ©#include_next© and ©-Ifilename©:
4330\begin{lstlisting}
4331// include file uses the CFA keyword "otype".
4332#if ! defined( otype )                  // nesting ?
4333#define otype `otype`
4334#define __CFA_BFD_H__
4335#endif // ! otype
4336
4337#include_next <bfd.h>                   // must have internal check for multiple expansion
4338
4339#if defined( otype ) && defined( __CFA_BFD_H__ )        // reset only if set
4340#undef otype
4341#undef __CFA_BFD_H__
4342#endif // otype && __CFA_BFD_H__
4343\end{lstlisting}
4344\item[How widely used:] clashes among new \CFA keywords and existing identifiers are rare.
4345\end{description}
4346
4347\item
4348\begin{description}
4349\item[Change:] type of character literal ©int© to ©char© to allow more intuitive overloading:
4350\begin{lstlisting}
4351int rtn( int i );
4352int rtn( char c );
4353rtn( 'x' );                                             // programmer expects 2nd rtn to be called
4354\end{lstlisting}
4355\item[Rationale:] it is more intuitive for the call to ©rtn© to match the second version of definition of ©rtn© rather than the first.
4356In particular, output of ©char© variable now print a character rather than the decimal ASCII value of the character.
4357\begin{lstlisting}
4358sout | 'x' | " " | (int)'x' | endl;
4359x 120
4360\end{lstlisting}
4361Having to cast ©'x'© to ©char© is non-intuitive.
4362\item[Effect on original feature:] change to semantics of well-defined feature that depend on:
4363\begin{lstlisting}
4364sizeof( 'x' ) == sizeof( int )
4365\end{lstlisting}
4366no long work the same in \CFA programs.
4367\item[Difficulty of converting:] simple
4368\item[How widely used:] programs that depend upon ©sizeof( 'x' )© are rare and can be changed to ©sizeof(char)©.
4369\end{description}
4370
4371\item
4372\begin{description}
4373\item[Change:] make string literals ©const©:
4374\begin{lstlisting}
4375char * p = "abc";                               // valid in C, deprecated in §\CFA§
4376char * q = expr ? "abc" : "de"; // valid in C, invalid in §\CFA§
4377\end{lstlisting}
4378The type of a string literal is changed from ©[] char© to ©const [] char©.
4379Similarly, the type of a wide string literal is changed from ©[] wchar_t© to ©const [] wchar_t©.
4380\item[Rationale:] This change is a safety issue:
4381\begin{lstlisting}
4382char * p = "abc";
4383p[0] = 'w';                                             // segment fault or change constant literal
4384\end{lstlisting}
4385The same problem occurs when passing a string literal to a routine that changes its argument.
4386\item[Effect on original feature:] change to semantics of well-defined feature.
4387\item[Difficulty of converting:] simple syntactic transformation, because string literals can be converted to ©char *©.
4388\item[How widely used:] programs that have a legitimate reason to treat string literals as pointers to potentially modifiable memory are rare.
4389\end{description}
4390
4391\item
4392\begin{description}
4393\item[Change:] remove \newterm{tentative definitions}, which only occurs at file scope:
4394\begin{lstlisting}
4395int i;                                                  // forward definition
4396int *j = ®&i®;                                  // forward reference, valid in C, invalid in §\CFA§
4397int i = 0;                                              // definition
4398\end{lstlisting}
4399is valid in C, and invalid in \CFA because duplicate overloaded object definitions at the same scope level are disallowed.
4400This change makes it impossible to define mutually referential file-local static objects, if initializers are restricted to the syntactic forms of C. For example,
4401\begin{lstlisting}
4402struct X { int i; struct X *next; };
4403static struct X a;                              // forward definition
4404static struct X b = { 0, ®&};        // forward reference, valid in C, invalid in §\CFA§
4405static struct X a = { 1, &b };  // definition
4406\end{lstlisting}
4407\item[Rationale:] avoids having different initialization rules for builtin types and userdefined types.
4408\item[Effect on original feature:] change to semantics of well-defined feature.
4409\item[Difficulty of converting:] the initializer for one of a set of mutually-referential file-local static objects must invoke a routine call to achieve the initialization.
4410\item[How widely used:] seldom
4411\end{description}
4412
4413\item
4414\begin{description}
4415\item[Change:] have ©struct© introduce a scope for nested types
4416In C, the name of the nested types belongs to the same scope as the name of the outermost enclosing
4417Example:
4418\begin{lstlisting}
4419enum ®Colour® { R, G, B, Y, C, M };
4420struct Person {
4421        enum ®Colour® { R, G, B };      // nested type
4422        struct Face {                           // nested type
4423                ®Colour® Eyes, Hair;            // type defined outside (1 level)
4424        };
4425        ß.ß®Colour® shirt;                              // type defined outside (top level)
4426        ®Colour® pants;                         // type defined same level
4427        Face looks[10];                         // type defined same level
4428};
4429®Colour® c = R;                                 // type/enum defined same level
4430Personß.ß®Colour® pc = Personß.ßR;      // type/enum defined inside
4431Personß.ßFace pretty;                           // type defined inside
4432\end{lstlisting}
4433\item[Rationale:] ©struct© scope is crucial to \CFA as an information structuring and hiding mechanism.
4434\item[Effect on original feature:] change to semantics of well-defined feature.
4435\item[Difficulty of converting:] Semantic transformation.
4436\item[How widely used:] C programs rarely have nest types because they are equivalent to the hoisted version.
4437
4438\CFA is C \emph{incompatible} on this issue, and provides semantics similar to \CC.
4439Nested types are not hoisted and can be referenced using the field selection operator ``©.©'', unlike the \CC scope-resolution operator ``©::©''.
4440Given that nested types in C are equivalent to not using them, i.e., they are essentially useless, it is unlikely there are any realistic usages that break because of this incompatibility.
4441\end{description}
4442
4443\item
4444\begin{description}
4445\item[Change:] In C++, the name of a nested class is local to its enclosing class.
4446\item[Rationale:] C++ classes have member functions which require that classes establish scopes.
4447\item[Difficulty of converting:] Semantic transformation. To make the struct type name visible in the scope of the enclosing struct, the struct tag could be declared in the scope of the enclosing struct, before the enclosing struct is defined. Example:
4448\begin{lstlisting}
4449struct Y; // struct Y and struct X are at the same scope
4450struct X {
4451struct Y { /* ... */ } y;
4452};
4453\end{lstlisting}
4454All the definitions of C struct types enclosed in other struct definitions and accessed outside the scope of the enclosing struct could be exported to the scope of the enclosing struct.
4455Note: this is a consequence of the difference in scope rules, which is documented in 3.3.
4456\item[How widely used:] Seldom.
4457\end{description}
4458
4459\item
4460\begin{description}
4461\item[Change:] comma expression is disallowed as subscript
4462\item[Rationale:] safety issue to prevent subscripting error for multidimensional arrays: ©x[i,j]© instead of ©x[i][j]©, and this syntactic form then taken by \CFA for new styple arrays.
4463\item[Effect on original feature:] change to semantics of well-defined feature.
4464\item[Difficulty of converting:] semantic transformation of ©x[i,j]© to ©x[(i,j)]©
4465\item[How widely used:] seldom.
4466\end{description}
4467\end{enumerate}
4468
4469
4470\section{I/O Library}
4471\label{s:IOLibrary}
4472\index{input/output library}
4473
4474The goal for \CFA I/O is to make I/O as simple as possible for the general case, while fully supporting polmorphism and user defined types in a consistent way.
4475The general case is printing out a sequence of variables separated by whitespace.
4476\begin{quote2}
4477\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
4478\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{\CC}}      \\
4479\begin{lstlisting}
4480int x = 0, y = 1, z = 2;
4481®sout® ®|® x ®|® y ®|® z ®| endl®;
4482\end{lstlisting}
4483&
4484\begin{lstlisting}
4485
4486cout << x << " " << y << " " << z << endl;
4487\end{lstlisting}
4488\end{tabular}
4489\end{quote2}
4490The \CFA form is half as many characters, and is similar to \Index{Python} I/O with respect to implicit separators.
4491
4492The logical-or operator is used because it is the lowest-priority overloadable operator, other than assignment.
4493Therefore, fewer output expressions require parenthesis.
4494\begin{quote2}
4495\begin{tabular}{@{}ll@{}}
4496\textbf{\CFA:}
4497&
4498\begin{lstlisting}
4499sout | x * 3 | y + 1 | z << 2 | x == y | (x | y) | (x || y) | (x > z ? 1 : 2) | endl;
4500\end{lstlisting}
4501\\
4502\textbf{\CC:}
4503&
4504\begin{lstlisting}
4505cout << x * 3 << y + 1 << (z << 2) << (x == y) << (x | y) << (x || y) << (x > z ? 1 : 2) << endl;
4506\end{lstlisting}
4507\end{tabular}
4508\end{quote2}
4509Finally, the logical-or operator has a link with the Shell pipe-operator for moving data, although data flows in the opposite direction.
4510
4511The implicit seperator\index{I/O separator} character (space/blank) is a separator not a terminator.
4512The rules for implicitly adding the separator are:
4513\begin{enumerate}
4514\item
4515A seperator does not appear at the start or end of a line.
4516\begin{lstlisting}[belowskip=0pt]
4517sout | 1 | 2 | 3 | endl;
4518\end{lstlisting}
4519\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
45201 2 3
4521\end{lstlisting}
4522\item
4523A seperator does not appear before or after a character literal or variable.
4524\begin{lstlisting}
4525sout | '1' | '2' | '3' | endl;
4526123
4527\end{lstlisting}
4528\item
4529A seperator does not appear before or after a null (empty) C string
4530\begin{lstlisting}
4531sout | 1 | "" | 2 | "" | 3 | endl;
4532123
4533\end{lstlisting}
4534which is a local mechanism to disable insertion of the separator character.
4535\item
4536A seperator does not appear before a C string starting with the (extended) \Index{ASCII}\index{ASCII!extended} characters: \lstinline[mathescape=off]@([{$£¥¡¿«@
4537%$
4538\begin{lstlisting}[mathescape=off]
4539sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x $" | 4 | "x £" | 5 | "x ¥" | 6 | "x ¡" | 7 | "x ¿" | 8 | "x «" | 9 | endl;
4540\end{lstlisting}
4541%$
4542\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4543x (1 x [2 x {3 x $4 x £5 x ¥6 x ¡7 x ¿8 x «9
4544\end{lstlisting}
4545%$
4546\item
4547A seperator does not appear after a C string ending with the (extended) \Index{ASCII}\index{ASCII!extended} characters: ©,.:;!?)]}%¢»©
4548\begin{lstlisting}[belowskip=0pt]
4549sout | 1 | ", x" | 2 | ". x" | 3 | ": x" | 4 | "; x" | 5 | "! x" | 6 | "? x" | 7 | ") x" | 8 | "] x" | 9 | "} x"
4550         | 10 | "% x" | 11 | "¢ x" | 12 | "» x" | endl;
4551\end{lstlisting}
4552\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
45531, x 2. x 3: x 4; x 5! x 6? x 7) x 8] x 9} x 10% x 11¢ 12»
4554\end{lstlisting}
4555\item
4556A seperator does not appear before or after a C string begining/ending with the \Index{ASCII} quote or whitespace characters: \lstinline[showspaces=true]@`'" \t\v\f\r\n@
4557\begin{lstlisting}[belowskip=0pt]
4558sout | "x`" | 1 | "`x'" | 2 | "'x\"" | 3 | "\"x" | "x " | 4 | " x" | "x\t" | 1 | "\tx" | endl;
4559\end{lstlisting}
4560\begin{lstlisting}[mathescape=off,showspaces=true,showtabs=true,aboveskip=0pt,belowskip=0pt]
4561x`1`x'2'x"3"x x 4 x x   1       x
4562\end{lstlisting}
4563\end{enumerate}
4564The following \CC-style \Index{manipulator}s allow further control over implicit seperation.
4565\begin{lstlisting}[mathescape=off,belowskip=0pt]
4566sout | sepOn | 1 | 2 | 3 | sepOn | endl;        // separator at start of line
4567\end{lstlisting}
4568\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4569 1 2 3
4570\end{lstlisting}
4571\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4572sout | 1 | sepOff | 2 | 3 | endl;                       // turn off implicit separator temporarily
4573\end{lstlisting}
4574\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
457512 3
4576\end{lstlisting}
4577\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4578sout | sepDisable | 1 | 2 | 3 | endl;           // turn off implicit separation, affects all subsequent prints
4579\end{lstlisting}
4580\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4581123
4582\end{lstlisting}
4583\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4584sout | 1 | sepOn | 2 | 3 | endl;                        // turn on implicit separator temporarily
4585\end{lstlisting}
4586\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
45871 23
4588\end{lstlisting}
4589\begin{lstlisting}[mathescape=off,aboveskip=0pt,belowskip=0pt]
4590sout | sepEnable | 1 | 2 | 3 | endl;            // turn on implicit separation, affects all subsequent prints
4591\end{lstlisting}
4592\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt,belowskip=0pt]
4593 1 2 3
4594\end{lstlisting}
4595\begin{lstlisting}[mathescape=off,aboveskip=0pt,aboveskip=0pt,belowskip=0pt]
4596sepSet( sout, ", $" );                                          // change separator from " " to ", $"
4597sout | 1 | 2 | 3 | endl;
4598\end{lstlisting}
4599%$
4600\begin{lstlisting}[mathescape=off,showspaces=true,aboveskip=0pt]
46011, $2, $3
4602\end{lstlisting}
4603%$
4604\begin{comment}
4605#include <fstream>
4606
4607int main() {
4608        int x = 3, y = 5, z = 7;
4609        sout | x * 3 | y + 1 | z << 2 | x == y | (x | y) | (x || y) | (x > z ? 1 : 2) | endl;
4610        sout | 1 | 2 | 3 | endl;
4611        sout | '1' | '2' | '3' | endl;
4612        sout | 1 | "" | 2 | "" | 3 | endl;
4613        sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x $" | 4 | "x £" | 5 | "x ¥" | 6 | "x ¡" | 7 | "x ¿" | 8 | "x «" | 9 | endl;
4614        sout | 1 | ", x" | 2 | ". x" | 3 | ": x" | 4 | "; x" | 5 | "! x" | 6 | "? x" | 7 | ") x" | 8 | "] x" | 9 | "} x"
4615                 | 10 | "% x" | 11 | "¢ x" | 12 | "» x" | endl;
4616        sout | "x`" | 1 | "`x'" | 2 | "'x\"" | 3 | "\"x" | "x " | 4 | " x" | "x\t" | 1 | "\tx" | endl;
4617        sout | sepOn | 1 | 2 | 3 | sepOn | endl;        // separator at start of line
4618        sout | 1 | sepOff | 2 | 3 | endl;                       // turn off implicit separator temporarily
4619        sout | sepDisable | 1 | 2 | 3 | endl;           // turn off implicit separation, affects all subsequent prints
4620        sout | 1 | sepOn | 2 | 3 | endl;                        // turn on implicit separator temporarily
4621        sout | sepEnable | 1 | 2 | 3 | endl;            // turn on implicit separation, affects all subsequent prints
4622        sepSet( sout, ", $" );                                          // change separator from " " to ", $"
4623        sout | 1 | 2 | 3 | endl;
4624
4625}
4626
4627// Local Variables: //
4628// tab-width: 4 //
4629// End: //
4630\end{comment}
4631%$
4632
4633
4634\section{Standard Library}
4635\label{s:StandardLibrary}
4636
4637The goal of the \CFA standard-library is to wrap many of the existing C library-routines that are explicitly polymorphic into implicitly polymorphic versions.
4638
4639
4640\subsection{malloc}
4641
4642\begin{lstlisting}
4643forall( otype T ) T * malloc( void );§\indexc{malloc}§
4644forall( otype T ) T * malloc( char fill );
4645forall( otype T ) T * malloc( T * ptr, size_t size );
4646forall( otype T ) T * malloc( T * ptr, size_t size, unsigned char fill );
4647forall( otype T ) T * calloc( size_t nmemb );§\indexc{calloc}§
4648forall( otype T ) T * realloc( T * ptr, size_t size );§\indexc{ato}§
4649forall( otype T ) T * realloc( T * ptr, size_t size, unsigned char fill );
4650
4651forall( otype T ) T * aligned_alloc( size_t alignment );§\indexc{ato}§
4652forall( otype T ) T * memalign( size_t alignment );             // deprecated
4653forall( otype T ) int posix_memalign( T ** ptr, size_t alignment );
4654
4655forall( otype T ) T * memset( T * ptr, unsigned char fill ); // use default value '\0' for fill
4656forall( otype T ) T * memset( T * ptr );                                // remove when default value available
4657\end{lstlisting}
4658
4659
4660\subsection{ato / strto}
4661
4662\begin{lstlisting}
4663int ato( const char * ptr );§\indexc{ato}§
4664unsigned int ato( const char * ptr );
4665long int ato( const char * ptr );
4666unsigned long int ato( const char * ptr );
4667long long int ato( const char * ptr );
4668unsigned long long int ato( const char * ptr );
4669float ato( const char * ptr );
4670double ato( const char * ptr );
4671long double ato( const char * ptr );
4672float _Complex ato( const char * ptr );
4673double _Complex ato( const char * ptr );
4674long double _Complex ato( const char * ptr );
4675
4676int strto( const char * sptr, char ** eptr, int base );
4677unsigned int strto( const char * sptr, char ** eptr, int base );
4678long int strto( const char * sptr, char ** eptr, int base );
4679unsigned long int strto( const char * sptr, char ** eptr, int base );
4680long long int strto( const char * sptr, char ** eptr, int base );
4681unsigned long long int strto( const char * sptr, char ** eptr, int base );
4682float strto( const char * sptr, char ** eptr );
4683double strto( const char * sptr, char ** eptr );
4684long double strto( const char * sptr, char ** eptr );
4685float _Complex strto( const char * sptr, char ** eptr );
4686double _Complex strto( const char * sptr, char ** eptr );
4687long double _Complex strto( const char * sptr, char ** eptr );
4688\end{lstlisting}
4689
4690
4691\subsection{bsearch / qsort}
4692
4693\begin{lstlisting}
4694forall( otype T | { int ?<?( T, T ); } )
4695T * bsearch( const T key, const T * arr, size_t dimension );§\indexc{bsearch}§
4696
4697forall( otype T | { int ?<?( T, T ); } )
4698void qsort( const T * arr, size_t dimension );§\indexc{qsort}§
4699\end{lstlisting}
4700
4701
4702\subsection{abs}
4703
4704\begin{lstlisting}
4705char abs( char );§\indexc{abs}§
4706int abs( int );
4707long int abs( long int );
4708long long int abs( long long int );
4709float abs( float );
4710double abs( double );
4711long double abs( long double );
4712float abs( float _Complex );
4713double abs( double _Complex );
4714long double abs( long double _Complex );
4715\end{lstlisting}
4716
4717
4718\subsection{random}
4719
4720\begin{lstlisting}
4721void rand48seed( long int s );§\indexc{rand48seed}§
4722char rand48();§\indexc{rand48}§
4723int rand48();
4724unsigned int rand48();
4725long int rand48();
4726unsigned long int rand48();
4727float rand48();
4728double rand48();
4729float _Complex rand48();
4730double _Complex rand48();
4731long double _Complex rand48();
4732\end{lstlisting}
4733
4734
4735\subsection{min / max / swap}
4736
4737\begin{lstlisting}
4738forall( otype T | { int ?<?( T, T ); } )
4739T min( const T t1, const T t2 );§\indexc{min}§
4740
4741forall( otype T | { int ?>?( T, T ); } )
4742T max( const T t1, const T t2 );§\indexc{max}§
4743
4744forall( otype T )
4745void swap( T * t1, T * t2 );§\indexc{swap}§
4746\end{lstlisting}
4747
4748
4749\section{Math Library}
4750\label{s:Math Library}
4751
4752The goal of the \CFA math-library is to wrap many of the existing C math library-routines that are explicitly polymorphic into implicitly polymorphic versions.
4753
4754
4755\subsection{General}
4756
4757\begin{lstlisting}
4758float fabs( float );§\indexc{fabs}§
4759double fabs( double );
4760long double fabs( long double );
4761float cabs( float _Complex );
4762double cabs( double _Complex );
4763long double cabs( long double _Complex );
4764
4765float ?%?( float, float );§\indexc{fmod}§
4766float fmod( float, float );
4767double ?%?( double, double );
4768double fmod( double, double );
4769long double ?%?( long double, long double );
4770long double fmod( long double, long double );
4771
4772float remainder( float, float );§\indexc{remainder}§
4773double remainder( double, double );
4774long double remainder( long double, long double );
4775
4776[ int, float ] remquo( float, float );§\indexc{remquo}§
4777float remquo( float, float, int * );
4778[ int, double ] remquo( double, double );
4779double remquo( double, double, int * );
4780[ int, long double ] remquo( long double, long double );
4781long double remquo( long double, long double, int * );
4782
4783[ int, float ] div( float, float );                                             // alternative name for remquo
4784float div( float, float, int * );§\indexc{div}§
4785[ int, double ] div( double, double );
4786double div( double, double, int * );
4787[ int, long double ] div( long double, long double );
4788long double div( long double, long double, int * );
4789
4790float fma( float, float, float );§\indexc{fma}§
4791double fma( double, double, double );
4792long double fma( long double, long double, long double );
4793
4794float fdim( float, float );§\indexc{fdim}§
4795double fdim( double, double );
4796long double fdim( long double, long double );
4797
4798float nan( const char * );§\indexc{nan}§
4799double nan( const char * );
4800long double nan( const char * );
4801\end{lstlisting}
4802
4803
4804\subsection{Exponential}
4805
4806\begin{lstlisting}
4807float exp( float );§\indexc{exp}§
4808double exp( double );
4809long double exp( long double );
4810float _Complex exp( float _Complex );
4811double _Complex exp( double _Complex );
4812long double _Complex exp( long double _Complex );
4813
4814float exp2( float );§\indexc{exp2}§
4815double exp2( double );
4816long double exp2( long double );
4817float _Complex exp2( float _Complex );
4818double _Complex exp2( double _Complex );
4819long double _Complex exp2( long double _Complex );
4820
4821float expm1( float );§\indexc{expm1}§
4822double expm1( double );
4823long double expm1( long double );
4824
4825float log( float );§\indexc{log}§
4826double log( double );
4827long double log( long double );
4828float _Complex log( float _Complex );
4829double _Complex log( double _Complex );
4830long double _Complex log( long double _Complex );
4831
4832float log2( float );§\indexc{log2}§
4833double log2( double );
4834long double log2( long double );
4835float _Complex log2( float _Complex );
4836double _Complex log2( double _Complex );
4837long double _Complex log2( long double _Complex );
4838
4839float log10( float );§\indexc{log10}§
4840double log10( double );
4841long double log10( long double );
4842float _Complex log10( float _Complex );
4843double _Complex log10( double _Complex );
4844long double _Complex log10( long double _Complex );
4845
4846float log1p( float );§\indexc{log1p}§
4847double log1p( double );
4848long double log1p( long double );
4849
4850int ilogb( float );§\indexc{ilogb}§
4851int ilogb( double );
4852int ilogb( long double );
4853
4854float logb( float );§\indexc{logb}§
4855double logb( double );
4856long double logb( long double );
4857\end{lstlisting}
4858
4859
4860\subsection{Power}
4861
4862\begin{lstlisting}
4863float sqrt( float );§\indexc{sqrt}§
4864double sqrt( double );
4865long double sqrt( long double );
4866float _Complex sqrt( float _Complex );
4867double _Complex sqrt( double _Complex );
4868long double _Complex sqrt( long double _Complex );
4869
4870float cbrt( float );§\indexc{cbrt}§
4871double cbrt( double );
4872long double cbrt( long double );
4873
4874float hypot( float, float );§\indexc{hypot}§
4875double hypot( double, double );
4876long double hypot( long double, long double );
4877
4878float pow( float, float );§\indexc{pow}§
4879double pow( double, double );
4880long double pow( long double, long double );
4881float _Complex pow( float _Complex, float _Complex );
4882double _Complex pow( double _Complex, double _Complex );
4883long double _Complex pow( long double _Complex, long double _Complex );
4884\end{lstlisting}
4885
4886
4887\subsection{Trigonometric}
4888
4889\begin{lstlisting}
4890float sin( float );§\indexc{sin}§
4891double sin( double );
4892long double sin( long double );
4893float _Complex sin( float _Complex );
4894double _Complex sin( double _Complex );
4895long double _Complex sin( long double _Complex );
4896
4897float cos( float );§\indexc{cos}§
4898double cos( double );
4899long double cos( long double );
4900float _Complex cos( float _Complex );
4901double _Complex cos( double _Complex );
4902long double _Complex cos( long double _Complex );
4903
4904float tan( float );§\indexc{tan}§
4905double tan( double );
4906long double tan( long double );
4907float _Complex tan( float _Complex );
4908double _Complex tan( double _Complex );
4909long double _Complex tan( long double _Complex );
4910
4911float asin( float );§\indexc{asin}§
4912double asin( double );
4913long double asin( long double );
4914float _Complex asin( float _Complex );
4915double _Complex asin( double _Complex );
4916long double _Complex asin( long double _Complex );
4917
4918float acos( float );§\indexc{acos}§
4919double acos( double );
4920long double acos( long double );
4921float _Complex acos( float _Complex );
4922double _Complex acos( double _Complex );
4923long double _Complex acos( long double _Complex );
4924
4925float atan( float );§\indexc{atan}§
4926double atan( double );
4927long double atan( long double );
4928float _Complex atan( float _Complex );
4929double _Complex atan( double _Complex );
4930long double _Complex atan( long double _Complex );
4931
4932float atan2( float, float );§\indexc{atan2}§
4933double atan2( double, double );
4934long double atan2( long double, long double );
4935
4936float atan( float, float );                                                             // alternative name for atan2
4937double atan( double, double );§\indexc{atan}§
4938long double atan( long double, long double );
4939\end{lstlisting}
4940
4941
4942\subsection{Hyperbolic}
4943
4944\begin{lstlisting}
4945float sinh( float );§\indexc{sinh}§
4946double sinh( double );
4947long double sinh( long double );
4948float _Complex sinh( float _Complex );
4949double _Complex sinh( double _Complex );
4950long double _Complex sinh( long double _Complex );
4951
4952float cosh( float );§\indexc{cosh}§
4953double cosh( double );
4954long double cosh( long double );
4955float _Complex cosh( float _Complex );
4956double _Complex cosh( double _Complex );
4957long double _Complex cosh( long double _Complex );
4958
4959float tanh( float );§\indexc{tanh}§
4960double tanh( double );
4961long double tanh( long double );
4962float _Complex tanh( float _Complex );
4963double _Complex tanh( double _Complex );
4964long double _Complex tanh( long double _Complex );
4965
4966float asinh( float );§\indexc{asinh}§
4967double asinh( double );
4968long double asinh( long double );
4969float _Complex asinh( float _Complex );
4970double _Complex asinh( double _Complex );
4971long double _Complex asinh( long double _Complex );
4972
4973float acosh( float );§\indexc{acosh}§
4974double acosh( double );
4975long double acosh( long double );
4976float _Complex acosh( float _Complex );
4977double _Complex acosh( double _Complex );
4978long double _Complex acosh( long double _Complex );
4979
4980float atanh( float );§\indexc{atanh}§
4981double atanh( double );
4982long double atanh( long double );
4983float _Complex atanh( float _Complex );
4984double _Complex atanh( double _Complex );
4985long double _Complex atanh( long double _Complex );
4986\end{lstlisting}
4987
4988
4989\subsection{Error / Gamma}
4990
4991\begin{lstlisting}
4992float erf( float );§\indexc{erf}§
4993double erf( double );
4994long double erf( long double );
4995float _Complex erf( float _Complex );
4996double _Complex erf( double _Complex );
4997long double _Complex erf( long double _Complex );
4998
4999float erfc( float );§\indexc{erfc}§
5000double erfc( double );
5001long double erfc( long double );
5002float _Complex erfc( float _Complex );
5003double _Complex erfc( double _Complex );
5004long double _Complex erfc( long double _Complex );
5005
5006float lgamma( float );§\indexc{lgamma}§
5007double lgamma( double );
5008long double lgamma( long double );
5009float lgamma( float, int * );
5010double lgamma( double, int * );
5011long double lgamma( long double, int * );
5012
5013float tgamma( float );§\indexc{tgamma}§
5014double tgamma( double );
5015long double tgamma( long double );
5016\end{lstlisting}
5017
5018
5019\subsection{Nearest Integer}
5020
5021\begin{lstlisting}
5022float floor( float );§\indexc{floor}§
5023double floor( double );
5024long double floor( long double );
5025
5026float ceil( float );§\indexc{ceil}§
5027double ceil( double );
5028long double ceil( long double );
5029
5030float trunc( float );§\indexc{trunc}§
5031double trunc( double );
5032long double trunc( long double );
5033
5034float rint( float );§\indexc{rint}§
5035long double rint( long double );
5036long int rint( float );
5037long int rint( double );
5038long int rint( long double );
5039long long int rint( float );
5040long long int rint( double );
5041long long int rint( long double );
5042
5043long int lrint( float );§\indexc{lrint}§
5044long int lrint( double );
5045long int lrint( long double );
5046long long int llrint( float );
5047long long int llrint( double );
5048long long int llrint( long double );
5049
5050float nearbyint( float );§\indexc{nearbyint}§
5051double nearbyint( double );
5052long double nearbyint( long double );
5053
5054float round( float );§\indexc{round}§
5055long double round( long double );
5056long int round( float );
5057long int round( double );
5058long int round( long double );
5059long long int round( float );
5060long long int round( double );
5061long long int round( long double );
5062
5063long int lround( float );§\indexc{lround}§
5064long int lround( double );
5065long int lround( long double );
5066long long int llround( float );
5067long long int llround( double );
5068long long int llround( long double );
5069\end{lstlisting}
5070
5071
5072\subsection{Manipulation}
5073
5074\begin{lstlisting}
5075float copysign( float, float );§\indexc{copysign}§
5076double copysign( double, double );
5077long double copysign( long double, long double );
5078
5079float frexp( float, int * );§\indexc{frexp}§
5080double frexp( double, int * );
5081long double frexp( long double, int * );
5082
5083float ldexp( float, int );§\indexc{ldexp}§
5084double ldexp( double, int );
5085long double ldexp( long double, int );
5086
5087[ float, float ] modf( float );§\indexc{modf}§
5088float modf( float, float * );
5089[ double, double ] modf( double );
5090double modf( double, double * );
5091[ long double, long double ] modf( long double );
5092long double modf( long double, long double * );
5093
5094float nextafter( float, float );§\indexc{nextafter}§
5095double nextafter( double, double );
5096long double nextafter( long double, long double );
5097
5098float nexttoward( float, long double );§\indexc{nexttoward}§
5099double nexttoward( double, long double );
5100long double nexttoward( long double, long double );
5101
5102float scalbn( float, int );§\indexc{scalbn}§
5103double scalbn( double, int );
5104long double scalbn( long double, int );
5105
5106float scalbln( float, long int );§\indexc{scalbln}§
5107double scalbln( double, long int );
5108long double scalbln( long double, long int );
5109\end{lstlisting}
5110
5111
5112\section{Rational Numbers}
5113\label{s:RationalNumbers}
5114
5115Rational numbers are numbers written as a ratio, i.e., as a fraction, where the numerator (top number) and the denominator (bottom number) are whole numbers.
5116When creating and computing with rational numbers, results are constantly reduced to keep the numerator and denominator as small as possible.
5117
5118\begin{lstlisting}
5119// implementation
5120struct Rational {§\indexc{Rational}§
5121        long int numerator, denominator;                                        // invariant: denominator > 0
5122}; // Rational
5123
5124// constants
5125extern struct Rational 0;
5126extern struct Rational 1;
5127
5128// constructors
5129Rational rational();
5130Rational rational( long int n );
5131Rational rational( long int n, long int d );
5132
5133// getter/setter for numerator/denominator
5134long int numerator( Rational r );
5135long int numerator( Rational r, long int n );
5136long int denominator( Rational r );
5137long int denominator( Rational r, long int d );
5138
5139// comparison
5140int ?==?( Rational l, Rational r );
5141int ?!=?( Rational l, Rational r );
5142int ?<?( Rational l, Rational r );
5143int ?<=?( Rational l, Rational r );
5144int ?>?( Rational l, Rational r );
5145int ?>=?( Rational l, Rational r );
5146
5147// arithmetic
5148Rational -?( Rational r );
5149Rational ?+?( Rational l, Rational r );
5150Rational ?-?( Rational l, Rational r );
5151Rational ?*?( Rational l, Rational r );
5152Rational ?/?( Rational l, Rational r );
5153
5154// conversion
5155double widen( Rational r );
5156Rational narrow( double f, long int md );
5157
5158// I/O
5159forall( dtype istype | istream( istype ) ) istype * ?|?( istype *, Rational * );
5160forall( dtype ostype | ostream( ostype ) ) ostype * ?|?( ostype *, Rational );
5161\end{lstlisting}
5162
5163
5164\bibliographystyle{plain}
5165\bibliography{cfa}
5166
5167
5168\addcontentsline{toc}{section}{\indexname} % add index name to table of contents
5169\begin{theindex}
5170Italic page numbers give the location of the main entry for the referenced term.
5171Plain page numbers denote uses of the indexed term.
5172Entries for grammar non-terminals are italicized.
5173A typewriter font is used for grammar terminals and program identifiers.
5174\indexspace
5175\input{user.ind}
5176\end{theindex}
5177
5178
5179\end{document}
5180
5181% Local Variables: %
5182% tab-width: 4 %
5183% fill-column: 100 %
5184% compile-command: "make" %
5185% End: %
Note: See TracBrowser for help on using the repository browser.