Changeset d27e340 for doc/papers/general


Ignore:
Timestamp:
Feb 15, 2018, 11:11:00 AM (6 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc
Children:
271326e, ac7d921, d55d7a6
Parents:
b69ea6b (diff), f3543b0 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

File:
1 edited

Legend:

Unmodified
Added
Removed
  • doc/papers/general/Paper.tex

    rb69ea6b rd27e340  
    44\usepackage{epic,eepic}
    55\usepackage{xspace,calc,comment}
    6 \usepackage{upquote}                                                                    % switch curled `'" to straight
    7 \usepackage{listings}                                                                   % format program code
     6\usepackage{upquote}                                            % switch curled `'" to straight
     7\usepackage{listings}                                           % format program code
     8\usepackage{enumitem}
     9\usepackage[flushmargin]{footmisc}                      % support label/reference in footnote
    810\usepackage{rotating}
    911\usepackage[usenames]{color}
    10 \usepackage{pslatex}                                    % reduce size of san serif font
     12\usepackage{pslatex}                                            % reduce size of sans serif font
    1113\usepackage[plainpages=false,pdfpagelabels,pdfpagemode=UseNone,pagebackref=true,breaklinks=true,colorlinks=true,linkcolor=blue,citecolor=blue,urlcolor=blue]{hyperref}
    1214
    1315\setlength{\textheight}{9in}
    1416%\oddsidemargin 0.0in
    15 \renewcommand{\topfraction}{0.8}                % float must be greater than X of the page before it is forced onto its own page
    16 \renewcommand{\bottomfraction}{0.8}             % float must be greater than X of the page before it is forced onto its own page
    17 \renewcommand{\floatpagefraction}{0.8}  % float must be greater than X of the page before it is forced onto its own page
    18 \renewcommand{\textfraction}{0.0}               % the entire page maybe devoted to floats with no text on the page at all
    19 
    20 \lefthyphenmin=4                                                % hyphen only after 4 characters
     17\renewcommand{\topfraction}{0.8}                        % float must be greater than X of the page before it is forced onto its own page
     18\renewcommand{\bottomfraction}{0.8}                     % float must be greater than X of the page before it is forced onto its own page
     19\renewcommand{\floatpagefraction}{0.8}          % float must be greater than X of the page before it is forced onto its own page
     20\renewcommand{\textfraction}{0.0}                       % the entire page may be devoted to floats with no text on the page at all
     21
     22\lefthyphenmin=4                                                        % hyphen only after 4 characters
    2123\righthyphenmin=4
    2224
     
    2426
    2527\newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}\xspace} % Cforall symbolic name
    26 \newcommand{\CFA}{\protect\CFAIcon} % safe for section/caption
    27 \newcommand{\CFL}{\textrm{Cforall}\xspace} % Cforall symbolic name
    28 \newcommand{\Celeven}{\textrm{C11}\xspace} % C11 symbolic name
     28\newcommand{\CFA}{\protect\CFAIcon}             % safe for section/caption
     29\newcommand{\CFL}{\textrm{Cforall}\xspace}      % Cforall symbolic name
     30\newcommand{\Celeven}{\textrm{C11}\xspace}      % C11 symbolic name
    2931\newcommand{\CC}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}\xspace} % C++ symbolic name
    3032\newcommand{\CCeleven}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}11\xspace} % C++11 symbolic name
     
    5658\newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}
    5759
    58 \newlength{\gcolumnposn}                                % temporary hack because lstlisting does not handle tabs correctly
     60\newlength{\gcolumnposn}                                        % temporary hack because lstlisting does not handle tabs correctly
    5961\newlength{\columnposn}
    6062\setlength{\gcolumnposn}{2.75in}
     
    7274
    7375% Latin abbreviation
    74 \newcommand{\abbrevFont}{\textit}       % set empty for no italics
     76\newcommand{\abbrevFont}{\textit}                       % set empty for no italics
    7577\newcommand{\EG}{\abbrevFont{e}.\abbrevFont{g}.}
    7678\newcommand*{\eg}{%
     
    103105
    104106\newenvironment{cquote}{%
    105         \list{}{\lstset{resetmargins=true,aboveskip=0pt,belowskip=0pt}\topsep=4pt\parsep=0pt\leftmargin=\parindent\rightmargin\leftmargin}%
     107        \list{}{\lstset{resetmargins=true,aboveskip=0pt,belowskip=0pt}\topsep=4pt\parsep=0pt\leftmargin=\parindentlnth\rightmargin\leftmargin}%
    106108        \item\relax
    107109}{%
     
    193195The TIOBE~\cite{TIOBE} ranks the top 5 most popular programming languages as: Java 16\%, \Textbf{C 7\%}, \Textbf{\CC 5\%}, \Csharp 4\%, Python 4\% = 36\%, where the next 50 languages are less than 3\% each with a long tail.
    194196The top 3 rankings over the past 30 years are:
    195 \lstDeleteShortInline@%
    196197\begin{center}
    197198\setlength{\tabcolsep}{10pt}
     199\lstDeleteShortInline@%
    198200\begin{tabular}{@{}rccccccc@{}}
    199201                & 2017  & 2012  & 2007  & 2002  & 1997  & 1992  & 1987          \\ \hline
     
    202204\CC             & 3             & 3             & 3             & 3             & 2             & 2             & 4                     \\
    203205\end{tabular}
     206\lstMakeShortInline@%
    204207\end{center}
    205 \lstMakeShortInline@%
    206208Love it or hate it, C is extremely popular, highly used, and one of the few systems languages.
    207209In many cases, \CC is often used solely as a better C.
     
    257259Crucial to the design of a new programming language are the libraries to access thousands of external software features.
    258260Like \CC, \CFA inherits a massive compatible library-base, where other programming languages must rewrite or provide fragile inter-language communication with C.
    259 A simple example is leveraging the existing type-unsafe (@void *@) C @bsearch@ to binary search a sorted floating-point array:
     261A simple example is leveraging the existing type-unsafe (@void *@) C @bsearch@ to binary search a sorted float array:
    260262\begin{lstlisting}
    261263void * bsearch( const void * key, const void * base, size_t nmemb, size_t size,
     
    263265int comp( const void * t1, const void * t2 ) { return *(double *)t1 < *(double *)t2 ? -1 :
    264266                                *(double *)t2 < *(double *)t1 ? 1 : 0; }
    265 double key = 5.0, vals[10] = { /* 10 sorted floating-point values */ };
     267double key = 5.0, vals[10] = { /* 10 sorted float values */ };
    266268double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp );      $\C{// search sorted array}$
    267269\end{lstlisting}
     
    505507In many languages, functions can return at most one value;
    506508however, many operations have multiple outcomes, some exceptional.
    507 Consider C's @div@ and @remquo@ functions, which return the quotient and remainder for a division of integer and floating-point values, respectively.
     509Consider C's @div@ and @remquo@ functions, which return the quotient and remainder for a division of integer and float values, respectively.
    508510\begin{lstlisting}
    509511typedef struct { int quo, rem; } div_t;         $\C{// from include stdlib.h}$
     
    936938
    937939\section{Control Structures}
     940
     941\CFA identifies missing and problematic control structures in C, and extends and modifies these control structures to increase functionality and safety.
    938942
    939943
     
    10441048The implicit targets of the current @continue@ and @break@, \ie the closest enclosing loop or @switch@, change as certain constructs are added or removed.
    10451049
     1050
    10461051\subsection{\texorpdfstring{Enhanced \LstKeywordStyle{switch} Statement}{Enhanced switch Statement}}
    10471052
    1048 \CFA also fixes a number of ergonomic defecits in the @switch@ statements of standard C.
    1049 C can specify a number of equivalent cases by using the default ``fall-through'' semantics of @case@ clauses, \eg @case 1: case 2: case 3:@ -- this syntax is cluttered, however, so \CFA includes a more concise list syntax, @case 1, 2, 3:@.
    1050 For contiguous ranges, \CFA provides an even more concise range syntax as well, @case 1~3:@; lists of ranges are also allowed in case selectors.
    1051 
     1053There are a number of deficiencies with the C @switch@ statement: enumerating @case@ lists, placement of @case@ clauses, scope of the switch body, and fall through between case clauses.
     1054
     1055C has no shorthand for specifying a list of case values, whether the list is non-contiguous or contiguous\footnote{C provides this mechanism via fall through.}.
     1056\CFA provides a shorthand for a non-contiguous list:
     1057\begin{cquote}
     1058\lstDeleteShortInline@%
     1059\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1060\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     1061\begin{cfa}
     1062case 2, 10, 34, 42:
     1063\end{cfa}
     1064&
     1065\begin{cfa}
     1066case 2: case 10: case 34: case 42:
     1067\end{cfa}
     1068\end{tabular}
     1069\lstMakeShortInline@%
     1070\end{cquote}
     1071for a contiguous list:\footnote{gcc provides the same mechanism with awkward syntax, \lstinline@2 ... 42@, where spaces are required around the ellipsis.}
     1072\begin{cquote}
     1073\lstDeleteShortInline@%
     1074\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1075\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     1076\begin{cfa}
     1077case 2~42:
     1078\end{cfa}
     1079&
     1080\begin{cfa}
     1081case 2: case 3: ... case 41: case 42:
     1082\end{cfa}
     1083\end{tabular}
     1084\lstMakeShortInline@%
     1085\end{cquote}
     1086and a combination:
     1087\begin{cfa}
     1088case -12~-4, -1~5, 14~21, 34~42:
     1089\end{cfa}
     1090
     1091C allows placement of @case@ clauses \emph{within} statements nested in the @switch@ body (see Duff's device~\cite{Duff83});
     1092\begin{cfa}
     1093switch ( i ) {
     1094  case 0:
     1095        for ( int i = 0; i < 10; i += 1 ) {
     1096                ...
     1097  `case 1:`             // no initialization of loop index
     1098                ...
     1099        }
     1100}
     1101\end{cfa}
     1102\CFA precludes this form of transfer into a control structure because it causes undefined behaviour, especially with respect to missed initialization, and provides very limited functionality.
     1103
     1104C allows placement of declarations within the @switch@ body and unreachable code at the start, resulting in undefined behaviour:
     1105\begin{cfa}
     1106switch ( x ) {
     1107        `int y = 1;`                            $\C{// unreachable initialization}$
     1108        `x = 7;`                                        $\C{// unreachable code without label/branch}$
     1109  case 0:
     1110        ...
     1111        `int z = 0;`                            $\C{// unreachable initialization, cannot appear after case}$
     1112        z = 2;
     1113  case 1:
     1114        `x = z;`                                        $\C{// without fall through, z is undefined}$
     1115}
     1116\end{cfa}
     1117\CFA allows the declaration of local variables, \eg @y@, at the start of the @switch@ with scope across the entire @switch@ body, \ie all @case@ clauses, but no statements.
     1118\CFA disallows the declaration of a local variable, \eg @z@, directly within the @switch@ body, because a declaration cannot occur immediately after a @case@ since a label can only be attached to a statement, and the use of @z@ is undefined in @case 1@ as neither storage allocation nor initialization may have occurred.
     1119
     1120C @switch@ provides multiple entry points into the statement body, but once an entry point is selected, control continues across \emph{all} @case@ clauses until the end of the @switch@ body, called \newterm{fall through};
     1121@case@ clauses are made disjoint by the @break@ statement.
     1122While the ability to fall through \emph{is} a useful form of control flow, it does not match well with programmer intuition, resulting in many errors from missing @break@ statements.
     1123\CFA provides a new control structure, @choose@, which mimics @switch@, but reverses the meaning of fall through:
     1124\begin{cquote}
     1125\lstDeleteShortInline@%
     1126\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1127\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     1128\begin{cfa}
     1129`choose` ( day ) {
     1130  case Mon~Thu:
     1131        // program
     1132
     1133  case Fri:
     1134        // program
     1135        wallet += pay;
     1136        `fallthrough;`
     1137  case Sat:
     1138        // party
     1139        wallet -= party;
     1140
     1141  case Sun:
     1142        // rest
     1143
     1144  default:
     1145        // error
     1146}
     1147\end{cfa}
     1148&
     1149\begin{cfa}
     1150switch ( day ) {
     1151  case Mon: case Tue: case Wed: case Thu:
     1152        // program
     1153        `break;`
     1154  case Fri:
     1155        // program
     1156        wallet += pay;
     1157
     1158  case Sat:
     1159        // party
     1160        wallet -= party;
     1161        `break;`
     1162  case Sun:
     1163        // rest
     1164        `break;`
     1165  default:
     1166        // error
     1167}
     1168\end{cfa}
     1169\end{tabular}
     1170\lstMakeShortInline@%
     1171\end{cquote}
     1172Collectively, these enhancements reduce programmer burden and increase readability and safety.
     1173
     1174\begin{comment}
    10521175Forgotten @break@ statements at the end of @switch@ cases are a persistent sort of programmer error in C, and the @break@ statements themselves introduce visual clutter and an un-C-like keyword-based block delimiter.
    10531176\CFA addresses this error by introducing a @choose@ statement, which works identically to a @switch@ except that its default end-of-case behaviour is to break rather than to fall through for all non-empty cases.
     
    10701193}
    10711194\end{cfa}
     1195\end{comment}
     1196
    10721197
    10731198\subsection{\texorpdfstring{\LstKeywordStyle{with} Clause / Statement}{with Clause / Statement}}
     
    12551380\lstDeleteShortInline@%
    12561381\lstset{moredelim=**[is][\color{blue}]{+}{+}}
    1257 \begin{tabular}{@{}l@{\hspace{3em}}l@{}}
    1258 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1382\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1383\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
    12591384\begin{cfa}
    12601385+[5] *+ `int` x1;
    12611386+* [5]+ `int` x2;
    1262 +[* [5] int]+ f`( int p )`;
     1387`[* [5] int]` f+( int p )+;
    12631388\end{cfa}
    12641389&
     
    12661391`int` +*+ x1 +[5]+;
    12671392`int` +(*+x2+)[5]+;
    1268 +int (*+f`( int p )`+)[5]+;
     1393`int (*`f+( int p )+`)[5]`;
    12691394\end{cfa}
    12701395\end{tabular}
     
    12771402\begin{cquote}
    12781403\lstDeleteShortInline@%
    1279 \begin{tabular}{@{}l@{\hspace{3em}}l@{}}
    1280 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1404\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1405\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
    12811406\begin{cfa}
    12821407`*` int x, y;
     
    12921417\begin{cquote}
    12931418\lstDeleteShortInline@%
    1294 \begin{tabular}{@{}l@{\hspace{3em}}l@{}}
    1295 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1419\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1420\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
    12961421\begin{cfa}
    12971422`*` int x;
     
    13101435\begin{cquote}
    13111436\lstDeleteShortInline@%
    1312 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
    1313 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
     1437\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}}
     1438\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{C}}      \\
    13141439\begin{cfa}
    13151440[ 5 ] int z;
     
    13511476\begin{cquote}
    13521477\lstDeleteShortInline@%
    1353 \begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
    1354 \multicolumn{1}{c@{\hspace{1em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{1em}}}{\textbf{C}} \\
     1478\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}}
     1479\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{C}}      \\
    13551480\begin{cfa}
    13561481const * const int x;
     
    13741499\begin{cquote}
    13751500\lstDeleteShortInline@%
    1376 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
    1377 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
     1501\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}}
     1502\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{C}}      \\
    13781503\begin{cfa}
    13791504extern [ 5 ] int x;
     
    13971522\begin{cquote}
    13981523\lstDeleteShortInline@%
    1399 \begin{tabular}{@{}l@{\hspace{3em}}l@{}}
    1400 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1524\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1525\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
    14011526\begin{cfa}
    14021527y = (* int)x;
     
    14151540Therefore, a programmer has the option of either continuing to use traditional C declarations or taking advantage of the new style.
    14161541Clearly, both styles need to be supported for some time due to existing C-style header-files, particularly for UNIX-like systems.
     1542
     1543The syntax of the new routine prototype declaration follows directly from the new routine definition syntax;
     1544as well, parameter names are optional, \eg:
     1545\begin{cfa}
     1546[ int x ] f ();                                                 $\C{// returning int with no parameters}$
     1547[ * int ] g (int y);                                    $\C{// returning pointer to int with int parameter}$
     1548[ ] h ( int, char );                                    $\C{// returning no result with int and char parameters}$
     1549[ * int, int ] j ( int );                               $\C{// returning pointer to int and int, with int parameter}$
     1550\end{cfa}
     1551This syntax allows a prototype declaration to be created by cutting and pasting source text from the routine definition header (or vice versa).
     1552Like C, it is possible to declare multiple routine-prototypes in a single declaration, where the return type is distributed across \emph{all} routine names in the declaration list, \eg:
     1553\begin{cquote}
     1554\lstDeleteShortInline@%
     1555\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1556\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     1557\begin{cfa}
     1558[double] foo(), foo( int ), foo( double ) {...}
     1559\end{cfa}
     1560&
     1561\begin{cfa}
     1562double foo1(), foo2( int ), foo3( double );
     1563\end{cfa}
     1564\end{tabular}
     1565\lstMakeShortInline@%
     1566\end{cquote}
     1567\CFA allows the last routine in the list to define its body.
     1568
     1569Declaration qualifiers can only appear at the start of a \CFA routine declaration,\footref{StorageClassSpecifier} \eg:
     1570\begin{cfa}
     1571extern [ int ] f ( int );
     1572static [ int ] g ( int );
     1573\end{cfa}
     1574
     1575The syntax for pointers to \CFA routines specifies the pointer name on the right, \eg:
     1576\begin{cfa}
     1577* [ int x ] () fp;                                              $\C{// pointer to routine returning int with no parameters}$
     1578* [ * int ] (int y) gp;                                 $\C{// pointer to routine returning pointer to int with int parameter}$
     1579* [ ] (int,char) hp;                                    $\C{// pointer to routine returning no result with int and char parameters}$
     1580* [ * int,int ] ( int ) jp;                             $\C{// pointer to routine returning pointer to int and int, with int parameter}$
     1581\end{cfa}
     1582While parameter names are optional, \emph{a routine name cannot be specified};
     1583for example, the following is incorrect:
     1584\begin{cfa}
     1585* [ int x ] f () fp;                                    $\C{// routine name "f" is not allowed}$
     1586\end{cfa}
    14171587
    14181588
     
    15931763In addition to the expressive power, \lstinline|@=| provides a simple path for migrating legacy C code to \CFA, by providing a mechanism to incrementally convert initializers; the \CFA design team decided to introduce a new syntax for this escape hatch because we believe that our RAII implementation will handle the vast majority of code in a desirable way, and we wished to maintain familiar syntax for this common case.
    15941764
     1765
     1766\subsection{Type Nesting}
     1767
     1768\CFA allows \newterm{type nesting}, and type qualification of the nested types (see Figure~\ref{f:TypeNestingQualification}), whereas C hoists (refactors) nested types into the enclosing scope and has no type qualification.
     1769\begin{figure}
     1770\centering
     1771\lstDeleteShortInline@%
     1772\begin{tabular}{@{}l@{\hspace{3em}}l|l@{}}
     1773\multicolumn{1}{c@{\hspace{3em}}}{\textbf{C Type Nesting}}      & \multicolumn{1}{c}{\textbf{C Implicit Hoisting}}      & \multicolumn{1}{|c}{\textbf{\CFA}}    \\
     1774\hline
     1775\begin{cfa}
     1776struct S {
     1777        enum C { R, G, B };
     1778        struct T {
     1779                union U { int i, j; };
     1780                enum C c;
     1781                short int i, j;
     1782        };
     1783        struct T t;
     1784} s;
     1785
     1786int rtn() {
     1787        s.t.c = R;
     1788        struct T t = { R, 1, 2 };
     1789        enum C c;
     1790        union U u;
     1791}
     1792\end{cfa}
     1793&
     1794\begin{cfa}
     1795enum C { R, G, B };
     1796union U { int i, j; };
     1797struct T {
     1798        enum C c;
     1799        short int i, j;
     1800};
     1801struct S {
     1802        struct T t;
     1803} s;
     1804       
     1805
     1806
     1807
     1808
     1809
     1810
     1811\end{cfa}
     1812&
     1813\begin{cfa}
     1814struct S {
     1815        enum C { R, G, B };
     1816        struct T {
     1817                union U { int i, j; };
     1818                enum C c;
     1819                short int i, j;
     1820        };
     1821        struct T t;
     1822} s;
     1823
     1824int rtn() {
     1825        s.t.c = `S.`R;  // type qualification
     1826        struct `S.`T t = { `S.`R, 1, 2 };
     1827        enum `S.`C c;
     1828        union `S.T.`U u;
     1829}
     1830\end{cfa}
     1831\end{tabular}
     1832\lstMakeShortInline@%
     1833\caption{Type Nesting / Qualification}
     1834\label{f:TypeNestingQualification}
     1835\end{figure}
     1836In the left example in C, types @C@, @U@ and @T@ are implicitly hoisted outside of type @S@ into the containing block scope.
     1837In the right example in \CFA, the types are not hoisted and are accessed using the field-selection operator ``@.@'' for type qualification, as does Java, rather than the \CC type-selection operator ``@::@''.
     1838
     1839
    15951840\subsection{Default Parameters}
    15961841
     
    15981843\section{Literals}
    15991844
    1600 C already includes limited polymorphism for literals -- @0@ can be either an integer or a pointer literal, depending on context, while the syntactic forms of literals of the various integer and floating-point types are very similar, differing from each other only in suffix.
     1845C already includes limited polymorphism for literals -- @0@ can be either an integer or a pointer literal, depending on context, while the syntactic forms of literals of the various integer and float types are very similar, differing from each other only in suffix.
    16011846In keeping with the general \CFA approach of adding features while respecting ``the C way'' of doing things, we have extended both C's polymorphic zero and typed literal syntax to interoperate with user-defined types, while maintaining a backwards-compatible semantics.
    16021847
     
    16221867struct Weight { double stones; };
    16231868
    1624 void ?{}( Weight & w ) { w.stones = 0; } $\C{// operations}$
     1869void ?{}( Weight & w ) { w.stones = 0; }        $\C{// operations}$
    16251870void ?{}( Weight & w, double w ) { w.stones = w; }
    16261871Weight ?+?( Weight l, Weight r ) { return (Weight){ l.stones + r.stones }; }
     
    16311876
    16321877int main() {
    1633         Weight w, hw = { 14 };                  $\C{// 14 stone}$
     1878        Weight w, hw = { 14 };                                  $\C{// 14 stone}$
    16341879        w = 11@`st@ + 1@`lb@;
    16351880        w = 70.3@`kg@;
    16361881        w = 155@`lb@;
    1637         w = 0x_9b_u@`lb@;                               $\C{// hexadecimal unsigned weight (155)}$
    1638         w = 0_233@`lb@;                                 $\C{// octal weight (155)}$
     1882        w = 0x_9b_u@`lb@;                                               $\C{// hexadecimal unsigned weight (155)}$
     1883        w = 0_233@`lb@;                                                 $\C{// octal weight (155)}$
    16391884        w = 5@`st@ + 8@`kg@ + 25@`lb@ + hw;
    16401885}
    16411886\end{cfa}
    16421887}%
     1888
     1889
     1890\section{Libraries}
     1891
     1892As stated in Section~\ref{sec:poly-fns}, \CFA inherits a large corpus of library code, where other programming languages must rewrite or provide fragile inter-language communication with C.
     1893\CFA has replacement libraries condensing hundreds of existing C names into tens of \CFA overloaded names, all without rewriting the actual computations.
     1894In many cases, the interface is an inline wrapper providing overloading during compilation but zero cost at runtime.
     1895The following sections give a glimpse of the interface reduction to many C libraries.
     1896In many cases, @signed@/@unsigned@ @char@ and @short@ routines are available (but not shown) to ensure expression computations remain in a single type, as conversions can distort results.
     1897
     1898
     1899\subsection{Limits}
     1900
     1901C library @limits.h@ provides lower and upper bound constants for the basic types.
     1902\CFA name overloading is used to condense these typed constants, \eg:
     1903\begin{cquote}
     1904\lstDeleteShortInline@%
     1905\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1906\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{Definition}}       & \multicolumn{1}{c}{\textbf{Usage}}    \\
     1907\begin{cfa}
     1908const short int `MIN` = -32768;
     1909const int `MIN` = -2147483648;
     1910const long int `MIN` = -9223372036854775808L;
     1911\end{cfa}
     1912&
     1913\begin{cfa}
     1914short int si = `MIN`;
     1915int i = `MIN`;
     1916long int li = `MIN`;
     1917\end{cfa}
     1918\end{tabular}
     1919\lstMakeShortInline@%
     1920\end{cquote}
     1921The result is a significant reduction in names to access typed constants, \eg:
     1922\begin{cquote}
     1923\lstDeleteShortInline@%
     1924\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1925\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     1926\begin{cfa}
     1927MIN
     1928MAX
     1929M_PI
     1930M_E
     1931\end{cfa}
     1932&
     1933\begin{cfa}
     1934SCHAR_MIN, CHAR_MIN, SHRT_MIN, INT_MIN, LONG_MIN, LLONG_MIN,
     1935SCHAR_MAX, UCHAR_MAX, SHRT_MAX, INT_MAX, LONG_MAX, LLONG_MAX,
     1936M_PI, M_PIl, M_CPI, M_CPIl,
     1937M_E, M_El, M_CE, M_CEl
     1938\end{cfa}
     1939\end{tabular}
     1940\lstMakeShortInline@%
     1941\end{cquote}
     1942
     1943
     1944\subsection{Math}
     1945
     1946C library @math.h@ provides many mathematical routines.
     1947\CFA routine overloading is used to condense these mathematical routines, \eg:
     1948\begin{cquote}
     1949\lstDeleteShortInline@%
     1950\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1951\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{Definition}}       & \multicolumn{1}{c}{\textbf{Usage}}    \\
     1952\begin{cfa}
     1953float `log`( float x );
     1954double `log`( double );
     1955double _Complex `log`( double _Complex x );
     1956\end{cfa}
     1957&
     1958\begin{cfa}
     1959float f = `log`( 3.5 );
     1960double d = `log`( 3.5 );
     1961double _Complex dc = `log`( 3.5+0.5I );
     1962\end{cfa}
     1963\end{tabular}
     1964\lstMakeShortInline@%
     1965\end{cquote}
     1966The result is a significant reduction in names to access math routines, \eg:
     1967\begin{cquote}
     1968\lstDeleteShortInline@%
     1969\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1970\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     1971\begin{cfa}
     1972log
     1973sqrt
     1974sin
     1975\end{cfa}
     1976&
     1977\begin{cfa}
     1978logf, log, logl, clogf, clog, clogl
     1979sqrtf, sqrt, sqrtl, csqrtf, csqrt, csqrtl
     1980sinf, sin, sinl, csinf, csin, csinl
     1981\end{cfa}
     1982\end{tabular}
     1983\lstMakeShortInline@%
     1984\end{cquote}
     1985While \Celeven has type-generic math~\cite[\S~7.25]{C11} in @tgmath.h@ to provide a similar mechanism, these macros are limited, matching a routine name with a single set of floating type(s).
     1986For example, it is not possible to overload @atan@ for both one and two arguments;
     1987instead the names @atan@ and @atan2@ are required.
     1988The key observation is that only a restricted set of type-generic macros are provided for a limited set of routine names, which do not generalize across the type system, as in \CFA.
     1989
     1990
     1991\subsection{Standard}
     1992
     1993C library @stdlib.h@ provides many general routines.
     1994\CFA routine overloading is used to condense these utility routines, \eg:
     1995\begin{cquote}
     1996\lstDeleteShortInline@%
     1997\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     1998\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{Definition}}       & \multicolumn{1}{c}{\textbf{Usage}}    \\
     1999\begin{cfa}
     2000unsigned int `abs`( int );
     2001double `abs`( double );
     2002double abs( double _Complex );
     2003\end{cfa}
     2004&
     2005\begin{cfa}
     2006unsigned int i = `abs`( -1 );
     2007double d = `abs`( -1.5 );
     2008double d = `abs`( -1.5+0.5I );
     2009\end{cfa}
     2010\end{tabular}
     2011\lstMakeShortInline@%
     2012\end{cquote}
     2013The result is a significant reduction in names to access utility routines, \eg:
     2014\begin{cquote}
     2015\lstDeleteShortInline@%
     2016\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     2017\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     2018\begin{cfa}
     2019abs
     2020strto
     2021random
     2022\end{cfa}
     2023&
     2024\begin{cfa}
     2025abs, labs, llabs, fabsf, fabs, fabsl, cabsf, cabs, cabsl
     2026strtol, strtoul, strtoll, strtoull, strtof, strtod, strtold
     2027srand48, mrand48, lrand48, drand48
     2028\end{cfa}
     2029\end{tabular}
     2030\lstMakeShortInline@%
     2031\end{cquote}
     2032In addition, there are polymorphic routines, like @min@ and @max@, which work on any type with operators @?<?@ or @?>?@.
     2033
     2034The following shows one example where \CFA \emph{extends} an existing standard C interface to reduce complexity and provide safety.
     2035C/\Celeven provide a number of complex and overlapping storage-management operations to support the following capabilities:
     2036\begin{description}[itemsep=2pt,parsep=0pt]
     2037\item[fill]
     2038after allocation the storage is filled with a specified character.
     2039\item[resize]
     2040an existing allocation is decreased or increased in size.
     2041In either case, new storage may or may not be allocated and, if there is a new allocation, as much data as possible from the existing allocation is copied.
     2042For an increase in storage size, new storage after the copied data may be filled.
     2043\item[alignment]
     2044an allocation starts on a specified memory boundary, \eg, an address multiple of 64 or 128 for cache-line purposes.
     2045\item[array]
     2046the allocation size is scaled to the specified number of array elements.
     2047An array may be filled, resized, or aligned.
     2048\end{description}
     2049Table~\ref{t:StorageManagementOperations} shows the capabilities provided by C/\Celeven allocation-routines and how all the capabilities can be combined into two \CFA routines.
     2050
     2051\CFA storage-management routines extend the C equivalents by overloading, providing shallow type-safety, and removing the need to specify the base allocation-size.
     2052The following example contrasts \CFA and C storage-allocation operations that perform the same actions with the same type safety:
     2053\begin{cquote}
     2054\begin{cfa}[aboveskip=0pt]
     2055size_t  dim = 10;                                                       $\C{// array dimension}$
     2056char fill = '\xff';                                                     $\C{// initialization fill value}$
     2057int * ip;
     2058\end{cfa}
     2059\lstDeleteShortInline@%
     2060\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     2061\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{C}}        \\
     2062\begin{cfa}
     2063ip = alloc();
     2064ip = alloc( fill );
     2065ip = alloc( dim );
     2066ip = alloc( dim, fill );
     2067ip = alloc( ip, 2 * dim );
     2068ip = alloc( ip, 4 * dim, fill );
     2069
     2070ip = align_alloc( 16 );
     2071ip = align_alloc( 16, fill );
     2072ip = align_alloc( 16, dim );
     2073ip = align_alloc( 16, dim, fill );
     2074\end{cfa}
     2075&
     2076\begin{cfa}
     2077ip = (int *)malloc( sizeof( int ) );
     2078ip = (int *)malloc( sizeof( int ) ); memset( ip, fill, sizeof( int ) );
     2079ip = (int *)malloc( dim * sizeof( int ) );
     2080ip = (int *)malloc( dim * sizeof( int ) ); memset( ip, fill, dim * sizeof( int ) );
     2081ip = (int *)realloc( ip, 2 * dim * sizeof( int ) );
     2082ip = (int *)realloc( ip, 4 * dim * sizeof( int ) ); memset( ip, fill, 4 * dim * sizeof( int ) );
     2083
     2084ip = memalign( 16, sizeof( int ) );
     2085ip = memalign( 16, sizeof( int ) ); memset( ip, fill, sizeof( int ) );
     2086ip = memalign( 16, dim * sizeof( int ) );
     2087ip = memalign( 16, dim * sizeof( int ) ); memset( ip, fill, dim * sizeof( int ) );
     2088\end{cfa}
     2089\end{tabular}
     2090\lstMakeShortInline@%
     2091\end{cquote}
     2092Variadic @new@ (see Section~\ref{sec:variadic-tuples}) cannot support the same overloading because extra parameters are for initialization.
     2093Hence, there are @new@ and @anew@ routines for single and array variables, and the fill values are the arguments to the constructor, \eg:
     2094\begin{cfa}
     2095struct S { int i, j; };
     2096void ?{}( S & s, int i, int j ) { s.i = i; s.j = j; }
     2097S * s = new( 2, 3 );                                            $\C{// allocate storage and run constructor}$
     2098S * as = anew( dim, 2, 3 );                                     $\C{// each array element initialized to 2, 3}$
     2099\end{cfa}
     2100Note, \CC can only initialize array elements via the default constructor.
     2101
     2102Finally, the \CFA memory-allocator has \newterm{sticky properties} for dynamic storage: fill and alignment are remembered with an object's storage in the heap.
     2103When a @realloc@ is performed, the sticky properties are respected, so that new storage is correctly aligned and initialized with the fill character.
     2104
     2105\begin{table}
     2106\centering
     2107\lstDeleteShortInline@%
     2108\lstMakeShortInline~%
     2109\begin{tabular}{@{}r|r|l|l|l|l@{}}
     2110\multicolumn{1}{c}{}&           & \multicolumn{1}{c|}{fill}     & resize        & alignment     & array \\
     2111\hline
     2112C               & ~malloc~                      & no                    & no            & no            & no    \\
     2113                & ~calloc~                      & yes (0 only)  & no            & no            & yes   \\
     2114                & ~realloc~                     & no/copy               & yes           & no            & no    \\
     2115                & ~memalign~            & no                    & no            & yes           & no    \\
     2116                & ~posix_memalign~      & no                    & no            & yes           & no    \\
     2117\hline
     2118C11             & ~aligned_alloc~       & no                    & no            & yes           & no    \\
     2119\hline
     2120\CFA    & ~alloc~                       & yes/copy              & no/yes        & no            & yes   \\
     2121                & ~align_alloc~         & yes                   & no            & yes           & yes   \\
     2122\end{tabular}
     2123\lstDeleteShortInline~%
     2124\lstMakeShortInline@%
     2125\caption{Storage-Management Operations}
     2126\label{t:StorageManagementOperations}
     2127\end{table}
     2128
     2129
     2130\subsection{I/O}
     2131\label{s:IOLibrary}
     2132
     2133The goal of \CFA I/O is to simplify the common cases, while fully supporting polymorphism and user-defined types in a consistent way.
     2134The approach combines ideas from \CC and Python.
     2135The \CFA header file for the I/O library is @fstream@.
     2136
     2137The common case is printing out a sequence of variables separated by whitespace.
     2138\begin{cquote}
     2139\lstDeleteShortInline@%
     2140\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
     2141\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{\CC}}      \\
     2142\begin{cfa}
     2143int x = 1, y = 2, z = 3;
     2144sout | x `|` y `|` z | endl;
     2145\end{cfa}
     2146&
     2147\begin{cfa}
     2148
     2149cout << x `<< " "` << y `<< " "` << z << endl;
     2150\end{cfa}
     2151\\
     2152\begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
     21531` `2` `3
     2154\end{cfa}
     2155&
     2156\begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
     21571 2 3
     2158\end{cfa}
     2159\end{tabular}
     2160\lstMakeShortInline@%
     2161\end{cquote}
     2162The \CFA form has half the characters of the \CC form, and is similar to Python I/O with respect to implicit separators.
     2163Similar simplification occurs for tuple I/O, which prints all tuple values separated by ``\lstinline[showspaces=true]@, @''.
     2164\begin{cfa}
     2165[int, [ int, int ] ] t1 = [ 1, [ 2, 3 ] ], t2 = [ 4, [ 5, 6 ] ];
     2166sout | t1 | t2 | endl;                                  $\C{// print tuples}$
     2167\end{cfa}
     2168\begin{cfa}[showspaces=true,aboveskip=0pt]
     21691`, `2`, `3 4`, `5`, `6
     2170\end{cfa}
     2171Finally, \CFA uses the logical-or operator for I/O as it is the lowest-priority overloadable operator, other than assignment.
     2172Therefore, fewer output expressions require parentheses.
     2173\begin{cquote}
     2174\lstDeleteShortInline@%
     2175\begin{tabular}{@{}ll@{}}
     2176\textbf{\CFA:}
     2177&
     2178\begin{cfa}
     2179sout | x * 3 | y + 1 | z << 2 | x == y | (x | y) | (x || y) | (x > z ? 1 : 2) | endl;
     2180\end{cfa}
     2181\\
     2182\textbf{\CC:}
     2183&
     2184\begin{cfa}
     2185cout << x * 3 << y + 1 << `(`z << 2`)` << `(`x == y`)` << (x | y) << (x || y) << (x > z ? 1 : 2) << endl;
     2186\end{cfa}
     2187\\
     2188\textbf{output:}
     2189&
     2190\begin{cfa}[showspaces=true,aboveskip=0pt]
     21913 3 12 0 3 1 2
     2192\end{cfa}
     2193\end{tabular}
     2194\lstMakeShortInline@%
     2195\end{cquote}
     2196There is a weak similarity between the \CFA logical-or operator and the Shell pipe-operator for moving data, where data flows in the correct direction for input but the opposite direction for output.
     2197
     2198The implicit separator character (space/blank) is a separator, not a terminator.
     2199The rules for implicitly adding the separator are:
     2200\begin{itemize}[itemsep=2pt,parsep=0pt]
     2201\item
     2202A separator does not appear at the start or end of a line.
     2203\item
     2204A separator does not appear before or after a character literal or variable.
     2205\item
     2206A separator does not appear before or after a null (empty) C string, which is a local mechanism to disable insertion of the separator character.
     2207\item
     2208A separator does not appear before a C string starting with the characters: \lstinline[mathescape=off,basicstyle=\tt]@([{=$@
     2209\item
     2210A separator does not appear after a C string ending with the characters: \lstinline[basicstyle=\tt]@,.;!?)]}%@
     2211\item
     2212{\lstset{language=CFA,deletedelim=**[is][]{`}{`}}
     2213A separator does not appear before or after a C string beginning/ending with the quote or whitespace characters: \lstinline[basicstyle=\tt,showspaces=true]@`'": \t\v\f\r\n@
     2214}%
     2215\item
     2216There are routines to set and get the separator string, and manipulators to toggle separation on and off in the middle of output.
     2217\end{itemize}
     2218
     2219
     2220\subsection{Multi-precision Integers}
     2221\label{s:MultiPrecisionIntegers}
     2222
     2223\CFA has an interface to the GMP multi-precision signed-integers~\cite{GMP}, similar to the \CC interface provided by GMP.
     2224The \CFA interface wraps GMP routines into operator routines to make programming with multi-precision integers identical to using fixed-sized integers.
     2225The \CFA type name for multi-precision signed-integers is @Int@ and the header file is @gmp@.
     2226The following multi-precision factorial programs contrast using GMP with the \CFA and C interfaces.
     2227\begin{cquote}
     2228\lstDeleteShortInline@%
     2229\begin{tabular}{@{}l@{\hspace{\parindentlnth}}@{\hspace{\parindentlnth}}l@{}}
     2230\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}     & \multicolumn{1}{@{\hspace{\parindentlnth}}c}{\textbf{C}}      \\
     2231\begin{cfa}
     2232#include <gmp>
     2233int main( void ) {
     2234        sout | "Factorial Numbers" | endl;
     2235        Int fact = 1;
     2236
     2237        sout | 0 | fact | endl;
     2238        for ( unsigned int i = 1; i <= 40; i += 1 ) {
     2239                fact *= i;
     2240                sout | i | fact | endl;
     2241        }
     2242}
     2243\end{cfa}
     2244&
     2245\begin{cfa}
     2246#include <gmp.h>
     2247int main( void ) {
     2248        `gmp_printf`( "Factorial Numbers\n" );
     2249        `mpz_t` fact;
     2250        `mpz_init_set_ui`( fact, 1 );
     2251        `gmp_printf`( "%d %Zd\n", 0, fact );
     2252        for ( unsigned int i = 1; i <= 40; i += 1 ) {
     2253                `mpz_mul_ui`( fact, fact, i );
     2254                `gmp_printf`( "%d %Zd\n", i, fact );
     2255        }
     2256}
     2257\end{cfa}
     2258\end{tabular}
     2259\lstMakeShortInline@%
     2260\end{cquote}
     2261
    16432262
    16442263\section{Evaluation}
     
    17042323
    17052324\begin{table}
     2325\centering
    17062326\caption{Properties of benchmark code}
    17072327\label{tab:eval}
Note: See TracChangeset for help on using the changeset viewer.