Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • doc/papers/general/Paper.tex

    rc659968 r06b176d  
    22
    33\usepackage{fullpage}
     4\usepackage{epic,eepic}
    45\usepackage{xspace,calc,comment}
    56\usepackage{upquote}                                                                    % switch curled `'" to straight
     
    3637%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    3738
    38 \newcommand{\Textbf}[1]{{\color{red}\textbf{#1}}}
     39\newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}}
    3940\newcommand{\TODO}[1]{\textbf{TODO}: {\itshape #1}} % TODO included
    4041%\newcommand{\TODO}[1]{} % TODO elided
     
    10521053\label{s:WithClauseStatement}
    10531054
    1054 Grouping heterogenous data into \newterm{aggregate}s is a common programming practice, and an aggregate can be further organized into more complex structures, such as arrays and containers:
    1055 \begin{cfa}
    1056 struct S {                                                              $\C{// aggregate}$
    1057         char c;                                                         $\C{// fields}$
     1055Grouping heterogeneous data into \newterm{aggregate}s (structure/union) is a common programming practice, and an aggregate can be further organized into more complex structures, such as arrays and containers:
     1056\begin{cfa}
     1057struct S {                                                                      $\C{// aggregate}$
     1058        char c;                                                                 $\C{// fields}$
    10581059        int i;
    10591060        double d;
     
    10611062S s, as[10];
    10621063\end{cfa}
    1063 However, routines manipulating aggregates have repeition of the aggregate name to access its containing fields:
     1064However, routines manipulating aggregates must repeat the aggregate name to access its containing fields:
    10641065\begin{cfa}
    10651066void f( S s ) {
    1066         `s.`c; `s.`i; `s.`d;                            $\C{// access containing fields}$
     1067        `s.`c; `s.`i; `s.`d;                                    $\C{// access containing fields}$
    10671068}
    10681069\end{cfa}
     
    10701071\begin{C++}
    10711072class C {
    1072         char c;                                                         $\C{// fields}$
     1073        char c;                                                                 $\C{// fields}$
    10731074        int i;
    10741075        double d;
    1075         int mem() {                                                     $\C{// implicit "this" parameter}$
    1076                 `this->`c; `this->`i; `this->`d;$\C{// access containing fields}$
     1076        int mem() {                                                             $\C{// implicit "this" parameter}$
     1077                `this->`c; `this->`i; `this->`d;        $\C{// access containing fields}$
    10771078        }
    10781079}
    10791080\end{C++}
    1080 Nesting of member routines in a \lstinline[language=C++]@class@ allows eliding \lstinline[language=C++]@this->@ because of nested lexical-scoping.
     1081Nesting of member routines in a \lstinline[language=C++]@class@ allows eliding \lstinline[language=C++]@this->@ because of lexical scoping.
     1082However, for other aggregate parameters, qualification is necessary:
     1083\begin{cfa}
     1084struct T { double m, n; };
     1085int C::mem( T & t ) {                                           $\C{// multiple aggregate parameters}$
     1086        c; i; d;                                                                $\C{\color{red}// this-\textgreater.c, this-\textgreater.i, this-\textgreater.d}$
     1087        `t.`m; `t.`n;                                                   $\C{// must qualify}$
     1088}
     1089\end{cfa}
    10811090
    10821091% In object-oriented programming, there is an implicit first parameter, often names @self@ or @this@, which is elided.
     
    10881097% \TODO{Fill out section. Be sure to mention arbitrary expressions in with-blocks, recent change driven by Thierry to prioritize field name over parameters.}
    10891098
    1090 \CFA provides a @with@ clause/statement (see Pascal~\cite[\S~4.F]{Pascal}) to elide aggregate qualification to fields by opening a scope containing field identifiers.
    1091 Hence, the qualified fields become variables, and making it easier to optimize field references in a block.
    1092 \begin{cfa}
    1093 void f( S s ) `with( s )` {                             $\C{// with clause}$
    1094         c; i; d;                                                        $\C{\color{red}// s.c, s.i, s.d}$
     1099To simplify the programmer experience, \CFA provides a @with@ clause/statement (see Pascal~\cite[\S~4.F]{Pascal}) to elide aggregate qualification to fields by opening a scope containing the field identifiers.
     1100Hence, the qualified fields become variables with the side-effect that it is easier to optimize field references in a block.
     1101\begin{cfa}
     1102void f( S s ) `with( s )` {                                     $\C{// with clause}$
     1103        c; i; d;                                                                $\C{\color{red}// s.c, s.i, s.d}$
    10951104}
    10961105\end{cfa}
     
    10981107\begin{cfa}
    10991108int mem( S & this ) `with( this )` {            $\C{// with clause}$
    1100         c; i; d;                                                        $\C{\color{red}// this.c, this.i, this.d}$
    1101 }
    1102 \end{cfa}
    1103 The key generality over the object-oriented approach is that one aggregate parameter \lstinline[language=C++]@this@ is not treated specially over other aggregate parameters:
    1104 \begin{cfa}
    1105 struct T { double m, n; };
     1109        c; i; d;                                                                $\C{\color{red}// this.c, this.i, this.d}$
     1110}
     1111\end{cfa}
     1112with the generality of opening multiple aggregate-parameters:
     1113\begin{cfa}
    11061114int mem( S & s, T & t ) `with( s, t )` {        $\C{// multiple aggregate parameters}$
    1107         c; i; d;                                                        $\C{\color{red}// s.c, s.i, s.d}$
    1108         m; n;                                                           $\C{\color{red}// t.m, t.n}$
    1109 }
    1110 \end{cfa}
    1111 The equivalent object-oriented style is:
    1112 \begin{cfa}
    1113 int S::mem( T & t ) {                                   $\C{// multiple aggregate parameters}$
    1114         c; i; d;                                                        $\C{\color{red}// this-\textgreater.c, this-\textgreater.i, this-\textgreater.d}$
    1115         `t.`m; `t.`n;
     1115        c; i; d;                                                                $\C{\color{red}// s.c, s.i, s.d}$
     1116        m; n;                                                                   $\C{\color{red}// t.m, t.n}$
     1117}
     1118\end{cfa}
     1119
     1120In detail, the @with@ clause/statement has the form:
     1121\begin{cfa}
     1122$\emph{with-statement}$:
     1123        'with' '(' $\emph{expression-list}$ ')' $\emph{compound-statement}$
     1124\end{cfa}
     1125and may appear as the body of a routine or nested within a routine body.
     1126Each expression in the expression-list provides a type and object.
     1127The type must be an aggregate type.
     1128(Enumerations are already opened.)
     1129The object is the implicit qualifier for the open structure-fields.
     1130
     1131All expressions in the expression list are open in ``parallel'' within the compound statement.
     1132This semantic is different from Pascal, which nests the openings.
     1133The difference between parallel and nesting occurs for fields with the same name but different type:
     1134\begin{cfa}
     1135struct S { int i; int j; double m; } s, w;
     1136struct T { int i; int k; int m; } t, w;
     1137with( s, t ) {
     1138        j + k;                                                                  $\C{// unambiguous, s.j + t.k}$
     1139        m = 5.0;                                                                $\C{// unambiguous, s.m = 5.0}$
     1140        m = 1;                                                                  $\C{// unambiguous, t.m = 1}$
     1141        int a = s.i + m;                                                $\C{// unambiguous, a = s.i + t.m}$
     1142        int b = s.i + t.i;                                              $\C{// unambiguous, qualification}$
     1143        sout | (double)m | endl;                                $\C{// unambiguous, cast}$
     1144        i;                                                                              $\C{// ambiguous}$
     1145}
     1146\end{cfa}
     1147\CFA's ability to overload variables means uses of fields with the same name can be automatically disambiguated, eliminating most qualification.
     1148Qualification or a cast is used to disambiguate.
     1149A cast may be necessary to disambiguate between the overloaded variables in a @with@ expression:
     1150\begin{cfa}
     1151with( w ) { ... }                                                       $\C{// ambiguous, same name and no context}$
     1152with( (S)w ) { ... }                                            $\C{// unambiguous}$
     1153\end{cfa}
     1154
     1155\begin{cfa}
     1156struct S { int i, j; } sv;
     1157with( sv ) {
     1158        S & sr = sv;
     1159        with( sr ) {
     1160                S * sp = &sv;
     1161                with( *sp ) {
     1162                        i = 3; j = 4;                                   $\C{\color{red}// sp-{\textgreater}i, sp-{\textgreater}j}$
     1163                }
     1164                i = 3; j = 4;                                           $\C{\color{red}// sr.i, sr.j}$
     1165        }
     1166        i = 3; j = 4;                                                   $\C{\color{red}// sv.i, sv.j}$
    11161167}
    11171168\end{cfa}
     
    11221173        struct S1 { ... } s1;
    11231174        struct S2 { ... } s2;
    1124         `with( s1 )` {                                          $\C{// with statement}$
     1175        `with( s1 )` {                                                  $\C{// with statement}$
    11251176                // access fields of s1 without qualification
    1126                 `with( s2 )` {                                  $\C{// nesting}$
     1177                `with( s2 )` {                                          $\C{// nesting}$
    11271178                        // access fields of s1 and s2 without qualification
    11281179                }
     
    11341185\end{cfa}
    11351186
    1136 When opening multiple structures, fields with the same name and type are ambiguous and must be fully qualified.
    1137 For fields with the same name but different type, context/cast can be used to disambiguate.
    1138 \begin{cfa}
    1139 struct S { int i; int j; double m; } a, c;
    1140 struct T { int i; int k; int m } b, c;
    1141 `with( a, b )` {
    1142         j + k;                                                  $\C{// unambiguous, unique names define unique types}$
    1143         i;                                                              $\C{// ambiguous, same name and type}$
    1144         a.i + b.i;                                              $\C{// unambiguous, qualification defines unique names}$
    1145         m;                                                              $\C{// ambiguous, same name and no context to define unique type}$
    1146         m = 5.0;                                                $\C{// unambiguous, same name and context defines unique type}$
    1147         m = 1;                                                  $\C{// unambiguous, same name and context defines unique type}$
    1148 }
    1149 `with( c )` { ... }                                     $\C{// ambiguous, same name and no context}$
    1150 `with( (S)c )` { ... }                                  $\C{// unambiguous, same name and cast defines unique type}$
    1151 \end{cfa}
    1152 
    1153 The components in the "with" clause
    1154 
    1155   with ( a, b, c ) { ... }
    1156 
    1157 serve 2 purposes: each component provides a type and object. The type must be a
    1158 structure type. Enumerations are already opened, and I think a union is opened
    1159 to some extent, too. (Or is that just unnamed unions?) The object is the target
    1160 that the naked structure-fields apply to. The components are open in "parallel"
    1161 at the scope of the "with" clause/statement, so opening "a" does not affect
    1162 opening "b", etc. This semantic is different from Pascal, which nests the
    1163 openings.
    1164 
    1165 Having said the above, it seems reasonable to allow a "with" component to be an
    1166 expression. The type is the static expression-type and the object is the result
    1167 of the expression. Again, the type must be an aggregate. Expressions require
    1168 parenthesis around the components.
    1169 
    1170   with( a, b, c ) { ... }
    1171 
    1172 Does this now make sense?
    1173 
    1174 Having written more CFA code, it is becoming clear to me that I *really* want
    1175 the "with" to be implemented because I hate having to type all those object
    1176 names for fields. It's a great way to drive people away from the language.
    1177 
    11781187
    11791188\subsection{Exception Handling ???}
     
    11891198
    11901199\subsection{Alternative Declaration Syntax}
     1200
     1201\newcommand{\R}[1]{\Textbf{#1}}
     1202\newcommand{\B}[1]{{\Textbf[blue]{#1}}}
     1203\newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
     1204
     1205C declaration syntax is notoriously confusing and error prone.
     1206For example, many C programmers are confused by a declaration as simple as:
     1207\begin{flushleft}
     1208\lstDeleteShortInline@%
     1209\begin{tabular}{@{}ll@{}}
     1210\begin{cfa}
     1211int * x[5]
     1212\end{cfa}
     1213&
     1214\raisebox{-0.75\totalheight}{\input{Cdecl}}
     1215\end{tabular}
     1216\lstMakeShortInline@%
     1217\end{flushleft}
     1218Is this an array of 5 pointers to integers or a pointer to an array of 5 integers?
     1219The fact this declaration is unclear to many C programmers means there are productivity and safety issues even for basic programs.
     1220Another example of confusion results from the fact that a routine name and its parameters are embedded within the return type, mimicking the way the return value is used at the routine's call site.
     1221For example, a routine returning a pointer to an array of integers is defined and used in the following way:
     1222\begin{cfa}
     1223int `(*`f`())[`5`]` {...};                              $\C{// definition}$
     1224 ... `(*`f`())[`3`]` += 1;                              $\C{// usage}$
     1225\end{cfa}
     1226Essentially, the return type is wrapped around the routine name in successive layers (like an onion).
     1227While attempting to make the two contexts consistent is a laudable goal, it has not worked out in practice.
     1228
     1229\CFA provides its own type, variable and routine declarations, using a different syntax.
     1230The new declarations place qualifiers to the left of the base type, while C declarations place qualifiers to the right of the base type.
     1231In the following example, \R{red} is the base type and \B{blue} is the qualifiers.
     1232The \CFA declarations move the qualifiers to the left of the base type, \ie move the blue to the left of the red, while the qualifiers have the same meaning but are ordered left to right to specify a variable's type.
     1233\begin{quote}
     1234\lstDeleteShortInline@%
     1235\lstset{moredelim=**[is][\color{blue}]{+}{+}}
     1236\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
     1237\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1238\begin{cfa}
     1239+[5] *+ `int` x1;
     1240+* [5]+ `int` x2;
     1241+[* [5] int]+ f`( int p )`;
     1242\end{cfa}
     1243&
     1244\begin{cfa}
     1245`int` +*+ x1 +[5]+;
     1246`int` +(*+x2+)[5]+;
     1247+int (*+f`( int p )`+)[5]+;
     1248\end{cfa}
     1249\end{tabular}
     1250\lstMakeShortInline@%
     1251\end{quote}
     1252The only exception is a bit-field specification, which always appears to the right of the base type.
     1253% Specifically, the character ©*© is used to indicate a pointer, square brackets ©[©\,©]© are used to represent an array or function return value, and parentheses ©()© are used to indicate a routine parameter.
     1254However, unlike C, \CFA type declaration tokens are distributed across all variables in the declaration list.
     1255For instance, variables @x@ and @y@ of type pointer to integer are defined in \CFA as follows:
     1256\begin{quote}
     1257\lstDeleteShortInline@%
     1258\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
     1259\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1260\begin{cfa}
     1261`*` int x, y;
     1262\end{cfa}
     1263&
     1264\begin{cfa}
     1265int `*`x, `*`y;
     1266\end{cfa}
     1267\end{tabular}
     1268\lstMakeShortInline@%
     1269\end{quote}
     1270The downside of this semantics is the need to separate regular and pointer declarations:
     1271\begin{quote}
     1272\lstDeleteShortInline@%
     1273\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
     1274\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1275\begin{cfa}
     1276`*` int x;
     1277int y;
     1278\end{cfa}
     1279&
     1280\begin{cfa}
     1281int `*`x, y;
     1282
     1283\end{cfa}
     1284\end{tabular}
     1285\lstMakeShortInline@%
     1286\end{quote}
     1287which, however, provides a safety benefit.
     1288Other examples are:
     1289\begin{quote}
     1290\lstDeleteShortInline@%
     1291\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
     1292\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
     1293\begin{cfa}
     1294[ 5 ] int z;
     1295[ 5 ] * char w;
     1296* [ 5 ] double v;
     1297struct s {
     1298        int f0:3;
     1299        * int f1;
     1300        [ 5 ] * int f2;
     1301};
     1302\end{cfa}
     1303&
     1304\begin{cfa}
     1305int z[ 5 ];
     1306char * w[ 5 ];
     1307double (* v)[ 5 ];
     1308struct s {
     1309        int f0:3;
     1310        int * f1;
     1311        int * f2[ 5 ]
     1312};
     1313\end{cfa}
     1314&
     1315\begin{cfa}
     1316// array of 5 integers
     1317// array of 5 pointers to char
     1318// pointer to array of 5 doubles
     1319
     1320// common bit field syntax
     1321
     1322
     1323
     1324\end{cfa}
     1325\end{tabular}
     1326\lstMakeShortInline@%
     1327\end{quote}
     1328
     1329All type qualifiers, \eg @const@, @volatile@, etc., are used in the normal way with the new declarations and also appear left to right, \eg:
     1330\begin{quote}
     1331\lstDeleteShortInline@%
     1332\begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
     1333\multicolumn{1}{c@{\hspace{1em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{1em}}}{\textbf{C}} \\
     1334\begin{cfa}
     1335const * const int x;
     1336const * [ 5 ] const int y;
     1337\end{cfa}
     1338&
     1339\begin{cfa}
     1340int const * const x;
     1341const int (* const y)[ 5 ]
     1342\end{cfa}
     1343&
     1344\begin{cfa}
     1345// const pointer to const integer
     1346// const pointer to array of 5 const integers
     1347\end{cfa}
     1348\end{tabular}
     1349\lstMakeShortInline@%
     1350\end{quote}
     1351All declaration qualifiers, \eg @extern@, @static@, etc., are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}
     1352The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature.~\cite[\S~6.11.5(1)]{C11}} \eg:
     1353\begin{quote}
     1354\lstDeleteShortInline@%
     1355\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
     1356\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
     1357\begin{cfa}
     1358extern [ 5 ] int x;
     1359static * const int y;
     1360\end{cfa}
     1361&
     1362\begin{cfa}
     1363int extern x[ 5 ];
     1364const int static * y;
     1365\end{cfa}
     1366&
     1367\begin{cfa}
     1368// externally visible array of 5 integers
     1369// internally visible pointer to constant int
     1370\end{cfa}
     1371\end{tabular}
     1372\lstMakeShortInline@%
     1373\end{quote}
     1374
     1375The new declaration syntax can be used in other contexts where types are required, \eg casts and the pseudo-routine @sizeof@:
     1376\begin{quote}
     1377\lstDeleteShortInline@%
     1378\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
     1379\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
     1380\begin{cfa}
     1381y = (`* int`)x;
     1382i = sizeof(`[ 5 ] * int`);
     1383\end{cfa}
     1384&
     1385\begin{cfa}
     1386y = (`int *`)x;
     1387i = sizeof(`int * [ 5 ]`);
     1388\end{cfa}
     1389\end{tabular}
     1390\lstMakeShortInline@%
     1391\end{quote}
     1392
     1393Finally, new \CFA declarations may appear together with C declarations in the same program block, but cannot be mixed within a specific declaration.
     1394Therefore, a programmer has the option of either continuing to use traditional C declarations or taking advantage of the new style.
     1395Clearly, both styles need to be supported for some time due to existing C-style header-files, particularly for UNIX systems.
    11911396
    11921397
     
    12391444In \CFA, the address of a @T&@ is a lvalue @T*@, as the address of the underlying @T@ is stored in the reference, and can thus be mutated there.
    12401445The result of this rule is that any reference can be rebound using the existing pointer assignment semantics by assigning a compatible pointer into the address of the reference, \eg @&r1 = &x;@ above.
    1241 This rebinding can occur to an arbitrary depth of reference nesting; loosely speaking, nested address-of operators will produce an lvalue nested pointer up to as deep as the reference they're applied to.
    1242 These explicit address-of operators can be thought of as ``cancelling out'' the implicit dereference operators, \eg @(&`*`)r1 = &x@ or @(&(&`*`)`*`)r3 = &(&`*`)r1@ or even @(&`*`)r2 = (&`*`)`*`r3@ for @&r2 = &r3@.
    1243 More precisely:
    1244 \begin{itemize}
    1245         \item
    1246         if @R@ is an rvalue of type {@T &@$_1 \cdots$@ &@$_r$} where $r \ge 1$ references (@&@ symbols) than @&R@ has type {@T `*`&@$_{\color{red}2} \cdots$@ &@$_{\color{red}r}$}, \\ \ie @T@ pointer with $r-1$ references (@&@ symbols).
    1247        
    1248         \item
    1249         if @L@ is an lvalue of type {@T &@$_1 \cdots$@ &@$_l$} where $l \ge 0$ references (@&@ symbols) then @&L@ has type {@T `*`&@$_{\color{red}1} \cdots$@ &@$_{\color{red}l}$}, \\ \ie @T@ pointer with $l$ references (@&@ symbols).
    1250 \end{itemize}
     1446This rebinding can occur to an arbitrary depth of reference nesting; $n$ address-of operators applied to a reference nested $m$ times will produce an lvalue pointer nested $n$ times if $n \le m$ (note that $n = m+1$ is simply the usual C rvalue address-of operator applied to the $n = m$ case).
     1447The explicit address-of operators can be thought of as ``cancelling out'' the implicit dereference operators, \eg @(&`*`)r1 = &x@ or @(&(&`*`)`*`)r3 = &(&`*`)r1@ or even @(&`*`)r2 = (&`*`)`*`r3@ for @&r2 = &r3@.
    12511448
    12521449Since pointers and references share the same internal representation, code using either is equally performant; in fact the \CFA compiler converts references to pointers internally, and the choice between them in user code can be made based solely on convenience.
     
    12831480In particular, \CFA does not implement class-based encapsulation: neither the constructor nor any other function has privileged access to the implementation details of a type, except through the translation-unit-scope method of opaque structs provided by C.
    12841481
    1285 In \CFA, a constructor is a function named @?{}@, while a destructor is a function named @^?{}@; like other \CFA operators, these names represent the syntax used to call the constructor or destructor, \eg @x{ ... };@ or @^x{};@.
     1482In \CFA, a constructor is a function named @?{}@, while a destructor is a function named @^?{}@; like other \CFA operators, these names represent the syntax used to call the constructor or destructor, \eg @S s = { ... };@ or @^(s){};@.
    12861483Every constructor and destructor must have a return type of @void@, and its first parameter must have a reference type whose base type is the type of the object the function constructs or destructs.
    12871484This first parameter is informally called the @this@ parameter, as in many object-oriented languages, though a programmer may give it an arbitrary name.
     
    13361533\begin{cfa}
    13371534Array a, b;
    1338 a{};                            $\C{// default construct}$
    1339 b{ a };                         $\C{// copy construct}$
    1340 ^a{};                           $\C{// destruct}$
    1341 a{ 5, 0xFFFFFFFF };     $\C{// explicit constructor call}$
     1535(a){};                                  $\C{// default construct}$
     1536(b){ a };                               $\C{// copy construct}$
     1537^(a){};                                 $\C{// destruct}$
     1538(a){ 5, 0xFFFFFFFF };   $\C{// explicit constructor call}$
    13421539\end{cfa}
    13431540
Note: See TracChangeset for help on using the changeset viewer.