Context Navigation

Reverse Diff

Paper.tex [c659968:06b176d]

File:

: 1 edited

doc/papers/general/Paper.tex (modified) (13 diffs)

Legend:

: Unmodified
: Added
: Removed

doc/papers/general/Paper.tex

-              rc659968
+              r06b176d
 \usepackage{fullpage}
+\usepackage{epic,eepic}
 \usepackage{xspace,calc,comment}
 \usepackage{upquote}                                                                    % switch curled `'" to straight
 …
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \newcommand{\Textbf}[1]{{\color{red}\textbf{#1}}}
+\newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}}
 \newcommand{\TODO}[1]{\textbf{TODO}: {\itshape #1}} % TODO included
 %\newcommand{\TODO}[1]{} % TODO elided
 …
 \label{s:WithClauseStatement}
 Grouping heterogeneous data into \newterm{aggregate}s is a common programming practice, and an aggregate can be further organized into more complex structures, such as arrays and containers:
 \begin{cfa}
 struct S {                                                              $\C{// aggregate}$
         char c;                                                         $\C{// fields}$
+Grouping heterogeneous data into \newterm{aggregate}s (structure/union) is a common programming practice, and an aggregate can be further organized into more complex structures, such as arrays and containers:
+\begin{cfa}
+struct S {                                                                      $\C{// aggregate}$
+        char c;                                                                 $\C{// fields}$
         int i;
         double d;
 …
 S s, as[10];
 \end{cfa}
 However, routines manipulating aggregates have repetition of the aggregate name to access its containing fields:
+However, routines manipulating aggregates must repeat the aggregate name to access its containing fields:
 \begin{cfa}
 void f( S s ) {
         `s.`c; `s.`i; `s.`d;                            $\C{// access containing fields}$
+        `s.`c; `s.`i; `s.`d;                                    $\C{// access containing fields}$
+}
 \end{cfa}
 …
 \begin{C++}
 class C {
         char c;                                                         $\C{// fields}$
+        char c;                                                                 $\C{// fields}$
         int i;
         double d;
         int mem() {                                                     $\C{// implicit "this" parameter}$
                 `this->`c; `this->`i; `this->`d;$\C{// access containing fields}$
+        int mem() {                                                             $\C{// implicit "this" parameter}$
+                `this->`c; `this->`i; `this->`d;        $\C{// access containing fields}$
+        }
+}
 \end{C++}
+Nesting of member routines in a \lstinline[language=C++]@class@ allows eliding \lstinline[language=C++]@this->@ because of nested lexical-scoping.
+Nesting of member routines in a \lstinline[language=C++]@class@ allows eliding \lstinline[language=C++]@this->@ because of lexical scoping.
+However, for other aggregate parameters, qualification is necessary:
+\begin{cfa}
+struct T { double m, n; };
+int C::mem( T & t ) {                                           $\C{// multiple aggregate parameters}$
+        c; i; d;                                                                $\C{\color{red}// this-{\textgreater}c, this-{\textgreater}i, this-{\textgreater}d}$
+        `t.`m; `t.`n;                                                   $\C{// must qualify}$
+}
+\end{cfa}
 % In object-oriented programming, there is an implicit first parameter, often names @self@ or @this@, which is elided.
 …
 % \TODO{Fill out section. Be sure to mention arbitrary expressions in with-blocks, recent change driven by Thierry to prioritize field name over parameters.}
 \CFA provides a @with@ clause/statement (see Pascal~\cite[\S~4.F]{Pascal}) to elide aggregate qualification to fields by opening a scope containing field identifiers.
 Hence, the qualified fields become variables, making it easier to optimize field references in a block.
 \begin{cfa}
 void f( S s ) `with( s )` {                             $\C{// with clause}$
         c; i; d;                                                        $\C{\color{red}// s.c, s.i, s.d}$
+To simplify the programmer experience, \CFA provides a @with@ clause/statement (see Pascal~\cite[\S~4.F]{Pascal}) to elide aggregate qualification to fields by opening a scope containing the field identifiers.
+Hence, the qualified fields become variables with the side-effect that it is easier to optimize field references in a block.
+\begin{cfa}
+void f( S s ) `with( s )` {                                     $\C{// with clause}$
+        c; i; d;                                                                $\C{\color{red}// s.c, s.i, s.d}$
+}
 \end{cfa}
 …
 \begin{cfa}
 int mem( S & this ) `with( this )` {            $\C{// with clause}$
+        c; i; d;                                                        $\C{\color{red}// this.c, this.i, this.d}$
+}
+\end{cfa}
+The key generality over the object-oriented approach is that one aggregate parameter \lstinline[language=C++]@this@ is not treated specially over other aggregate parameters:
+\begin{cfa}
+struct T { double m, n; };
+        c; i; d;                                                                $\C{\color{red}// this.c, this.i, this.d}$
+}
+\end{cfa}
+with the generality of opening multiple aggregate-parameters:
+\begin{cfa}
 int mem( S & s, T & t ) `with( s, t )` {        $\C{// multiple aggregate parameters}$
+        c; i; d;                                                        $\C{\color{red}// s.c, s.i, s.d}$
+        m; n;                                                           $\C{\color{red}// t.m, t.n}$
+}
+\end{cfa}
+The equivalent object-oriented style is:
+\begin{cfa}
+int S::mem( T & t ) {                                   $\C{// multiple aggregate parameters}$
+        c; i; d;                                                        $\C{\color{red}// this-{\textgreater}c, this-{\textgreater}i, this-{\textgreater}d}$
+        `t.`m; `t.`n;
+        c; i; d;                                                                $\C{\color{red}// s.c, s.i, s.d}$
+        m; n;                                                                   $\C{\color{red}// t.m, t.n}$
+}
+\end{cfa}
+In detail, the @with@ clause/statement has the form:
+\begin{cfa}
+$\emph{with-statement}$:
+        'with' '(' $\emph{expression-list}$ ')' $\emph{compound-statement}$
+\end{cfa}
+and may appear as the body of a routine or nested within a routine body.
+Each expression in the expression-list provides a type and object.
+The type must be an aggregate type.
+(Enumerations are already opened.)
+The object is the implicit qualifier for the open structure-fields.
+All expressions in the expression list are open in ``parallel'' within the compound statement.
+This semantic is different from Pascal, which nests the openings.
+The difference between parallel and nesting occurs for fields with the same name but different type:
+\begin{cfa}
+struct S { int i; int j; double m; } s, w;
+struct T { int i; int k; int m; } t, w;
+with( s, t ) {
+        j + k;                                                                  $\C{// unambiguous, s.j + t.k}$
+        m = 5.0;                                                                $\C{// unambiguous, s.m = 5.0}$
+        m = 1;                                                                  $\C{// unambiguous, t.m = 1}$
+        int a = s.i + m;                                                $\C{// unambiguous, a = s.i + t.m}$
+        int b = s.i + t.i;                                              $\C{// unambiguous, qualification}$
+        sout | (double)m | endl;                                $\C{// unambiguous, cast}$
+        i;                                                                              $\C{// ambiguous}$
+}
+\end{cfa}
+\CFA's ability to overload variables means usages of fields with the same name can be automatically disambiguated, eliminating most qualification.
+Qualification or a cast is used to disambiguate.
+A cast may be necessary to disambiguate between the overloaded variables in a @with@ expression:
+\begin{cfa}
+with( w ) { ... }                                                       $\C{// ambiguous, same name and no context}$
+with( (S)w ) { ... }                                            $\C{// unambiguous}$
+\end{cfa}
+\begin{cfa}
+struct S { int i, j; } sv;
+with( sv ) {
+        S & sr = sv;
+        with( sr ) {
+                S * sp = &sv;
+                with( *sp ) {
+                        i = 3; j = 4;                                   $\C{\color{red}// sp-{\textgreater}i, sp-{\textgreater}j}$
+                }
+                i = 3; j = 4;                                           $\C{\color{red}// sr.i, sr.j}$
+        }
+        i = 3; j = 4;                                                   $\C{\color{red}// sv.i, sv.j}$
+}
 \end{cfa}
 …
         struct S1 { ... } s1;
         struct S2 { ... } s2;
         `with( s1 )` {                                          $\C{// with statement}$
+        `with( s1 )` {                                                  $\C{// with statement}$
                 // access fields of s1 without qualification
                 `with( s2 )` {                                  $\C{// nesting}$
+                `with( s2 )` {                                          $\C{// nesting}$
                         // access fields of s1 and s2 without qualification
+                }
 …
 \end{cfa}
-When opening multiple structures, fields with the same name and type are ambiguous and must be fully qualified.
-For fields with the same name but different type, context/cast can be used to disambiguate.
-\begin{cfa}
-struct S { int i; int j; double m; } a, c;
-struct T { int i; int k; int m } b, c;
-`with( a, b )` {
-        j + k;                                                  $\C{// unambiguous, unique names define unique types}$
-        i;                                                              $\C{// ambiguous, same name and type}$
-        a.i + b.i;                                              $\C{// unambiguous, qualification defines unique names}$
-        m;                                                              $\C{// ambiguous, same name and no context to define unique type}$
-        m = 5.0;                                                $\C{// unambiguous, same name and context defines unique type}$
-        m = 1;                                                  $\C{// unambiguous, same name and context defines unique type}$
+}
-`with( c )` { ... }                                     $\C{// ambiguous, same name and no context}$
-`with( (S)c )` { ... }                                  $\C{// unambiguous, same name and cast defines unique type}$
-\end{cfa}
-The components in the "with" clause
-  with ( a, b, c ) { ... }
-serve 2 purposes: each component provides a type and object. The type must be a
-structure type. Enumerations are already opened, and I think a union is opened
-to some extent, too. (Or is that just unnamed unions?) The object is the target
-that the naked structure-fields apply to. The components are open in "parallel"
-at the scope of the "with" clause/statement, so opening "a" does not affect
-opening "b", etc. This semantic is different from Pascal, which nests the
-openings.
-Having said the above, it seems reasonable to allow a "with" component to be an
-expression. The type is the static expression-type and the object is the result
-of the expression. Again, the type must be an aggregate. Expressions require
-parenthesis around the components.
-  with( a, b, c ) { ... }
-Does this now make sense?
-Having written more CFA code, it is becoming clear to me that I *really* want
-the "with" to be implemented because I hate having to type all those object
-names for fields. It's a great way to drive people away from the language.
 \subsection{Exception Handling ???}
 …
 \subsection{Alternative Declaration Syntax}
+\newcommand{\R}[1]{\Textbf{#1}}
+\newcommand{\B}[1]{{\Textbf[blue]{#1}}}
+\newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
+C declaration syntax is notoriously confusing and error prone.
+For example, many C programmers are confused by a declaration as simple as:
+\begin{flushleft}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}ll@{}}
+\begin{cfa}
+int * x[5]
+\end{cfa}
+&
+\raisebox{-0.75\totalheight}{\input{Cdecl}}
+\end{tabular}
+\lstMakeShortInline@%
+\end{flushleft}
+Is this an array of 5 pointers to integers or a pointer to an array of 5 integers?
+The fact this declaration is unclear to many C programmers means there are productivity and safety issues even for basic programs.
+Another example of confusion results from the fact that a routine name and its parameters are embedded within the return type, mimicking the way the return value is used at the routine's call site.
+For example, a routine returning a pointer to an array of integers is defined and used in the following way:
+\begin{cfa}
+int `(*`f`())[`5`]` {...};                              $\C{// definition}$
+ ... `(*`f`())[`3`]` += 1;                              $\C{// usage}$
+\end{cfa}
+Essentially, the return type is wrapped around the routine name in successive layers (like an onion).
+While attempting to make the two contexts consistent is a laudable goal, it has not worked out in practice.
+\CFA provides its own type, variable and routine declarations, using a different syntax.
+The new declarations place qualifiers to the left of the base type, while C declarations place qualifiers to the right of the base type.
+In the following example, \R{red} marks the base type and \B{blue} marks the qualifiers.
+The \CFA declarations move the qualifiers to the left of the base type, \ie move the blue to the left of the red, while the qualifiers have the same meaning but are ordered left to right to specify a variable's type.
+\begin{quote}
+\lstDeleteShortInline@%
+\lstset{moredelim=**[is][\color{blue}]{+}{+}}
+\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
+\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
+\begin{cfa}
++[5] *+ `int` x1;
++* [5]+ `int` x2;
++[* [5] int]+ f`( int p )`;
+\end{cfa}
+&
+\begin{cfa}
+`int` +*+ x1 +[5]+;
+`int` +(*+x2+)[5]+;
++int (*+f`( int p )`+)[5]+;
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{quote}
+The only exception is bit field specification, which always appears to the right of the base type.
+% Specifically, the character ©*© is used to indicate a pointer, square brackets ©[©\,©]© are used to represent an array or function return value, and parentheses ©()© are used to indicate a routine parameter.
+However, unlike C, \CFA type declaration tokens are distributed across all variables in the declaration list.
+For instance, variables @x@ and @y@ of type pointer to integer are defined in \CFA as follows:
+\begin{quote}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
+\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
+\begin{cfa}
+`*` int x, y;
+\end{cfa}
+&
+\begin{cfa}
+int `*`x, `*`y;
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{quote}
+The downside of this semantics is the need to separate regular and pointer declarations:
+\begin{quote}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
+\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
+\begin{cfa}
+`*` int x;
+int y;
+\end{cfa}
+&
+\begin{cfa}
+int `*`x, y;
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{quote}
+which is prescribing a safety benefit.
+Other examples are:
+\begin{quote}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
+\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
+\begin{cfa}
+[ 5 ] int z;
+[ 5 ] * char w;
+* [ 5 ] double v;
+struct s {
+        int f0:3;
+        * int f1;
+        [ 5 ] * int f2;
+};
+\end{cfa}
+&
+\begin{cfa}
+int z[ 5 ];
+char * w[ 5 ];
+double (* v)[ 5 ];
+struct s {
+        int f0:3;
+        int * f1;
+        int * f2[ 5 ];
+};
+\end{cfa}
+&
+\begin{cfa}
+// array of 5 integers
+// array of 5 pointers to char
+// pointer to array of 5 doubles
+// common bit field syntax
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{quote}
+All type qualifiers, \eg @const@, @volatile@, etc., are used in the normal way with the new declarations and also appear left to right, \eg:
+\begin{quote}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
+\multicolumn{1}{c@{\hspace{1em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{1em}}}{\textbf{C}} \\
+\begin{cfa}
+const * const int x;
+const * [ 5 ] const int y;
+\end{cfa}
+&
+\begin{cfa}
+int const * const x;
+const int (* const y)[ 5 ];
+\end{cfa}
+&
+\begin{cfa}
+// const pointer to const integer
+// const pointer to array of 5 const integers
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{quote}
+All declaration qualifiers, \eg @extern@, @static@, etc., are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}
+The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature.~\cite[\S~6.11.5(1)]{C11}} \eg:
+\begin{quote}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
+\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
+\begin{cfa}
+extern [ 5 ] int x;
+static * const int y;
+\end{cfa}
+&
+\begin{cfa}
+int extern x[ 5 ];
+const int static * y;
+\end{cfa}
+&
+\begin{cfa}
+// externally visible array of 5 integers
+// internally visible pointer to constant int
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{quote}
+The new declaration syntax can be used in other contexts where types are required, \eg casts and the pseudo-routine @sizeof@:
+\begin{quote}
+\lstDeleteShortInline@%
+\begin{tabular}{@{}l@{\hspace{3em}}l@{}}
+\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c}{\textbf{C}}        \\
+\begin{cfa}
+y = (`* int`)x;
+i = sizeof(`[ 5 ] * int`);
+\end{cfa}
+&
+\begin{cfa}
+y = (`int *`)x;
+i = sizeof(`int * [ 5 ]`);
+\end{cfa}
+\end{tabular}
+\lstMakeShortInline@%
+\end{quote}
+Finally, new \CFA declarations may appear together with C declarations in the same program block, but cannot be mixed within a specific declaration.
+Therefore, a programmer has the option of either continuing to use traditional C declarations or taking advantage of the new style.
+Clearly, both styles need to be supported for some time due to existing C-style header-files, particularly for UNIX systems.
 …
 In \CFA, the address of a @T&@ is an lvalue @T*@, as the address of the underlying @T@ is stored in the reference, and can thus be mutated there.
 The result of this rule is that any reference can be rebound using the existing pointer assignment semantics by assigning a compatible pointer into the address of the reference, \eg @&r1 = &x;@ above.
+This rebinding can occur to an arbitrary depth of reference nesting; loosely speaking, nested address-of operators will produce an lvalue nested pointer up to as deep as the reference they're applied to.
+These explicit address-of operators can be thought of as ``cancelling out'' the implicit dereference operators, \eg @(&`*`)r1 = &x@ or @(&(&`*`)`*`)r3 = &(&`*`)r1@ or even @(&`*`)r2 = (&`*`)`*`r3@ for @&r2 = &r3@.
+More precisely:
+\begin{itemize}
+        \item
+        if @R@ is an rvalue of type {@T &@$_1 \cdots$@ &@$_r$} where $r \ge 1$ references (@&@ symbols) then @&R@ has type {@T `*`&@$_{\color{red}2} \cdots$@ &@$_{\color{red}r}$}, \\ \ie @T@ pointer with $r-1$ references (@&@ symbols).
+        \item
+        if @L@ is an lvalue of type {@T &@$_1 \cdots$@ &@$_l$} where $l \ge 0$ references (@&@ symbols) then @&L@ has type {@T `*`&@$_{\color{red}1} \cdots$@ &@$_{\color{red}l}$}, \\ \ie @T@ pointer with $l$ references (@&@ symbols).
+\end{itemize}
+This rebinding can occur to an arbitrary depth of reference nesting; $n$ address-of operators applied to a reference nested $m$ times will produce an lvalue pointer nested $n$ times if $n \le m$ (note that $n = m+1$ is simply the usual C rvalue address-of operator applied to the $n = m$ case).
+The explicit address-of operators can be thought of as ``cancelling out'' the implicit dereference operators, \eg @(&`*`)r1 = &x@ or @(&(&`*`)`*`)r3 = &(&`*`)r1@ or even @(&`*`)r2 = (&`*`)`*`r3@ for @&r2 = &r3@.
 Since pointers and references share the same internal representation, code using either is equally performant; in fact the \CFA compiler converts references to pointers internally, and the choice between them in user code can be made based solely on convenience.
 …
 In particular, \CFA does not implement class-based encapsulation: neither the constructor nor any other function has privileged access to the implementation details of a type, except through the translation-unit-scope method of opaque structs provided by C.
 In \CFA, a constructor is a function named @?{}@, while a destructor is a function named @^?{}@; like other \CFA operators, these names represent the syntax used to call the constructor or destructor, \eg @x{ ... };@ or @^x{};@.
+In \CFA, a constructor is a function named @?{}@, while a destructor is a function named @^?{}@; like other \CFA operators, these names represent the syntax used to call the constructor or destructor, \eg @S s = { ... };@ or @^(s){};@.
 Every constructor and destructor must have a return type of @void@, and its first parameter must have a reference type whose base type is the type of the object the function constructs or destructs.
 This first parameter is informally called the @this@ parameter, as in many object-oriented languages, though a programmer may give it an arbitrary name.
 …
 \begin{cfa}
 Array a, b;
 a{};                            $\C{// default construct}$
 b{ a };                         $\C{// copy construct}$
 ^a{};                           $\C{// destruct}$
 a{ 5, 0xFFFFFFFF };     $\C{// explicit constructor call}$
+(a){};                                  $\C{// default construct}$
+(b){ a };                               $\C{// copy construct}$
+^(a){};                                 $\C{// destruct}$
+(a){ 5, 0xFFFFFFFF };   $\C{// explicit constructor call}$
 \end{cfa}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changes in doc/papers/general/Paper.tex [c659968:06b176d]

Legend:

doc/papers/general/Paper.tex

Download in other formats: