Changeset 4ada74e for doc/papers/general


Ignore:
Timestamp:
Feb 22, 2018, 10:09:40 AM (6 years ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc
Children:
2a8427c6, 4136ae4, 5a806be4
Parents:
eaa1759
Message:

more updates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • doc/papers/general/Paper.tex

    reaa1759 r4ada74e  
    77\usepackage{listings}                                           % format program code
    88\usepackage{enumitem}
     9\setlist[itemize]{topsep=3pt,itemsep=2pt,parsep=0pt}% global
    910\usepackage[flushmargin]{footmisc}                      % support label/reference in footnote
    1011\usepackage{rotating}
     
    10721073
    10731074Both labelled @continue@ and @break@ are a @goto@ restricted in the following ways:
    1074 \begin{itemize}[topsep=3pt,itemsep=2pt,parsep=0pt]
     1075\begin{itemize}
    10751076\item
    10761077They cannot create a loop, which means only the looping constructs cause looping.
     
    13551356\subsection{Exception Handling}
    13561357
    1357 \CFA provides two forms of exception handling: \newterm{resumption} (fix-up) and \newterm{recovery}.
     1358\CFA provides two forms of exception handling: \newterm{resumption} (fix-up) and \newterm{recovery} (see Figure~\ref{f:CFAExceptionHandling}).
    13581359Both mechanisms provide dynamic call to a handler using dynamic name-lookup, where fix-up has dynamic return and recovery has static return from the handler.
     1360\CFA restricts exception types to those defined by aggregate type @_Exception@.
     1361The form of the raise dictates the set of handlers examined during propagation: \newterm{resumption propagation} (@resume@) only examines resumption handlers (@catchResume@); \newterm{terminating propagation} (@throw@) only examines termination handlers (@catch@).
     1362If @resume@ or @throw@ has no exception type, it is a reresume/rethrow, meaning the current exception continues propagation.
     1363If there is no current exception, the reresume/rethrow results in an error.
     1364
     1365\begin{figure}
    13591366\begin{cquote}
    13601367\lstDeleteShortInline@%
     
    13621369\multicolumn{1}{c@{\hspace{\parindentlnth}}}{\textbf{Resumption}}       & \multicolumn{1}{c}{\textbf{Recovery}} \\
    13631370\begin{cfa}
    1364 _Exception E { int fix; };
     1371`_Exception R { int fix; };`
    13651372void f() {
    1366         E e;
    1367         ... _Resume e;
    1368         ... e.fix // control returns here after handler
    1369 try {
    1370         f();
    1371 } catchResume( E e ) {
    1372         ... e.fix = ...; // return correction to raise
     1373        R r;
     1374        ... `resume( r );` ...
     1375        ... r.fix // control does return here after handler
     1376`try` {
     1377        ... f(); ...
     1378} `catchResume( R r )` {
     1379        ... r.fix = ...; // return correction to raise
    13731380} // dynamic return to _Resume
    13741381\end{cfa}
    13751382&
    13761383\begin{cfa}
    1377 _Exception E {};
     1384`_Exception T {};`
    13781385void f() {
    13791386
    1380         ... _Throw E{};
     1387        ... `throw( T{} );` ...
    13811388        // control does NOT return here after handler
    1382 try {
    1383         f();
    1384 } catch( E e ) {
     1389`try` {
     1390        ... f(); ...
     1391} `catch( T t )` {
    13851392        ... // recover and continue
    13861393} // static return to next statement
     
    13891396\lstMakeShortInline@%
    13901397\end{cquote}
     1398\caption{\CFA Exception Handling}
     1399\label{f:CFAExceptionHandling}
     1400\end{figure}
     1401
     1402The set of exception types in a list of catch clauses may include both a resumption and a termination handler:
     1403\begin{cfa}
     1404try {
     1405        ... resume( `R{}` ); ...
     1406} catchResume( `R` r ) { ... throw( R{} ); ... } $\C{\color{red}// H1}$
     1407   catch( `R` r ) { ... }                                       $\C{\color{red}// H2}$
     1408
     1409\end{cfa}
     1410The resumption propagation raises @R@ and the stack is not unwound;
     1411the exception is caught by the @catchResume@ clause and handler H1 is invoked.
     1412The termination propagation in handler H1 raises @R@ and the stack is unwound;
     1413the exception is caught by the @catch@ clause and handler H2 is invoked.
     1414The termination handler is available because the resumption propagation did not unwind the stack.
     1415
     1416An additional feature is conditional matching in a catch clause:
     1417\begin{cfa}
     1418try {
     1419        ... write( `datafile`, ... ); ...               $\C{// may throw IOError}$
     1420        ... write( `logfile`, ... ); ...
     1421} catch ( IOError err; `err == datafile` ) { ... } $\C{// handle datafile error}$
     1422   catch ( IOError err; `err == logfile` ) { ... } $\C{// handle logfile error}$
     1423   catch ( IOError err ) { ... }                        $\C{// handle error from other files}$
     1424\end{cfa}
     1425where the throw inserts the failing file-handle in the I/O exception.
     1426Conditional catch cannot be trivially mimicked by other mechanisms because once an exception is caught, handler clauses in that @try@ statement are no longer eligible.
     1427
     1428The resumption raise can specify an alternate stack on which to raise an exception, called a \newterm{nonlocal raise}:
     1429\begin{cfa}
     1430resume [ $\emph{exception-type}$ ] [ _At $\emph{alternate-stack}$ ] ;
     1431\end{cfa}
     1432The @_At@ clause raises the specified exception or the currently propagating exception (reresume) at another coroutine or task~\cite{Delisle18}.
     1433Nonlocal raise is restricted to resumption to provide the exception handler the greatest flexibility because processing the exception does not unwind its stack, allowing it to continue after the handler returns.
     1434
     1435To facilitate nonlocal exceptions, \CFA provides dynamic enabling and disabling of nonlocal exception-propagation.
     1436The constructs for controlling propagation of nonlocal exceptions are the @enable@ and the @disable@ blocks:
     1437\begin{cquote}
     1438\lstDeleteShortInline@%
     1439\begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{}}
     1440\begin{cfa}
     1441enable $\emph{exception-type-list}$ {
     1442        // allow non-local resumption
     1443}
     1444\end{cfa}
     1445&
     1446\begin{cfa}
     1447disable $\emph{exception-type-list}$ {
     1448        // disallow non-local resumption
     1449}
     1450\end{cfa}
     1451\end{tabular}
     1452\lstMakeShortInline@%
     1453\end{cquote}
     1454The arguments for @enable@/@disable@ specify the exception types allowed to be propagated or postponed, respectively.
     1455Specifying no exception type is shorthand for specifying all exception types.
     1456Both @enable@ and @disable@ blocks can be nested, turning propagation on/off on entry, and on exit, the specified exception types are restored to their prior state.
     1457
     1458Finally, \CFA provides a Java-like @finally@ clause after the catch clauses:
     1459\begin{cfa}
     1460try {
     1461        ... f(); ...
     1462// catchResume or catch clauses
     1463} `finally` {
     1464        // house keeping
     1465}
     1466\end{cfa}
     1467The finally clause is always executed, i.e., if the try block ends normally or if an exception is raised.
     1468If an exception is raised and caught, the handler is run before the finally clause.
     1469Like a destructor (see Section~\ref{s:ConstructorsDestructors}), a finally clause can raise an exception but not if there is an exception being propagated.
     1470Mimicking the @finally@ clause with mechanisms like RAII is non-trivial when there are multiple types and local accesses.
    13911471
    13921472
     
    14211501For example, a routine returning a pointer to an array of integers is defined and used in the following way:
    14221502\begin{cfa}
    1423 int `(*`f`())[`5`]` {...};                              $\C{// definition}$
    1424  ... `(*`f`())[`3`]` += 1;                              $\C{// usage}$
     1503int `(*`f`())[`5`]` {...};                                      $\C{// definition}$
     1504 ... `(*`f`())[`3`]` += 1;                                      $\C{// usage}$
    14251505\end{cfa}
    14261506Essentially, the return type is wrapped around the routine name in successive layers (like an onion).
     
    15651645as well, parameter names are optional, \eg:
    15661646\begin{cfa}
    1567 [ int x ] f ( /* void */ );                             $\C{// returning int with no parameters}$
    1568 [ int x ] f (...);                                              $\C{// returning int with unknown parameters}$
    1569 [ * int ] g ( int y );                                  $\C{// returning pointer to int with int parameter}$
    1570 [ void ] h ( int, char );                               $\C{// returning no result with int and char parameters}$
    1571 [ * int, int ] j ( int );                               $\C{// returning pointer to int and int, with int parameter}$
     1647[ int x ] f ( /* void */ );                                     $\C{// returning int with no parameters}$
     1648[ int x ] f (...);                                                      $\C{// returning int with unknown parameters}$
     1649[ * int ] g ( int y );                                          $\C{// returning pointer to int with int parameter}$
     1650[ void ] h ( int, char );                                       $\C{// returning no result with int and char parameters}$
     1651[ * int, int ] j ( int );                                       $\C{// returning pointer to int and int, with int parameter}$
    15721652\end{cfa}
    15731653This syntax allows a prototype declaration to be created by cutting and pasting source text from the routine definition header (or vice versa).
     
    15911671The syntax for pointers to \CFA routines specifies the pointer name on the right, \eg:
    15921672\begin{cfa}
    1593 * [ int x ] () fp;                                              $\C{// pointer to routine returning int with no parameters}$
    1594 * [ * int ] ( int y ) gp;                               $\C{// pointer to routine returning pointer to int with int parameter}$
    1595 * [ ] ( int, char ) hp;                                 $\C{// pointer to routine returning no result with int and char parameters}$
    1596 * [ * int, int ] ( int ) jp;                    $\C{// pointer to routine returning pointer to int and int, with int parameter}$
     1673* [ int x ] () fp;                                                      $\C{// pointer to routine returning int with no parameters}$
     1674* [ * int ] ( int y ) gp;                                       $\C{// pointer to routine returning pointer to int with int parameter}$
     1675* [ ] ( int, char ) hp;                                         $\C{// pointer to routine returning no result with int and char parameters}$
     1676* [ * int, int ] ( int ) jp;                            $\C{// pointer to routine returning pointer to int and int, with int parameter}$
    15971677\end{cfa}
    15981678Note, \emph{a routine name cannot be specified}:
    15991679\begin{cfa}
    1600 * [ int x ] f () fp;                                    $\C{// routine name "f" is disallowed}$
     1680* [ int x ] f () fp;                                            $\C{// routine name "f" is disallowed}$
    16011681\end{cfa}
    16021682
     
    16241704\begin{cfa}
    16251705int x = 1, y = 2, * p1, * p2, ** p3;
    1626 p1 = &x;                                                                $\C{// p1 points to x}$
    1627 p2 = &y;                                                                $\C{// p2 points to y}$
    1628 p3 = &p1;                                                               $\C{// p3 points to p1}$
     1706p1 = &x;                                                                        $\C{// p1 points to x}$
     1707p2 = &y;                                                                        $\C{// p2 points to y}$
     1708p3 = &p1;                                                                       $\C{// p3 points to p1}$
    16291709*p2 = ((*p1 + *p2) * (**p3 - *p1)) / (**p3 - 15);
    16301710\end{cfa}
     
    16381718\begin{cfa}
    16391719int x = 1, y = 2, & r1, & r2, && r3;
    1640 &r1 = &x;  $\C{// r1 points to x}$
    1641 &r2 = &y;  $\C{// r2 points to y}$
    1642 &&r3 = &&r1;  $\C{// r3 points to r2}$
     1720&r1 = &x;                                                                       $\C{// r1 points to x}$
     1721&r2 = &y;                                                                       $\C{// r2 points to y}$
     1722&&r3 = &&r1;                                                            $\C{// r3 points to r1}$
    16431723r2 = ((r1 + r2) * (r3 - r1)) / (r3 - 15);       $\C{// implicit dereferencing}$
    16441724\end{cfa}
     
    16611741\begin{cfa}
    16621742int & r = *new( int );
    1663 ...
     1743...                                                                                     $\C{// non-null reference}$
    16641744delete &r;
    1665 r += 1;                 // undefined reference
     1745r += 1;                                                                         $\C{// undefined reference}$
    16661746\end{cfa}
    16671747\end{lrbox}
    16681748Rebinding allows \CFA references to be default-initialized (\eg to a null pointer\footnote{
    1669 While effort has been put into non-null reference checking in \CC and Java, the exercise seems moot for any non-managed languages (C/\CC), given that it only handles one of many different error situations:
     1749While effort has been put into non-null reference checking in \CC and Java, the exercise seems moot for any non-managed languages (C/\CC), given that it only handles one of many different error situations:
    16701750\begin{cquote}
    16711751\usebox{\LstBox}
     
    16821762These explicit address-of operators can be thought of as ``cancelling out'' the implicit dereference operators, \eg @(&`*`)r1 = &x@ or @(&(&`*`)`*`)r3 = &(&`*`)r1@ or even @(&`*`)r2 = (&`*`)`*`r3@ for @&r2 = &r3@.
    16831763More precisely:
    1684 \begin{itemize}[topsep=3pt,itemsep=2pt,parsep=0pt]
     1764\begin{itemize}
    16851765\item
    16861766if @R@ is an rvalue of type {@T &@$_1 \cdots$@ &@$_r$} where $r \ge 1$ references (@&@ symbols) then @&R@ has type {@T `*`&@$_{\color{red}2} \cdots$@ &@$_{\color{red}r}$}, \\ \ie @T@ pointer with $r-1$ references (@&@ symbols).
     
    17941874\label{f:TypeNestingQualification}
    17951875\end{figure}
    1796 In the left example in C, types @C@, @U@ and @T@ are implicitly hoisted outside of type @S@ into the containing block scope.
    1797 In the right example in \CFA, the types are not hoisted and accessed .
     1876In the C left example, types @C@, @U@ and @T@ are implicitly hoisted outside of type @S@ into the containing block scope.
     1877In the \CFA right example, the types are not hoisted but are still accessible.
    17981878
    17991879
    18001880\subsection{Constructors and Destructors}
     1881\label{s:ConstructorsDestructors}
    18011882
    18021883One of the strengths (and weaknesses) of C is memory-management control, allowing resource release to be precisely specified versus unknown release with garbage-collected memory-management.
     
    18091890
    18101891In \CFA, a constructor is named @?{}@ and a destructor is named @^?{}@.
    1811 The name @{}@ comes from the syntax for the initializer: @struct S { int i, j; } s = `{` 2, 3 `}`@.
    1812 The symbol \lstinline+^+ is used because it was the last remaining binary operator that could be used in a unary context.
     1892The name @{}@ comes from the syntax for the initializer: @struct S { int i, j; } s = `{` 2, 3 `}`@\footnote{%
     1893The symbol \lstinline+^+ is used for the destructor name because it was the last binary operator that could be used in a unary context.}.
    18131894Like other \CFA operators, these names represent the syntax used to call the constructor or destructor, \eg @?{}(x, ...)@ or @^{}(x, ...)@.
    1814 The constructor and destructor have return type @void@ and a first parameter of reference to the object type to be constructed or destructs.
     1895The constructor and destructor have return type @void@, and the first parameter is a reference to the object type to be constructed or destructed.
    18151896While the first parameter is informally called the @this@ parameter, as in object-oriented languages, any variable name may be used.
    18161897Both constructors and destructors allow additional parameters after the @this@ parameter for specifying values for initialization/de-initialization\footnote{
     
    18211902};
    18221903void ?{}( VLA & vla ) with ( vla ) {            $\C{// default constructor}$
    1823         len = 10;  data = alloc( len );
     1904        len = 10;  data = alloc( len );                 $\C{// shallow copy}$
    18241905}
    18251906void ^?{}( VLA & vla ) with ( vla ) {           $\C{// destructor}$
     
    18301911}                                                                                       $\C{// implicit:  ?\^{}\{\}( x );}$
    18311912\end{cfa}
     1913(Note, the example is purposely kept simple by using shallow-copy semantics.)
    18321914@VLA@ is a \newterm{managed type}\footnote{
    18331915A managed type affects the runtime environment versus a self-contained type.}: a type requiring a non-trivial constructor or destructor, or with a field of a managed type.
    1834 A managed type is implicitly constructed upon allocation and destructed upon deallocation to ensure proper interaction with runtime resources, in this case the @data@ array in the heap.
    1835 For details of the placement of implicit constructor and destructor calls among complex executable statements see~\cite[\S~2.2]{Schluntz17}.
     1916A managed type is implicitly constructed at allocation and destructed at deallocation to ensure proper interaction with runtime resources, in this case, the @data@ array in the heap.
     1917For details of the code-generation placement of implicit constructor and destructor calls among complex executable statements see~\cite[\S~2.2]{Schluntz17}.
    18361918
    18371919\CFA also provides syntax for \newterm{initialization} and \newterm{copy}:
     
    18841966
    18851967In some circumstances, programmers may not wish to have constructor and destructor calls.
    1886 In these cases, \CFA provides the initialization syntax \lstinline|S x @= {}|, and the object becomes unmanaged, so constructors and destructors calls are not generated.
     1968In these cases, \CFA provides the initialization syntax \lstinline|S x @= {}|, and the object becomes unmanaged, so implicit constructor and destructor calls are not generated.
    18871969Any C initializer can be the right-hand side of an \lstinline|@=| initializer, \eg \lstinline|VLA a @= { 0, 0x0 }|, with the usual C initialization semantics.
    18881970The point of \lstinline|@=| is to provide a migration path from legacy C code to \CFA, by providing a mechanism to incrementally convert to implicit initialization.
     
    18951977
    18961978C already includes limited polymorphism for literals -- @0@ can be either an integer or a pointer literal, depending on context, while the syntactic forms of literals of the various integer and float types are very similar, differing from each other only in suffix.
    1897 In keeping with the general \CFA approach of adding features while respecting ``the C way'' of doing things, we have extended both C's polymorphic zero and typed literal syntax to interoperate with user-defined types, while maintaining a backwards-compatible semantics.
     1979In keeping with the general \CFA approach of adding features while respecting the ``C-style'' of doing things, C's polymorphic constants and typed literal syntax are extended to interoperate with user-defined types, while maintaining a backwards-compatible semantics.
     1980A trivial example is allowing the underscore to separate prefixes, digits, and suffixes in all \CFA constants, as in Ada, \eg @0x`_`1.ffff`_`ffff`_`p`_`128`_`l@.
    18981981
    18991982
     
    19121995
    19131996
     1997\subsection{Integral Suffixes}
     1998
     1999Additional integral suffixes are added to cover all the integral types and lengths.
     2000\begin{cquote}
     2001\lstDeleteShortInline@%
     2002\begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}}
     2003\begin{cfa}
     200420_hh     // signed char
     200521_hhu   // unsigned char
     200622_h      // signed short int
     200723_uh    // unsigned short int
     200824z        // size_t
     2009\end{cfa}
     2010&
     2011\begin{cfa}
     201220_L8     // int8_t
     201321_ul8    // uint8_t
     201422_l16    // int16_t
     201523_ul16  // uint16_t
     201624_l32    // int32_t
     2017\end{cfa}
     2018&
     2019\begin{cfa}
     202025_ul32      // uint32_t
     202126_l64        // int64_t
     202227_l64u      // uint64_t
     202326_L128     // int128
     202427_L128u  // unsigned int128
     2025\end{cfa}
     2026\end{tabular}
     2027\lstMakeShortInline@%
     2028\end{cquote}
     2029
     2030
    19142031\subsection{Units}
    19152032
     
    19192036\begin{cfa}
    19202037struct Weight { double stones; };
    1921 
    19222038void ?{}( Weight & w ) { w.stones = 0; }        $\C{// operations}$
    19232039void ?{}( Weight & w, double w ) { w.stones = w; }
     
    19292045
    19302046int main() {
    1931         Weight w, hw = { 14 };                                  $\C{// 14 stone}$
    1932         w = 11|`st| + 1|`lb|;
    1933         w = 70.3|`kg|;
     2047        Weight w, heavy = { 20 };                               $\C{// 20 stone}$
    19342048        w = 155|`lb|;
    19352049        w = 0x_9b_u|`lb|;                                               $\C{// hexadecimal unsigned weight (155)}$
    19362050        w = 0_233|`lb|;                                                 $\C{// octal weight (155)}$
    1937         w = 5|`st| + 8|`kg| + 25|`lb| + hw;
     2051        w = 5|`st| + 8|`kg| + 25|`lb| + heavy;
    19382052}
    19392053\end{cfa}
     
    20382152\end{cquote}
    20392153While \Celeven has type-generic math~\cite[\S~7.25]{C11} in @tgmath.h@ to provide a similar mechanism, these macros are limited, matching a routine name with a single set of floating type(s).
    2040 For example, it is not possible to overload @atan@ for both one and two arguments;
     2154For example, it is impossible to overload @atan@ for both one and two arguments;
    20412155instead the names @atan@ and @atan2@ are required.
    20422156The key observation is that only a restricted set of type-generic macros is provided for a limited set of routine names, which do not generalize across the type system, as in \CFA.
     
    21212235ip = alloc( ip, 2 * dim );
    21222236ip = alloc( ip, 4 * dim, fill );
     2237
     2238ip = align_alloc( 16 );
     2239ip = align_alloc( 16, fill );
     2240ip = align_alloc( 16, dim );
     2241ip = align_alloc( 16, dim, fill );
    21232242\end{cfa}
    21242243&
     
    21302249ip = (int *)realloc( ip, 2 * dim * sizeof( int ) );
    21312250ip = (int *)realloc( ip, 4 * dim * sizeof( int ) ); memset( ip, fill, 4 * dim * sizeof( int ) );
    2132 \end{cfa}
    2133 \end{tabular}
    2134 \begin{tabular}{@{}l@{\hspace{\parindentlnth}}l@{}}
    2135 \begin{cfa}
    2136 ip = align_alloc( 16 );
    2137 ip = align_alloc( 16, fill );
    2138 ip = align_alloc( 16, dim );
    2139 ip = align_alloc( 16, dim, fill );
    2140 \end{cfa}
    2141 &
    2142 \begin{cfa}
     2251
    21432252ip = memalign( 16, sizeof( int ) );
    21442253ip = memalign( 16, sizeof( int ) ); memset( ip, fill, sizeof( int ) );
     
    22572366The implicit separator character (space/blank) is a separator not a terminator.
    22582367The rules for implicitly adding the separator are:
    2259 \begin{itemize}[topsep=3pt,itemsep=2pt,parsep=0pt]
     2368\begin{itemize}
    22602369\item
    22612370A separator does not appear at the start or end of a line.
     
    22722381A separator does not appear before or after a C string beginning/ending with the quote or whitespace characters: \lstinline[basicstyle=\tt,showspaces=true]@`'": \t\v\f\r\n@
    22732382}%
    2274 \item
     2383\end{itemize}
    22752384There are routines to set and get the separator string, and manipulators to toggle separation on and off in the middle of output.
    2276 \end{itemize}
    22772385
    22782386
     
    22932401        sout | "Factorial Numbers" | endl;
    22942402        Int fact = 1;
    2295 
    22962403        sout | 0 | fact | endl;
    22972404        for ( unsigned int i = 1; i <= 40; i += 1 ) {
     
    23062413int main( void ) {
    23072414        `gmp_printf`( "Factorial Numbers\n" );
    2308         `mpz_t` fact;
    2309         `mpz_init_set_ui`( fact, 1 );
     2415        `mpz_t` fact;  `mpz_init_set_ui`( fact, 1 );
    23102416        `gmp_printf`( "%d %Zd\n", 0, fact );
    23112417        for ( unsigned int i = 1; i <= 40; i += 1 ) {
Note: See TracChangeset for help on using the changeset viewer.