Changes in doc/papers/general/Paper.tex [87555b7:28bc8c8]
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/papers/general/Paper.tex
r87555b7 r28bc8c8 267 267 int m = max( max, -max ); $\C{// uses (3) and (1) twice, by matching return type}$ 268 268 \end{cfa} 269 270 269 \CFA maximizes the ability to reuse names to aggressively address the naming problem. 271 270 In some cases, hundreds of names can be reduced to tens, resulting in a significant cognitive reduction. … … 286 285 287 286 288 \subsection{\texorpdfstring{\ protect\lstinline{forall} Functions}{forall Functions}}287 \subsection{\texorpdfstring{\LstKeywordStyle{forall} Functions}{forall Functions}} 289 288 \label{sec:poly-fns} 290 289 … … 436 435 One approach is to write bespoke data-structures for each context in which they are needed. 437 436 While this approach is flexible and supports integration with the C type-checker and tooling, it is also tedious and error-prone, especially for more complex data structures. 438 A second approach is to use @void *@ 437 A second approach is to use @void *@--based polymorphism, \eg the C standard-library functions @bsearch@ and @qsort@, which allow reuse of code with common functionality. 439 438 However, basing all polymorphism on @void *@ eliminates the type-checker's ability to ensure that argument types are properly matched, often requiring a number of extra function parameters, pointer indirection, and dynamic allocation that is not otherwise needed. 440 439 A third approach to generic code is to use preprocessor macros, which does allow the generated code to be both generic and type-checked, but errors may be difficult to interpret. … … 508 507 The offset arrays are statically generated where possible. 509 508 If a dynamic generic-type is declared to be passed or returned by value from a polymorphic function, the translator can safely assume the generic type is complete (\ie has a known layout) at any call-site, and the offset array is passed from the caller; 510 if the generic type is concrete at the call site, the elements of this offset array can even be statically generated using the C @offsetof@ macro. 511 As an example, the body of the second @value@ function is implemented as:512 \begin{cfa} 513 _assign_T( _retval, p + _offsetof_pair[1]); $\C{// return *p.second}$514 \end{cfa} 515 @_assign_T@ is passed in as an implicit parameter from @otype T@, and takes two @T *@ (@void*@ in the generated code), a destination and a source; @_retval@ is the pointer to a caller-allocated buffer for the return value, the usual \CFA method to handle dynamically-sized return types.509 if the generic type is concrete at the call site, the elements of this offset array can even be statically generated using the C @offsetof@ macro. 510 As an example, the body of the second @value@ function is implemented like this: 511 \begin{cfa} 512 _assign_T(_retval, p + _offsetof_pair[1]); $\C{// return *p.second}$ 513 \end{cfa} 514 @_assign_T@ is passed in as an implicit parameter from @otype T@, and takes two @T*@ (@void*@ in the generated code), a destination and a source; @_retval@ is the pointer to a caller-allocated buffer for the return value, the usual \CFA method to handle dynamically-sized return types. 516 515 @_offsetof_pair@ is the offset array passed into @value@; this array is generated at the call site as: 517 516 \begin{cfa} 518 size_t _offsetof_pair[] = { offsetof( _pair_conc0, first ), offsetof( _pair_conc0, second) }517 size_t _offsetof_pair[] = { offsetof(_pair_conc0, first), offsetof(_pair_conc0, second) } 519 518 \end{cfa} 520 519 … … 540 539 The most important such pattern is using @forall(dtype T) T *@ as a type-checked replacement for @void *@, \eg creating a lexicographic comparison for pairs of pointers used by @bsearch@ or @qsort@: 541 540 \begin{cfa} 542 forall( dtype T) int lexcmp( pair( T *, T * ) * a, pair( T *, T * ) * b, int (* cmp)( T *, T * ) ) {541 forall(dtype T) int lexcmp( pair( T *, T * ) * a, pair( T *, T * ) * b, int (* cmp)( T *, T * ) ) { 543 542 return cmp( a->first, b->first ) ? : cmp( a->second, b->second ); 544 543 } 545 544 \end{cfa} 546 Since @pair( 545 Since @pair(T *, T * )@ is a concrete type, there are no implicit parameters passed to @lexcmp@, so the generated code is identical to a function written in standard C using @void *@, yet the \CFA version is type-checked to ensure the fields of both pairs and the arguments to the comparison function match in type. 547 546 548 547 Another useful pattern enabled by reused dtype-static type instantiations is zero-cost \newterm{tag-structures}. 549 548 Sometimes information is only used for type-checking and can be omitted at runtime, \eg: 550 549 \begin{cfa} 551 forall( dtype Unit) struct scalar { unsigned long value; };550 forall(dtype Unit) struct scalar { unsigned long value; }; 552 551 struct metres {}; 553 552 struct litres {}; 554 553 555 forall( 554 forall(dtype U) scalar(U) ?+?( scalar(U) a, scalar(U) b ) { 556 555 return (scalar(U)){ a.value + b.value }; 557 556 } … … 808 807 Due to the implicit flattening and structuring conversions involved in argument passing, @otype@ and @dtype@ parameters are restricted to matching only with non-tuple types, \eg: 809 808 \begin{cfa} 810 forall( otype T, dtype U) void f( T x, U * y );809 forall(otype T, dtype U) void f( T x, U * y ); 811 810 f( [5, "hello"] ); 812 811 \end{cfa} … … 815 814 For example, a plus operator can be written to add two triples together. 816 815 \begin{cfa} 817 forall( otype T | { T ?+?( T, T ); }) [T, T, T] ?+?( [T, T, T] x, [T, T, T] y ) {816 forall(otype T | { T ?+?( T, T ); }) [T, T, T] ?+?( [T, T, T] x, [T, T, T] y ) { 818 817 return [x.0 + y.0, x.1 + y.1, x.2 + y.2]; 819 818 } … … 826 825 \begin{cfa} 827 826 int f( [int, double], double ); 828 forall( otype T, otype U | { T f( T, U, U ); }) void g( T, U );827 forall(otype T, otype U | { T f( T, U, U ); }) void g( T, U ); 829 828 g( 5, 10.21 ); 830 829 \end{cfa} 831 830 Hence, function parameter and return lists are flattened for the purposes of type unification allowing the example to pass expression resolution. 832 831 This relaxation is possible by extending the thunk scheme described by Bilson~\cite{Bilson03}. 833 %Whenever a candidate's parameter structure does not exactly match the formal parameter's structure, a thunk is generated to specialize calls to the actual function:834 %\begin{cfa}835 %int _thunk( int _p0, double _p1, double _p2 ) { return f( [_p0, _p1], _p2 ); }836 %\end{cfa}837 %so the thunk provides flattening and structuring conversions to inferred functions, improving the compatibility of tuples and polymorphism.838 % These thunks are generated locally using gcc nested-functions, rather hositing them to the external scope, so they can easily access local state.832 Whenever a candidate's parameter structure does not exactly match the formal parameter's structure, a thunk is generated to specialize calls to the actual function: 833 \begin{cfa} 834 int _thunk( int _p0, double _p1, double _p2 ) { return f( [_p0, _p1], _p2 ); } 835 \end{cfa} 836 so the thunk provides flattening and structuring conversions to inferred functions, improving the compatibility of tuples and polymorphism. 837 These thunks take advantage of gcc C nested-functions to produce closures that have the usual function-pointer signature WHAT DOES THIS MEAN???. 839 838 840 839 … … 853 852 \begin{cfa} 854 853 int sum$\(_0\)$() { return 0; } 855 forall( 854 forall(ttype Params | { int sum( Params ); } ) int sum$\(_1\)$( int x, Params rest ) { 856 855 return x + sum( rest ); 857 856 } … … 866 865 \begin{cfa} 867 866 int sum( int x, int y ) { return x + y; } 868 forall( 867 forall(ttype Params | { int sum( int, Params ); } ) int sum( int x, int y, Params rest ) { 869 868 return sum( x + y, rest ); 870 869 } … … 872 871 One more step permits the summation of any summable type with all arguments of the same type: 873 872 \begin{cfa} 874 trait summable( otype T) {873 trait summable(otype T) { 875 874 T ?+?( T, T ); 876 875 }; 877 forall( 876 forall(otype R | summable( R ) ) R sum( R x, R y ) { 878 877 return x + y; 879 878 } 880 forall( 879 forall(otype R, ttype Params | summable(R) | { R sum(R, Params); } ) R sum(R x, R y, Params rest) { 881 880 return sum( x + y, rest ); 882 881 } … … 889 888 \begin{cfa} 890 889 struct S { int x, y; }; 891 forall( otype T, ttype Params | { void print(T); void print(Params); }) void print(T arg, Params rest) {890 forall(otype T, ttype Params | { void print(T); void print(Params); }) void print(T arg, Params rest) { 892 891 print(arg); print(rest); 893 892 } … … 928 927 is transformed into: 929 928 \begin{cfa} 930 forall( dtype T0, dtype T1 | sized(T0) | sized(T1)) struct _tuple2 {929 forall(dtype T0, dtype T1 | sized(T0) | sized(T1)) struct _tuple2 { 931 930 T0 field_0; $\C{// generated before the first 2-tuple}$ 932 931 T1 field_1; … … 934 933 _tuple2(int, int) f() { 935 934 _tuple2(double, double) x; 936 forall( dtype T0, dtype T1, dtype T2 | sized(T0) | sized(T1) | sized(T2)) struct _tuple3 {935 forall(dtype T0, dtype T1, dtype T2 | sized(T0) | sized(T1) | sized(T2)) struct _tuple3 { 937 936 T0 field_0; $\C{// generated before the first 3-tuple}$ 938 937 T1 field_1; … … 942 941 } 943 942 \end{cfa} 944 {\sloppy 943 \begin{sloppypar} 945 944 Tuple expressions are then simply converted directly into compound literals, \eg @[5, 'x', 1.24]@ becomes @(_tuple3(int, char, double)){ 5, 'x', 1.24 }@. 946 \ par}%945 \end{sloppypar} 947 946 948 947 \begin{comment} … … 1005 1004 \section{Control Structures} 1006 1005 1007 \CFA identifies inconsistent, problematic, and missing control structures in C, and extends, modifies, and adds control structures to increase functionality and safety.1008 1009 1010 \subsection{\texorpdfstring{\ protect\lstinline{if} Statement}{if Statement}}1006 \CFA identifies inconsistent, problematic, and missing control structures in C, and extends, modifies, and adds to control structures to increase functionality and safety. 1007 1008 1009 \subsection{\texorpdfstring{\LstKeywordStyle{if} Statement}{if Statement}} 1011 1010 1012 1011 The @if@ expression allows declarations, similar to @for@ declaration expression: … … 1020 1019 1021 1020 1022 \subsection{\texorpdfstring{\ protect\lstinline{switch} Statement}{switch Statement}}1021 \subsection{\texorpdfstring{\LstKeywordStyle{switch} Statement}{switch Statement}} 1023 1022 1024 1023 There are a number of deficiencies with the C @switch@ statements: enumerating @case@ lists, placement of @case@ clauses, scope of the switch body, and fall through between case clauses. … … 1040 1039 \lstMakeShortInline@% 1041 1040 \end{cquote} 1042 for a contiguous list:\footnote{gcc has the same mechanism but awkward syntax, \lstinline@2 ...42@, because a space is required after a number, otherwise the period is a decimal point.}1041 for a contiguous list:\footnote{gcc provides the same mechanism with awkward syntax, \lstinline@2 ... 42@, where spaces are required around the ellipse.} 1043 1042 \begin{cquote} 1044 1043 \lstDeleteShortInline@% … … 1091 1090 C @switch@ provides multiple entry points into the statement body, but once an entry point is selected, control continues across \emph{all} @case@ clauses until the end of the @switch@ body, called \newterm{fall through}; 1092 1091 @case@ clauses are made disjoint by the @break@ statement. 1093 While fall through \emph{is} a useful form of control flow, it does not match well with programmer intuition, resulting in errors from missing @break@ statements. 1094 For backwards compatibility, \CFA provides a \emph{new} control structure, @choose@, which mimics @switch@, but reverses the meaning of fall through (see Figure~\ref{f:ChooseSwitchStatements}), similar to Go. 1092 While the ability to fall through \emph{is} a useful form of control flow, it does not match well with programmer intuition, resulting in many errors from missing @break@ statements. 1093 For backwards compatibility, \CFA provides a \emph{new} control structure, @choose@, which mimics @switch@, but reverses the meaning of fall through (see Figure~\ref{f:ChooseSwitchStatements}). 1094 1095 Collectively, these enhancements reduce programmer burden and increase readability and safety. 1095 1096 1096 1097 \begin{figure} … … 1136 1137 \end{figure} 1137 1138 1138 Finally, @fallthrough@ may appear in contexts other than terminating a @case@ clause, and have an explicit transfer label allowing separate cases but common final-code for a set of cases: 1139 \begin{cquote} 1140 \lstDeleteShortInline@% 1141 \begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{}} 1142 \multicolumn{1}{c@{\hspace{2\parindentlnth}}}{\textbf{non-terminator}} & \multicolumn{1}{c}{\textbf{target label}} \\ 1143 \begin{cfa} 1144 choose ( ... ) { 1145 case 3: 1146 if ( ... ) { 1147 ... `fallthrough;` // goto case 4 1148 } else { 1149 ... 1150 } 1151 // implicit break 1152 case 4: 1153 \end{cfa} 1154 & 1155 \begin{cfa} 1156 choose ( ... ) { 1157 case 3: 1158 ... `fallthrough common;` 1159 case 4: 1160 ... `fallthrough common;` 1161 common: 1162 ... // common code for cases 3 and 4 1163 // implicit break 1164 case 4: 1165 \end{cfa} 1166 \end{tabular} 1167 \lstMakeShortInline@% 1168 \end{cquote} 1169 The target label may be case @default@. 1170 1171 Collectively, these control-structure enhancements reduce programmer burden and increase readability and safety. 1172 1173 1174 \subsection{\texorpdfstring{Labelled \protect\lstinline{continue} / \protect\lstinline{break}}{Labelled continue / break}} 1139 \begin{comment} 1140 Forgotten @break@ statements at the end of @switch@ cases are a persistent sort of programmer error in C, and the @break@ statements themselves introduce visual clutter and an un-C-like keyword-based block delimiter. 1141 \CFA addresses this error by introducing a @choose@ statement, which works identically to a @switch@ except that its default end-of-case behaviour is to break rather than to fall through for all non-empty cases. 1142 Since empty cases like @case 7:@ in @case 7: case 11:@ still have fall-through semantics and explicit @break@ is still allowed at the end of a @choose@ case, many idiomatic uses of @switch@ in standard C can be converted to @choose@ statements by simply changing the keyword. 1143 Where fall-through is desired for a non-empty case, it can be specified with the new @fallthrough@ statement, making @choose@ equivalently powerful to @switch@, but more concise in the common case where most non-empty cases end with a @break@ statement, as in the example below: 1144 1145 \begin{cfa} 1146 choose( i ) { 1147 case 2: 1148 printf("even "); 1149 fallthrough; 1150 case 3: case 5: case 7: 1151 printf("small prime\n"); 1152 case 4,6,8,9: 1153 printf("small composite\n"); 1154 case 13~19: 1155 printf("teen\n"); 1156 default: 1157 printf("something else\n"); 1158 } 1159 \end{cfa} 1160 \end{comment} 1161 1162 1163 \subsection{\texorpdfstring{Labelled \LstKeywordStyle{continue} / \LstKeywordStyle{break}}{Labelled continue / break}} 1175 1164 1176 1165 While C provides @continue@ and @break@ statements for altering control flow, both are restricted to one level of nesting for a particular control structure. … … 1281 1270 \subsection{Exception Handling} 1282 1271 1283 The following framework for \CFA exception handling is in place, excluding some run time type-information and virtual functions.1272 The following framework for \CFA exception handling is in place, excluding some run-time type-information and dynamic casts. 1284 1273 \CFA provides two forms of exception handling: \newterm{fix-up} and \newterm{recovery} (see Figure~\ref{f:CFAExceptionHandling})~\cite{Buhr92b,Buhr00a}. 1285 1274 Both mechanisms provide dynamic call to a handler using dynamic name-lookup, where fix-up has dynamic return and recovery has static return from the handler. … … 1351 1340 catch ( IOError err ) { ... } $\C{// handler error from other files}$ 1352 1341 \end{cfa} 1353 where the throw inserts the failing file-handle in tothe I/O exception.1342 where the throw inserts the failing file-handle in the I/O exception. 1354 1343 Conditional catch cannot be trivially mimicked by other mechanisms because once an exception is caught, handler clauses in that @try@ statement are no longer eligible.. 1355 1344 … … 1359 1348 resume( $\emph{alternate-stack}$ ) 1360 1349 \end{cfa} 1361 These overloads of @resume@ raise the specified exception or the currently propagating exception (reresume) at another \CFA coroutine or task \footnote{\CFA coroutine and concurrency features are discussed in a separately submitted paper.}~\cite{Delisle18}.1362 Nonlocal raise is restricted to resumption to provide the exception handler the greatest flexibility because processing the exception does not unwind its stack, allowing it to continue after the handle rreturns.1363 1364 To facilitate nonlocal raise, \CFA provides dynamic enabling and disabling of nonlocal exception-propagation.1350 These overloads of @resume@ raise the specified exception or the currently propagating exception (reresume) at another \CFA coroutine or task~\cite{Delisle18}.\footnote{\CFA coroutine and concurrency features are discussed in a separately submitted paper.} 1351 Nonlocal raise is restricted to resumption to provide the exception handler the greatest flexibility because processing the exception does not unwind its stack, allowing it to continue after the handle returns. 1352 1353 To facilitate nonlocal exception, \CFA provides dynamic enabling and disabling of nonlocal exception-propagation. 1365 1354 The constructs for controlling propagation of nonlocal exceptions are the @enable@ and the @disable@ blocks: 1366 1355 \begin{cquote} … … 1369 1358 \begin{cfa} 1370 1359 enable $\emph{exception-type-list}$ { 1371 // allow non-local r aise1360 // allow non-local resumption 1372 1361 } 1373 1362 \end{cfa} … … 1375 1364 \begin{cfa} 1376 1365 disable $\emph{exception-type-list}$ { 1377 // disallow non-local r aise1366 // disallow non-local resumption 1378 1367 } 1379 1368 \end{cfa} … … 1386 1375 Coroutines and tasks start with non-local exceptions disabled, allowing handlers to be put in place, before non-local exceptions are explicitly enabled. 1387 1376 \begin{cfa} 1388 void main( mytask & t ) { $\C{// thread starts here}$ 1389 // non-local exceptions disabled 1390 try { $\C{// establish handles for non-local exceptions}$ 1391 enable { $\C{// allow non-local exception delivery}$ 1377 void main( mytask & c ) { 1378 try { 1379 enable { $\C{// now allow non-local exception delivery}$ 1392 1380 // task body 1393 1381 } 1394 // appropriate catchResume/catch handlers1382 // appropriate catchResume/catch 1395 1383 } 1396 1384 } … … 1412 1400 1413 1401 1414 \subsection{\texorpdfstring{\ protect\lstinline{with} Clause / Statement}{with Clause / Statement}}1402 \subsection{\texorpdfstring{\LstKeywordStyle{with} Clause / Statement}{with Clause / Statement}} 1415 1403 \label{s:WithClauseStatement} 1416 1404 … … 1812 1800 int & r = *new( int ); 1813 1801 ... $\C{// non-null reference}$ 1814 delete &r; $\C{// unmanaged (programmer) memory-management}$1802 delete &r; 1815 1803 r += 1; $\C{// undefined reference}$ 1816 1804 \end{cfa} … … 1959 1947 Constructor calls seamlessly integrate with existing C initialization syntax, providing a simple and familiar syntax to C programmers and allowing constructor calls to be inserted into legacy C code with minimal code changes. 1960 1948 1961 In \CFA, a constructor is named @?{}@ and a destructor is named @^?{}@\footnote{% 1949 In \CFA, a constructor is named @?{}@ and a destructor is named @^?{}@. 1950 The name @{}@ comes from the syntax for the initializer: @struct S { int i, j; } s = `{` 2, 3 `}`@\footnote{% 1962 1951 The symbol \lstinline+^+ is used for the destructor name because it was the last binary operator that could be used in a unary context.}. 1963 The name @{}@ comes from the syntax for the initializer: @struct S { int i, j; } s = `{` 2, 3 `}`@.1964 1952 Like other \CFA operators, these names represent the syntax used to call the constructor or destructor, \eg @?{}(x, ...)@ or @^{}(x, ...)@. 1965 1953 The constructor and destructor have return type @void@, and the first parameter is a reference to the object type to be constructed or destructed. … … 2083 2071 \subsection{0/1} 2084 2072 2085 In C, @0@ has the special property that it is the only ``false'' value; 2086 from the standard, any value that compares equal to @0@ is false, while any value that compares unequal to @0@ is true. 2087 As such, an expression @x@ in any boolean context (such as the condition of an @if@ or @while@ statement, or the arguments to @&&@, @||@, or @?:@\,) can be rewritten as @x != 0@ without changing its semantics. 2088 Operator overloading in \CFA provides a natural means to implement this truth-value comparison for arbitrary types, but the C type system is not precise enough to distinguish an equality comparison with @0@ from an equality comparison with an arbitrary integer or pointer. 2089 To provide this precision, \CFA introduces a new type @zero_t@ as the type of literal @0@ (somewhat analagous to @nullptr_t@ and @nullptr@ in \CCeleven); 2090 @zero_t@ can only take the value @0@, but has implicit conversions to the integer and pointer types so that C code involving @0@ continues to work. 2091 With this addition, \CFA rewrites @if (x)@ and similar expressions to @if ((x) != 0)@ or the appropriate analogue, and any type @T@ is ``truthy'' by defining an operator overload @int ?!=?(T, zero_t)@. 2092 \CC makes types truthy by adding a conversion to @bool@; 2093 prior to the addition of explicit cast operators in \CCeleven, this approach had the pitfall of making truthy types transitively convertable to any numeric type; 2094 \CFA avoids this issue. 2095 2096 Similarly, \CFA also has a special type for @1@, @one_t@; 2097 like @zero_t@, @one_t@ has built-in implicit conversions to the various integral types so that @1@ maintains its expected semantics in legacy code for operations @++@ and @--@. 2098 The addition of @one_t@ allows generic algorithms to handle the unit value uniformly for types where it is meaningful. 2099 \TODO{Make this sentence true} 2100 In particular, polymorphic functions in the \CFA prelude define @++x@ and @x++@ in terms of @x += 1@, allowing users to idiomatically define all forms of increment for a type @T@ by defining the single function @T & ?+=(T &, one_t)@; 2101 analogous overloads for the decrement operators are present as well. 2073 In C, @0@ has the special property that it is the only ``false'' value; by the standard, any value which compares equal to @0@ is false, while any value that compares unequal to @0@ is true. 2074 As such, an expression @x@ in any boolean context (such as the condition of an @if@ or @while@ statement, or the arguments to @&&@, @||@, or @?:@) can be rewritten as @x != 0@ without changing its semantics. 2075 The operator overloading feature of \CFA provides a natural means to implement this truth value comparison for arbitrary types, but the C type system is not precise enough to distinguish an equality comparison with @0@ from an equality comparison with an arbitrary integer or pointer. 2076 To provide this precision, \CFA introduces a new type @zero_t@ as type type of literal @0@ (somewhat analagous to @nullptr_t@ and @nullptr@ in \CCeleven); @zero_t@ can only take the value @0@, but has implicit conversions to the integer and pointer types so that C code involving @0@ continues to work properly. 2077 With this addition, the \CFA compiler rewrites @if (x)@ and similar expressions to @if ((x) != 0)@ or the appropriate analogue, and any type @T@ can be made ``truthy'' by defining an operator overload @int ?!=?(T, zero_t)@. 2078 \CC makes types truthy by adding a conversion to @bool@; prior to the addition of explicit cast operators in \CCeleven this approach had the pitfall of making truthy types transitively convertable to any numeric type; our design for \CFA avoids this issue. 2079 2080 \CFA also includes a special type for @1@, @one_t@; like @zero_t@, @one_t@ has built-in implicit conversions to the various integral types so that @1@ maintains its expected semantics in legacy code. 2081 The addition of @one_t@ allows generic algorithms to handle the unit value uniformly for types where that is meaningful. 2082 \TODO{Make this sentence true} In particular, polymorphic functions in the \CFA prelude define @++x@ and @x++@ in terms of @x += 1@, allowing users to idiomatically define all forms of increment for a type @T@ by defining the single function @T & ?+=(T &, one_t)@; analogous overloads for the decrement operators are present as well. 2102 2083 2103 2084 … … 2107 2088 The left of Figure~\ref{f:UserLiteral} shows the \CFA alternative call-syntax (literal argument before function name), using the backquote, to convert basic literals into user literals. 2108 2089 The backquote is a small character, making the unit (function name) predominate. 2109 For examples, the multi-precision integer -type in Section~\ref{s:MultiPrecisionIntegers} hasuser literals:2090 For examples, the multi-precision integers in Section~\ref{s:MultiPrecisionIntegers} make use of user literals: 2110 2091 {\lstset{language=CFA,moredelim=**[is][\color{red}]{|}{|},deletedelim=**[is][]{`}{`}} 2111 2092 \begin{cfa} … … 2327 2308 \lstMakeShortInline@% 2328 2309 \end{cquote} 2329 In additon, there are polymorphic functions, like @min@ and @max@, thatwork on any type with operators @?<?@ or @?>?@.2310 In additon, there are polymorphic functions, like @min@ and @max@, which work on any type with operators @?<?@ or @?>?@. 2330 2311 2331 2312 The following shows one example where \CFA \emph{extends} an existing standard C interface to reduce complexity and provide safety. … … 2338 2319 In either case, new storage may or may not be allocated and, if there is a new allocation, as much data from the existing allocation is copied. 2339 2320 For an increase in storage size, new storage after the copied data may be filled. 2340 \item[align ]2321 \item[alignment] 2341 2322 an allocation on a specified memory boundary, \eg, an address multiple of 64 or 128 for cache-line purposes. 2342 2323 \item[array] 2343 allocation with aspecified number of elements.2324 allocation of the specified number of elements. 2344 2325 An array may be filled, resized, or aligned. 2345 2326 \end{description} … … 2353 2334 \lstMakeShortInline~% 2354 2335 \begin{tabular}{@{}r|r|l|l|l|l@{}} 2355 \multicolumn{1}{c}{}& & \multicolumn{1}{c|}{fill} & resize & align & array \\2336 \multicolumn{1}{c}{}& & \multicolumn{1}{c|}{fill} & resize & alignment & array \\ 2356 2337 \hline 2357 2338 C & ~malloc~ & no & no & no & no \\ … … 2566 2547 In fact, \CFA's features for generic programming can enable faster runtime execution than idiomatic @void *@-based C code. 2567 2548 This claim is demonstrated through a set of generic-code-based micro-benchmarks in C, \CFA, and \CC (see stack implementations in Appendix~\ref{sec:BenchmarkStackImplementation}). 2568 Since all these languages share a subset essentially comprising standard C, maximal-performance benchmarks would show little runtime variance, other thanin length and clarity of source code.2549 Since all these languages share a subset essentially comprising standard C, maximal-performance benchmarks would show little runtime variance, differing only in length and clarity of source code. 2569 2550 A more illustrative benchmark measures the costs of idiomatic usage of each language's features. 2570 Figure~\ref{fig:BenchmarkTest} shows the \CFA benchmark tests for a generic stack based on a singly linked-list , a generic pair-data-structure, and a variadic @print@ function similar to that in Section~\ref{sec:variadic-tuples}.2551 Figure~\ref{fig:BenchmarkTest} shows the \CFA benchmark tests for a generic stack based on a singly linked-list. 2571 2552 The benchmark test is similar for C and \CC. 2572 The experiment uses element types @int@ and @pair( _Bool, char)@, and pushes $N=40M$ elements on a generic stack, copies the stack, clears one of the stacks, finds the maximum value in the other stack, and prints $N/2$ (to reduce graph height) constants.2553 The experiment uses element types @int@ and @pair(short, char)@, and pushes $N=40M$ elements on a generic stack, copies the stack, clears one of the stacks, and finds the maximum value in the other stack. 2573 2554 2574 2555 \begin{figure} 2575 2556 \begin{cfa}[xleftmargin=3\parindentlnth,aboveskip=0pt,belowskip=0pt] 2576 int main( int argc, char * argv[]) {2557 int main() { 2577 2558 int max = 0, val = 42; 2578 2559 stack( int ) si, ti; 2579 2560 2580 2561 REPEAT_TIMED( "push_int", N, push( si, val ); ) 2581 TIMED( "copy_int", ti = si; )2562 TIMED( "copy_int", ti{ si }; ) 2582 2563 TIMED( "clear_int", clear( si ); ) 2583 REPEAT_TIMED( "pop_int", N, int x = pop( ti ); if ( x > max ) max = x; ) 2584 2585 pair( _Bool, char ) max = { (_Bool)0, '\0' }, val = { (_Bool)1, 'a' }; 2586 stack( pair( _Bool, char ) ) sp, tp; 2564 REPEAT_TIMED( "pop_int", N, 2565 int x = pop( ti ); if ( x > max ) max = x; ) 2566 2567 pair( short, char ) max = { 0h, '\0' }, val = { 42h, 'a' }; 2568 stack( pair( short, char ) ) sp, tp; 2587 2569 2588 2570 REPEAT_TIMED( "push_pair", N, push( sp, val ); ) 2589 TIMED( "copy_pair", tp = sp; )2571 TIMED( "copy_pair", tp{ sp }; ) 2590 2572 TIMED( "clear_pair", clear( sp ); ) 2591 REPEAT_TIMED( "pop_pair", N, pair(_Bool, char) x = pop( tp ); if ( x > max ) max = x; ) 2573 REPEAT_TIMED( "pop_pair", N, 2574 pair(short, char) x = pop( tp ); if ( x > max ) max = x; ) 2592 2575 } 2593 2576 \end{cfa} … … 2600 2583 hence runtime checks are necessary to safely down-cast objects. 2601 2584 The most notable difference among the implementations is in memory layout of generic types: \CFA and \CC inline the stack and pair elements into corresponding list and pair nodes, while C and \CCV lack such a capability and instead must store generic objects via pointers to separately-allocated objects. 2602 For the print benchmark, idiomatic printing is used: the C and \CFA variants used @stdio.h@, while the \CC and \CCV variants used @iostream@; preliminary tests show this distinction has negligible runtime impact. 2603 Note, the C benchmark uses unchecked casts as there is no runtime mechanism to perform such checks, while \CFA and \CC provide type-safety statically. 2585 Note that the C benchmark uses unchecked casts as there is no runtime mechanism to perform such checks, while \CFA and \CC provide type-safety statically. 2604 2586 2605 2587 Figure~\ref{fig:eval} and Table~\ref{tab:eval} show the results of running the benchmark in Figure~\ref{fig:BenchmarkTest} and its C, \CC, and \CCV equivalents. 2606 2588 The graph plots the median of 5 consecutive runs of each program, with an initial warm-up run omitted. 2607 All code is compiled at \texttt{-O2} by gcc or g++ 6. 2.0, with all \CC code compiled as \CCfourteen.2589 All code is compiled at \texttt{-O2} by gcc or g++ 6.3.0, with all \CC code compiled as \CCfourteen. 2608 2590 The benchmarks are run on an Ubuntu 16.04 workstation with 16 GB of RAM and a 6-core AMD FX-6300 CPU with 3.5 GHz maximum clock frequency. 2609 2591 … … 2623 2605 & \CT{C} & \CT{\CFA} & \CT{\CC} & \CT{\CCV} \\ \hline 2624 2606 maximum memory usage (MB) & 10001 & 2502 & 2503 & 11253 \\ 2625 source code size (lines) & 247 & 222 & 165 & 339\\2626 redundant type annotations (lines) & 39 & 2 & 2 & 15\\2627 binary size (KB) & 14 & 2 29 & 18 & 38\\2607 source code size (lines) & 187 & 188 & 133 & 303 \\ 2608 redundant type annotations (lines) & 25 & 0 & 2 & 16 \\ 2609 binary size (KB) & 14 & 257 & 14 & 37 \\ 2628 2610 \end{tabular} 2629 2611 \end{table} 2630 2612 2631 2613 The C and \CCV variants are generally the slowest with the largest memory footprint, because of their less-efficient memory layout and the pointer-indirection necessary to implement generic types; 2632 this inefficiency is exacerbated by the second level of generic types in the pair -basedbenchmarks.2633 By contrast, the \CFA and \CC variants run in roughly equivalent time for both the integer and pair of @ _Bool@ and @char@ because the storage layout is equivalent, with the inlined libraries (\ie no separate compilation) and greater maturity of the \CC compiler contributing to its lead.2614 this inefficiency is exacerbated by the second level of generic types in the pair benchmarks. 2615 By contrast, the \CFA and \CC variants run in roughly equivalent time for both the integer and pair of @short@ and @char@ because the storage layout is equivalent, with the inlined libraries (\ie no separate compilation) and greater maturity of the \CC compiler contributing to its lead. 2634 2616 \CCV is slower than C largely due to the cost of runtime type-checking of down-casts (implemented with @dynamic_cast@); 2635 There are two outliers in the graph for \CFA: all prints and pop of @pair@. 2636 Both of these cases result from the complexity of the C-generated polymorphic code, so that the gcc compiler is unable to optimize some dead code and condense nested calls. 2637 A compiler designed for \CFA could easily perform these optimizations. 2617 The outlier in the graph for \CFA, pop @pair@, results from the complexity of the generated-C polymorphic code. 2618 The gcc compiler is unable to optimize some dead code and condense nested calls; a compiler designed for \CFA could easily perform these optimizations. 2638 2619 Finally, the binary size for \CFA is larger because of static linking with the \CFA libraries. 2639 2620 2640 \CFA is also competitive in terms of source code size, measured as a proxy for programmer effort. The line counts in Table~\ref{tab:eval} include implementations of @pair@ and @stack@ types for all four languages for purposes of direct comparison, though it should be noted that \CFA and \CC have pre-written data structures in their standard libraries that programmers would generally use instead. Use of these standard library types has minimal impact on the performance benchmarks, but shrinks the \CFA and \CC benchmarks to 73 and 54lines, respectively.2621 \CFA is also competitive in terms of source code size, measured as a proxy for programmer effort. The line counts in Table~\ref{tab:eval} include implementations of @pair@ and @stack@ types for all four languages for purposes of direct comparison, though it should be noted that \CFA and \CC have pre-written data structures in their standard libraries that programmers would generally use instead. Use of these standard library types has minimal impact on the performance benchmarks, but shrinks the \CFA and \CC benchmarks to 41 and 42 lines, respectively. 2641 2622 On the other hand, C does not have a generic collections-library in its standard distribution, resulting in frequent reimplementation of such collection types by C programmers. 2642 \CCV does not use the \CC standard template library by construction, and in fact includes the definition of @object@ and wrapper classes for @ bool@, @char@, @int@, and @const char *@ in its line count, which inflates this count somewhat, as an actual object-oriented language would include these in the standard library;2623 \CCV does not use the \CC standard template library by construction, and in fact includes the definition of @object@ and wrapper classes for @char@, @short@, and @int@ in its line count, which inflates this count somewhat, as an actual object-oriented language would include these in the standard library; 2643 2624 with their omission, the \CCV line count is similar to C. 2644 2625 We justify the given line count by noting that many object-oriented languages do not allow implementing new interfaces on library types without subclassing or wrapper types, which may be similarly verbose. … … 2646 2627 Raw line-count, however, is a fairly rough measure of code complexity; 2647 2628 another important factor is how much type information the programmer must manually specify, especially where that information is not checked by the compiler. 2648 Such unchecked type information produces a heavier documentation burden and increased potential for runtime bugs, and is much less common in \CFA than C, with its manually specified function pointer sarguments and format codes, or \CCV, with its extensive use of un-type-checked downcasts (\eg @object@ to @integer@ when popping a stack, or @object@ to @printable@ when printing the elements of a @pair@).2629 Such unchecked type information produces a heavier documentation burden and increased potential for runtime bugs, and is much less common in \CFA than C, with its manually specified function pointer arguments and format codes, or \CCV, with its extensive use of un-type-checked downcasts (\eg @object@ to @integer@ when popping a stack, or @object@ to @printable@ when printing the elements of a @pair@). 2649 2630 To quantify this, the ``redundant type annotations'' line in Table~\ref{tab:eval} counts the number of lines on which the type of a known variable is re-specified, either as a format specifier, explicit downcast, type-specific function, or by name in a @sizeof@, struct literal, or @new@ expression. 2650 2631 The \CC benchmark uses two redundant type annotations to create a new stack nodes, while the C and \CCV benchmarks have several such annotations spread throughout their code. 2651 The two instances in which the \CFA benchmark still uses redundant type specifiers are to cast the result of a polymorphic @malloc@ call (the @sizeof@ argument is inferred by the compiler). 2652 These uses are similar to the @new@ expressions in \CC, though the \CFA compiler's type resolver should shortly render even these type casts superfluous. 2653 2632 The \CFA benchmark was able to eliminate all redundant type annotations through use of the polymorphic @alloc@ function discussed in Section~\ref{sec:libraries}. 2654 2633 2655 2634 \section{Related Work} 2656 2657 2635 2658 2636 \subsection{Polymorphism} … … 2735 2713 user defined: D, Objective-C 2736 2714 2737 2738 2715 \section{Conclusion and Future Work} 2739 2716 … … 2748 2725 Finally, we demonstrate that \CFA performance for some idiomatic cases is better than C and close to \CC, showing the design is practically applicable. 2749 2726 2750 There is ongoing work on a wide range of \CFA feature extensions, including arrays with size, runtime type-information, virtual functions,user-defined conversions, concurrent primitives, and modules.2727 There is ongoing work on a wide range of \CFA feature extensions, including arrays with size, user-defined conversions, concurrent primitives, and modules. 2751 2728 (While all examples in the paper compile and run, a public beta-release of \CFA will take another 8--12 months to finalize these additional extensions.) 2752 2729 In addition, there are interesting future directions for the polymorphism design. … … 2783 2760 \CFA 2784 2761 \begin{cfa}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt] 2785 forall( otype T ) struct stack_node; 2786 forall( otype T ) struct stack { 2787 stack_node(T) * head; 2788 }; 2789 forall( otype T ) struct stack_node { 2762 forall(otype T) struct stack_node { 2790 2763 T value; 2791 2764 stack_node(T) * next; 2792 2765 }; 2793 forall( otype T) void ?{}( stack(T) & s ) { (s.head){ 0 }; } 2794 forall( otype T) void ?{}( stack(T) & s, stack(T) t ) { 2766 forall(otype T) struct stack { stack_node(T) * head; }; 2767 forall(otype T) void ?{}( stack(T) & s ) { (s.head){ 0 }; } 2768 forall(otype T) void ?{}( stack(T) & s, stack(T) t ) { 2795 2769 stack_node(T) ** crnt = &s.head; 2796 2770 for ( stack_node(T) * next = t.head; next; next = next->next ) { 2797 stack_node(T) * new_node = ((stack_node(T)*)malloc()); 2798 (*new_node){ next->value }; /***/ 2799 *crnt = new_node; 2800 stack_node(T) * acrnt = *crnt; 2801 crnt = &acrnt->next; 2771 *crnt = alloc(); 2772 ((*crnt)->value){ next->value }; 2773 crnt = &(*crnt)->next; 2802 2774 } 2803 2775 *crnt = 0; 2804 2776 } 2805 forall( otype T) stack(T) ?=?( stack(T) & s, stack(T) t ) {2777 forall(otype T) stack(T) ?=?( stack(T) & s, stack(T) t ) { 2806 2778 if ( s.head == t.head ) return s; 2807 2779 clear( s ); … … 2809 2781 return s; 2810 2782 } 2811 forall( otype T ) void ^?{}( stack(T) & s) { clear( s ); } 2812 forall( otype T ) _Bool empty( const stack(T) & s ) { return s.head == 0; } 2813 forall( otype T ) void push( stack(T) & s, T value ) { 2814 stack_node(T) * new_node = ((stack_node(T)*)malloc()); 2815 (*new_node){ value, s.head }; /***/ 2816 s.head = new_node; 2817 } 2818 forall( otype T ) T pop( stack(T) & s ) { 2819 stack_node(T) * n = s.head; 2820 s.head = n->next; 2821 T v = n->value; 2822 delete( n ); 2823 return v; 2824 } 2825 forall( otype T ) void clear( stack(T) & s ) { 2826 for ( stack_node(T) * next = s.head; next; ) { 2783 forall(otype T) void ^?{}( stack(T) & s) { clear( s ); } 2784 forall(otype T) _Bool empty( const stack(T) & s ) { return s.head == 0; } 2785 forall(otype T) void push( stack(T) & s, T value ) { 2786 stack_node(T) * n = alloc(); 2787 (*n){ value, head }; 2788 head = n; 2789 } 2790 forall(otype T) T pop( stack(T) & s ) { 2791 stack_node(T) * n = head; 2792 head = n->next; 2793 T x = n->value; 2794 ^(*n){}; 2795 free( n ); 2796 return x; 2797 } 2798 forall(otype T) void clear( stack(T) & s ) { 2799 for ( stack_node(T) * next = head; next; ) { 2827 2800 stack_node(T) * crnt = next; 2828 2801 next = crnt->next; 2829 delete( crnt ); 2802 ^(*crnt){}; 2803 free(crnt); 2830 2804 } 2831 s.head = 0;2805 head = 0; 2832 2806 } 2833 2807 \end{cfa} … … 2836 2810 \CC 2837 2811 \begin{cfa}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt] 2838 template<typename T> classstack {2812 template<typename T> struct stack { 2839 2813 struct node { 2840 2814 T value; … … 2843 2817 }; 2844 2818 node * head; 2845 void copy(const stack<T> & o) {2819 void copy(const stack<T> & o) { 2846 2820 node ** crnt = &head; 2847 2821 for ( node * next = o.head;; next; next = next->next ) { … … 2851 2825 *crnt = nullptr; 2852 2826 } 2853 public:2854 2827 stack() : head(nullptr) {} 2855 stack(const stack<T> & o) { copy(o); }2828 stack(const stack<T> & o) { copy(o); } 2856 2829 stack(stack<T> && o) : head(o.head) { o.head = nullptr; } 2857 2830 ~stack() { clear(); } 2858 stack & operator= (const stack<T> & o) {2831 stack & operator= (const stack<T> & o) { 2859 2832 if ( this == &o ) return *this; 2860 2833 clear(); … … 2895 2868 struct stack_node * next; 2896 2869 }; 2870 struct stack { struct stack_node* head; }; 2897 2871 struct stack new_stack() { return (struct stack){ NULL }; /***/ } 2898 2872 void copy_stack(struct stack * s, const struct stack * t, void * (*copy)(const void *)) { … … 2900 2874 for ( struct stack_node * next = t->head; next; next = next->next ) { 2901 2875 *crnt = malloc(sizeof(struct stack_node)); /***/ 2902 **crnt = (struct stack_node){ copy(next->value) }; /***/2876 (*crnt)->value = copy(next->value); 2903 2877 crnt = &(*crnt)->next; 2904 2878 } 2905 *crnt = 0;2879 *crnt = NULL; 2906 2880 } 2907 2881 _Bool stack_empty(const struct stack * s) { return s->head == NULL; } … … 2932 2906 \CCV 2933 2907 \begin{cfa}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt] 2934 stack::node::node( const object & v, node * n ) : value( v.new_copy() ), next( n ) {} 2935 void stack::copy(const stack & o) { 2936 node ** crnt = &head; 2937 for ( node * next = o.head; next; next = next->next ) { 2938 *crnt = new node{ *next->value }; 2939 crnt = &(*crnt)->next; 2908 struct stack { 2909 struct node { 2910 ptr<object> value; 2911 node* next; 2912 node( const object & v, node * n ) : value( v.new_copy() ), next( n ) {} 2913 }; 2914 node* head; 2915 void copy(const stack & o) { 2916 node ** crnt = &head; 2917 for ( node * next = o.head; next; next = next->next ) { 2918 *crnt = new node{ *next->value }; /***/ 2919 crnt = &(*crnt)->next; 2920 } 2921 *crnt = nullptr; 2940 2922 } 2941 *crnt = nullptr; 2942 } 2943 stack::stack() : head(nullptr) {} 2944 stack::stack(const stack & o) { copy(o); } 2945 stack::stack(stack && o) : head(o.head) { o.head = nullptr; } 2946 stack::~stack() { clear(); } 2947 stack & stack::operator= (const stack & o) { 2948 if ( this == &o ) return *this; 2949 clear(); 2950 copy(o); 2951 return *this; 2952 } 2953 stack & stack::operator= (stack && o) { 2954 if ( this == &o ) return *this; 2955 head = o.head; 2956 o.head = nullptr; 2957 return *this; 2958 } 2959 bool stack::empty() const { return head == nullptr; } 2960 void stack::push(const object & value) { head = new node{ value, head }; /***/ } 2961 ptr<object> stack::pop() { 2962 node * n = head; 2963 head = n->next; 2964 ptr<object> x = std::move(n->value); 2965 delete n; 2966 return x; 2967 } 2968 void stack::clear() { 2969 for ( node * next = head; next; ) { 2970 node * crnt = next; 2971 next = crnt->next; 2972 delete crnt; 2923 stack() : head(nullptr) {} 2924 stack(const stack & o) { copy(o); } 2925 stack(stack && o) : head(o.head) { o.head = nullptr; } 2926 ~stack() { clear(); } 2927 stack & operator= (const stack & o) { 2928 if ( this == &o ) return *this; 2929 clear(); 2930 copy(o); 2931 return *this; 2973 2932 } 2974 head = nullptr; 2975 } 2933 stack & operator= (stack && o) { 2934 if ( this == &o ) return *this; 2935 head = o.head; 2936 o.head = nullptr; 2937 return *this; 2938 } 2939 bool empty() const { return head == nullptr; } 2940 void push(const object & value) { head = new node{ value, head }; /***/ } 2941 ptr<object> pop() { 2942 node * n = head; 2943 head = n->next; 2944 ptr<object> x = std::move(n->value); 2945 delete n; 2946 return x; 2947 } 2948 void clear() { 2949 for ( node * next = head; next; ) { 2950 node * crnt = next; 2951 next = crnt->next; 2952 delete crnt; 2953 } 2954 head = nullptr; 2955 } 2956 }; 2976 2957 \end{cfa} 2977 2958
Note: See TracChangeset
for help on using the changeset viewer.