Changeset d03fa6d
- Timestamp:
- Feb 4, 2018, 10:30:07 AM (7 years ago)
- Branches:
- ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc
- Children:
- 51b5a02
- Parents:
- 3ce0c915
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/papers/general/Paper.tex
r3ce0c915 rd03fa6d 39 39 \newcommand{\TODO}[1]{\textbf{TODO}: {\itshape #1}} % TODO included 40 40 %\newcommand{\TODO}[1]{} % TODO elided 41 41 42 % Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore 42 % removes it as a variable-name character so keyworks in variables are highlighted 43 \DeclareTextCommandDefault{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}} 43 % removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR 44 % AFTER HYPERREF. 45 %\DeclareTextCommandDefault{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}} 46 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 44 47 45 48 \makeatletter … … 49 52 \setlength{\parindentlnth}{\parindent} 50 53 54 \newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}} 55 \newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}} 56 51 57 \newlength{\gcolumnposn} % temporary hack because lstlisting does not handle tabs correctly 52 58 \newlength{\columnposn} 53 59 \setlength{\gcolumnposn}{2.75in} 54 60 \setlength{\columnposn}{\gcolumnposn} 55 \newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\lst@ commentstyle{#2}}}61 \newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\lst@basicstyle{\LstCommentStyle{#2}}}} 56 62 \newcommand{\CRT}{\global\columnposn=\gcolumnposn} 57 63 58 64 % Latin abbreviation 59 65 \newcommand{\abbrevFont}{\textit} % set empty for no italics 66 \newcommand{\EG}{\abbrevFont{e}.\abbrevFont{g}.} 60 67 \newcommand*{\eg}{% 61 \@ifnextchar{,}{\ abbrevFont{e}.\abbrevFont{g}.}%62 {\@ifnextchar{:}{\ abbrevFont{e}.\abbrevFont{g}.}%63 {\ abbrevFont{e}.\abbrevFont{g}.,\xspace}}%68 \@ifnextchar{,}{\EG}% 69 {\@ifnextchar{:}{\EG}% 70 {\EG,\xspace}}% 64 71 }% 72 \newcommand{\IE}{\abbrevFont{i}.\abbrevFont{e}.} 65 73 \newcommand*{\ie}{% 66 \@ifnextchar{,}{\ abbrevFont{i}.\abbrevFont{e}.}%67 {\@ifnextchar{:}{\ abbrevFont{i}.\abbrevFont{e}.}%68 {\ abbrevFont{i}.\abbrevFont{e}.,\xspace}}%74 \@ifnextchar{,}{\IE}% 75 {\@ifnextchar{:}{\IE}% 76 {\IE,\xspace}}% 69 77 }% 78 \newcommand{\ETC}{\abbrevFont{etc}} 70 79 \newcommand*{\etc}{% 71 \@ifnextchar{.}{\ abbrevFont{etc}}%72 {\ abbrevFont{etc}.\xspace}%80 \@ifnextchar{.}{\ETC}% 81 {\ETC\xspace}% 73 82 }% 74 \newcommand{\etal}{% 75 \@ifnextchar{.}{\abbrevFont{et~al}}% 76 {\abbrevFont{et al}.\xspace}% 83 \newcommand{\ETAL}{\abbrevFont{et}\hspace{2pt}\abbrevFont{al}} 84 \newcommand*{\etal}{% 85 \@ifnextchar{.}{\protect\ETAL}% 86 {\abbrevFont{\protect\ETAL}.\xspace}% 87 }% 88 \newcommand{\VIZ}{\abbrevFont{viz}} 89 \newcommand*{\viz}{% 90 \@ifnextchar{.}{\VIZ}% 91 {\abbrevFont{\VIZ}.\xspace}% 77 92 }% 78 93 \makeatother … … 80 95 % CFA programming language, based on ANSI C (with some gcc additions) 81 96 \lstdefinelanguage{CFA}[ANSI]{C}{ 82 morekeywords={_Alignas,_Alignof,__alignof,__alignof__,asm,__asm,__asm__,_At,_Atomic,__attribute,__attribute__,auto, 83 _Bool,catch,catchResume,choose,_Complex,__complex,__complex__,__const,__const__,disable,dtype,enable,__extension__, 84 fallthrough,fallthru,finally,forall,ftype,_Generic,_Imaginary,inline,__label__,lvalue,_Noreturn,one_t,otype,restrict,_Static_assert, 85 _Thread_local,throw,throwResume,trait,try,ttype,typeof,__typeof,__typeof__,zero_t}, 97 morekeywords={ 98 _Alignas, _Alignof, __alignof, __alignof__, asm, __asm, __asm__, _At, __attribute, 99 __attribute__, auto, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__, 100 __const, __const__, disable, dtype, enable, __extension__, fallthrough, fallthru, 101 finally, forall, ftype, _Generic, _Imaginary, inline, __label__, lvalue, _Noreturn, one_t, 102 otype, restrict, _Static_assert, throw, throwResume, trait, try, ttype, typeof, __typeof, 103 __typeof__, virtual, with, zero_t}, 104 moredirectives={defined,include_next}% 86 105 }% 87 106 … … 91 110 basicstyle=\linespread{0.9}\sf, % reduce line spacing and use sanserif font 92 111 stringstyle=\tt, % use typewriter font 93 tabsize= 4, % 4space tabbing112 tabsize=5, % N space tabbing 94 113 xleftmargin=\parindentlnth, % indent code to paragraph indentation 95 114 %mathescape=true, % LaTeX math escape in CFA code $...$ … … 101 120 belowskip=3pt, 102 121 % replace/adjust listing characters that look bad in sanserif 103 literate={-}{\makebox[1 .4ex][c]{\raisebox{0.5ex}{\rule{1.2ex}{0.06ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1122 literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1 104 123 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1 105 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1 .4ex][c]{\raisebox{0.5ex}{\rule{1.2ex}{0.06ex}}}\kern-0.3ex\textgreater}2,124 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex\textgreater}2, 106 125 moredelim=**[is][\color{red}]{`}{`}, 107 126 }% lstset … … 109 128 % inline code @...@ 110 129 \lstMakeShortInline@% 130 131 \lstnewenvironment{cfa}[1][] 132 {\CFA\lstset{#1}} 133 {} 134 \lstnewenvironment{C++}[1][] % use C++ style 135 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`},#1}} 136 {} 137 111 138 112 139 \title{Generic and Tuple Types with Efficient Dynamic Layout in \protect\CFA} … … 148 175 The C programming language is a foundational technology for modern computing with millions of lines of code implementing everything from commercial operating-systems to hobby projects. 149 176 This installation base and the programmers producing it represent a massive software-engineering investment spanning decades and likely to continue for decades more. 150 The TIOBE \cite{TIOBE} ranks the top 5 most popular programming languages as: Java 16\%, \Textbf{C 7\%}, \Textbf{\CC 5\%}, \Csharp 4\%, Python 4\% = 36\%, where the next 50 languages are less than 3\% each with a long tail.177 The TIOBE~\cite{TIOBE} ranks the top 5 most popular programming languages as: Java 16\%, \Textbf{C 7\%}, \Textbf{\CC 5\%}, \Csharp 4\%, Python 4\% = 36\%, where the next 50 languages are less than 3\% each with a long tail. 151 178 The top 3 rankings over the past 30 years are: 152 179 \lstDeleteShortInline@% … … 185 212 \label{sec:poly-fns} 186 213 187 \CFA{}\hspace{1pt}'s polymorphism was originally formalized by Ditchfield \cite{Ditchfield92}, and first implemented by Bilson\cite{Bilson03}.214 \CFA{}\hspace{1pt}'s polymorphism was originally formalized by Ditchfield~\cite{Ditchfield92}, and first implemented by Bilson~\cite{Bilson03}. 188 215 The signature feature of \CFA is parametric-polymorphic functions~\cite{forceone:impl,Cormack90,Duggan96} with functions generalized using a @forall@ clause (giving the language its name): 189 216 \begin{lstlisting} … … 337 364 % While a nominal-inheritance system with associated types could model one of those two relationships by making @El@ an associated type of @Ptr@ in the @pointer_like@ implementation, few such systems could model both relationships simultaneously. 338 365 366 339 367 \section{Generic Types} 340 368 … … 465 493 However, the \CFA type-checker ensures matching types are used by all calls to @?+?@, preventing nonsensical computations like adding a length to a volume. 466 494 495 467 496 \section{Tuples} 468 497 \label{sec:tuples} … … 728 757 \end{lstlisting} 729 758 Hence, function parameter and return lists are flattened for the purposes of type unification allowing the example to pass expression resolution. 730 This relaxation is possible by extending the thunk scheme described by Bilson \cite{Bilson03}.759 This relaxation is possible by extending the thunk scheme described by Bilson~\cite{Bilson03}. 731 760 Whenever a candidate's parameter structure does not exactly match the formal parameter's structure, a thunk is generated to specialize calls to the actual function: 732 761 \begin{lstlisting} … … 899 928 \end{comment} 900 929 901 \section{Improved Procedural Paradigm} 930 931 \section{Control Structures} 932 933 934 \subsection{\texorpdfstring{Labelled \LstKeywordStyle{continue} / \LstKeywordStyle{break}}{Labelled continue / break}} 935 936 While C provides @continue@ and @break@ statements for altering control flow, both are restricted to one level of nesting for a particular control structure. 937 Unfortunately, this restriction forces programmers to use @goto@ to achieve the equivalent control-flow for more than one level of nesting. 938 To prevent having to switch to the @goto@, \CFA extends the @continue@ and @break@ with a target label to support static multi-level exit~\cite{Buhr85}, as in Java. 939 For both @continue@ and @break@, the target label must be directly associated with a @for@, @while@ or @do@ statement; 940 for @break@, the target label can also be associated with a @switch@, @if@ or compound (@{}@) statement. 941 Figure~\ref{f:MultiLevelExit} shows @continue@ and @break@ indicating the specific control structure, and the corresponding C program using only @goto@ and labels. 942 The innermost loop has 7 exit points, which cause continuation or termination of one or more of the 7 nested control-structures. 943 944 \begin{figure} 945 \lstDeleteShortInline@% 946 \begin{tabular}{@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}} 947 \multicolumn{1}{@{\hspace{\parindentlnth}}c@{\hspace{\parindentlnth}}}{\textbf{\CFA}} & \multicolumn{1}{@{\hspace{\parindentlnth}}c}{\textbf{C}} \\ 948 \begin{lstlisting} 949 `LC:` { 950 ... $declarations$ ... 951 `LS:` switch ( ... ) { 952 case 3: 953 `LIF:` if ( ... ) { 954 `LF:` for ( ... ) { 955 `LW:` while ( ... ) { 956 ... break `LC`; ... 957 ... break `LS`; ... 958 ... break `LIF`; ... 959 ... continue `LF;` ... 960 ... break `LF`; ... 961 ... continue `LW`; ... 962 ... break `LW`; ... 963 } // while 964 } // for 965 } else { 966 ... break `LIF`; ... 967 } // if 968 } // switch 969 } // compound 970 \end{lstlisting} 971 & 972 \begin{lstlisting} 973 { 974 ... $declarations$ ... 975 switch ( ... ) { 976 case 3: 977 if ( ... ) { 978 for ( ... ) { 979 while ( ... ) { 980 ... goto `LC`; ... 981 ... goto `LS`; ... 982 ... goto `LIF`; ... 983 ... goto `LFC`; ... 984 ... goto `LFB`; ... 985 ... goto `LWC`; ... 986 ... goto `LWB`; ... 987 `LWC`: ; } `LWB:` ; 988 `LFC:` ; } `LFB:` ; 989 } else { 990 ... goto `LIF`; ... 991 } `L3:` ; 992 } `LS:` ; 993 } `LC:` ; 994 \end{lstlisting} 995 & 996 \begin{lstlisting} 997 998 999 1000 1001 1002 1003 1004 // terminate compound 1005 // terminate switch 1006 // terminate if 1007 // continue loop 1008 // terminate loop 1009 // continue loop 1010 // terminate loop 1011 1012 1013 1014 // terminate if 1015 1016 1017 1018 \end{lstlisting} 1019 \end{tabular} 1020 \lstMakeShortInline@% 1021 \caption{Multi-level Exit} 1022 \label{f:MultiLevelExit} 1023 \end{figure} 1024 1025 Both labelled @continue@ and @break@ are a @goto@ restricted in the following ways: 1026 \begin{itemize} 1027 \item 1028 They cannot create a loop, which means only the looping constructs cause looping. 1029 This restriction means all situations resulting in repeated execution are clearly delineated. 1030 \item 1031 They cannot branch into a control structure. 1032 This restriction prevents missing declarations and/or initializations at the start of a control structure resulting in undefined behaviour. 1033 \end{itemize} 1034 The advantage of the labelled @continue@/@break@ is allowing static multi-level exits without having to use the @goto@ statement, and tying control flow to the target control structure rather than an arbitrary point in a program. 1035 Furthermore, the location of the label at the \emph{beginning} of the target control structure informs the reader (eye candy) that complex control-flow is occurring in the body of the control structure. 1036 With @goto@, the label is at the end of the control structure, which fails to convey this important clue early enough to the reader. 1037 Finally, using an explicit target for the transfer instead of an implicit target allows new constructs to be added or removed without affecting existing constructs. 1038 The implicit targets of the current @continue@ and @break@, \ie the closest enclosing loop or @switch@, change as certain constructs are added or removed. 1039 1040 1041 \subsection{\texorpdfstring{\LstKeywordStyle{with} Clause / Statement}{with Clause / Statement}} 1042 \label{s:WithClauseStatement} 1043 1044 In any programming language, some functions have a naturally close relationship with a particular data type. 1045 Object-oriented programming allows this close relationship to be codified in the language by making such functions \emph{class methods} of their related data type. 1046 Class methods have certain privileges with respect to their associated data type, notably un-prefixed access to the fields of that data type. 1047 When writing C functions in an object-oriented style, this un-prefixed access is swiftly missed, as access to fields of a @Foo* f@ requires an extra three characters @f->@ every time, which disrupts coding flow and clutters the produced code. 1048 1049 \TODO{Fill out section. Be sure to mention arbitrary expressions in with-blocks, recent change driven by Thierry to prioritize field name over parameters.} 1050 1051 1052 In object-oriented programming, there is an implicit first parameter, often names @self@ or @this@, which is elided. 1053 \begin{C++} 1054 class C { 1055 int i, j; 1056 int mem() { $\C{\color{red}// implicit "this" parameter}$ 1057 i = 1; $\C{\color{red}// this->i}$ 1058 j = 2; $\C{\color{red}// this->j}$ 1059 } 1060 } 1061 \end{C++} 1062 Since CFA is non-object-oriented, the equivalent object-oriented program looks like: 1063 \begin{cfa} 1064 struct S { int i, j; }; 1065 int mem( S & `this` ) { $\C{// explicit "this" parameter}$ 1066 `this.`i = 1; $\C{// "this" is not elided}$ 1067 `this.`j = 2; 1068 } 1069 \end{cfa} 1070 but it is cumbersome having to write "this." many times in a member. 1071 1072 \CFA provides a @with@ clause/statement (see Pascal~\cite[\S~4.F]{Pascal}) to elided the "@this.@" by opening a scope containing field identifiers, changing the qualified fields into variables and giving an opportunity for optimizing qualified references. 1073 \begin{cfa} 1074 int mem( S &this ) `with this` { $\C{// with clause}$ 1075 i = 1; $\C{\color{red}// this.i}$ 1076 j = 2; $\C{\color{red}// this.j}$ 1077 } 1078 \end{cfa} 1079 which extends to multiple routine parameters: 1080 \begin{cfa} 1081 struct T { double m, n; }; 1082 int mem2( S & this1, T & this2 ) `with this1, this2` { 1083 i = 1; j = 2; 1084 m = 1.0; n = 2.0; 1085 } 1086 \end{cfa} 1087 1088 The statement form is used within a block: 1089 \begin{cfa} 1090 int foo() { 1091 struct S1 { ... } s1; 1092 struct S2 { ... } s2; 1093 `with s1` { $\C{// with statement}$ 1094 // access fields of s1 without qualification 1095 `with s2` { $\C{// nesting}$ 1096 // access fields of s1 and s2 without qualification 1097 } 1098 } 1099 `with s1, s2` { 1100 // access unambiguous fields of s1 and s2 without qualification 1101 } 1102 } 1103 \end{cfa} 1104 1105 When opening multiple structures, fields with the same name and type are ambiguous and must be fully qualified. 1106 For fields with the same name but different type, context/cast can be used to disambiguate. 1107 \begin{cfa} 1108 struct S { int i; int j; double m; } a, c; 1109 struct T { int i; int k; int m } b, c; 1110 `with a, b` { 1111 j + k; $\C{// unambiguous, unique names define unique types}$ 1112 i; $\C{// ambiguous, same name and type}$ 1113 a.i + b.i; $\C{// unambiguous, qualification defines unique names}$ 1114 m; $\C{// ambiguous, same name and no context to define unique type}$ 1115 m = 5.0; $\C{// unambiguous, same name and context defines unique type}$ 1116 m = 1; $\C{// unambiguous, same name and context defines unique type}$ 1117 } 1118 `with c` { ... } $\C{// ambiguous, same name and no context}$ 1119 `with (S)c` { ... } $\C{// unambiguous, same name and cast defines unique type}$ 1120 \end{cfa} 1121 1122 The components in the "with" clause 1123 1124 with a, b, c { ... } 1125 1126 serve 2 purposes: each component provides a type and object. The type must be a 1127 structure type. Enumerations are already opened, and I think a union is opened 1128 to some extent, too. (Or is that just unnamed unions?) The object is the target 1129 that the naked structure-fields apply to. The components are open in "parallel" 1130 at the scope of the "with" clause/statement, so opening "a" does not affect 1131 opening "b", etc. This semantic is different from Pascal, which nests the 1132 openings. 1133 1134 Having said the above, it seems reasonable to allow a "with" component to be an 1135 expression. The type is the static expression-type and the object is the result 1136 of the expression. Again, the type must be an aggregate. Expressions require 1137 parenthesis around the components. 1138 1139 with( a, b, c ) { ... } 1140 1141 Does this now make sense? 1142 1143 Having written more CFA code, it is becoming clear to me that I *really* want 1144 the "with" to be implemented because I hate having to type all those object 1145 names for fields. It's a great way to drive people away from the language. 1146 1147 1148 \subsection{Exception Handling ???} 1149 1150 1151 \section{Declarations} 902 1152 903 1153 It is important to the design team that \CFA subjectively ``feel like'' C to user programmers. … … 906 1156 Nonetheless, some features of object-oriented languages are undeniably convienient, and the \CFA design team has attempted to adapt them to a procedural paradigm so as to incorporate their benefits into \CFA; two of these features are resource management and name scoping. 907 1157 1158 1159 \subsection{Alternative Declaration Syntax} 1160 1161 1162 \subsection{References} 1163 1164 \TODO{Pull draft text from user manual; make sure to discuss nested references and rebind operator drawn from lvalue-addressof operator} 1165 1166 908 1167 \subsection{Constructors and Destructors} 909 1168 … … 914 1173 \TODO{Fill out section. Mention field-constructors and at-equal escape hatch to C-style initialization. Probably pull some text from Rob's thesis for first draft.} 915 1174 916 \subsection{with Statement} 917 918 In any programming language, some functions have a naturally close relationship with a particular data type. 919 Object-oriented programming allows this close relationship to be codified in the language by making such functions \emph{class methods} of their related data type. 920 Class methods have certain privileges with respect to their associated data type, notably un-prefixed access to the fields of that data type. 921 When writing C functions in an object-oriented style, this un-prefixed access is swiftly missed, as access to fields of a @Foo* f@ requires an extra three characters @f->@ every time, which disrupts coding flow and clutters the produced code. 922 923 \TODO{Fill out section. Be sure to mention arbitrary expressions in with-blocks, recent change driven by Thierry to prioritize field name over parameters.} 924 925 \section{References} 926 927 \TODO{Pull draft text from user manual; make sure to discuss nested references and rebind operator drawn from lvalue-addressof operator} 1175 1176 \subsection{Default Parameters} 1177 1178 1179 \section{Literals} 1180 1181 1182 \subsection{0/1} 1183 1184 1185 \subsection{Units} 1186 1187 Alternative call syntax (literal argument before routine name) to convert basic literals into user literals. 1188 1189 {\lstset{language=CFA,deletedelim=**[is][]{`}{`},moredelim=**[is][\color{red}]{@}{@}} 1190 \begin{lstlisting} 1191 struct Weight { double stones; }; 1192 1193 void ?{}( Weight & w ) { w.stones = 0; } $\C{// operations}$ 1194 void ?{}( Weight & w, double w ) { w.stones = w; } 1195 Weight ?+?( Weight l, Weight r ) { return (Weight){ l.stones + r.stones }; } 1196 1197 Weight @?`st@( double w ) { return (Weight){ w }; } $\C{// backquote for units}$ 1198 Weight @?`lb@( double w ) { return (Weight){ w / 14.0 }; } 1199 Weight @?`kg@( double w ) { return (Weight) { w * 0.1575}; } 1200 1201 int main() { 1202 Weight w, hw = { 14 }; $\C{// 14 stone}$ 1203 w = 11@`st@ + 1@`lb@; 1204 w = 70.3@`kg@; 1205 w = 155@`lb@; 1206 w = 0x_9b_u@`lb@; $\C{// hexadecimal unsigned weight (155)}$ 1207 w = 0_233@`lb@; $\C{// octal weight (155)}$ 1208 w = 5@`st@ + 8@`kg@ + 25@`lb@ + hw; 1209 } 1210 \end{lstlisting} 1211 }% 1212 928 1213 929 1214 \section{Evaluation}
Note: See TracChangeset
for help on using the changeset viewer.