- Timestamp:
- Oct 15, 2020, 3:41:38 PM (3 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- b9537e6
- Parents:
- 33c3ded (diff), 0b18db7 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Location:
- doc
- Files:
-
- 13 added
- 12 edited
- 23 moved
Legend:
- Unmodified
- Added
- Removed
-
doc/LaTeXmacros/common.tex
r33c3ded r223a633 11 11 %% Created On : Sat Apr 9 10:06:17 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Fri Sep 4 13:56:52202014 %% Update Count : 38313 %% Last Modified On : Mon Oct 5 09:34:46 2020 14 %% Update Count : 464 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 55 55 \newlength{\parindentlnth} 56 56 \setlength{\parindentlnth}{\parindent} 57 58 \newcommand{\LstBasicStyle}[1]{{\lst@basicstyle{#1}}}59 \newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}60 \newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}61 62 \newlength{\gcolumnposn} % temporary hack because lstlisting does not handle tabs correctly63 \newlength{\columnposn}64 \setlength{\gcolumnposn}{2.5in}65 \setlength{\columnposn}{\gcolumnposn}66 \newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\lst@basicstyle{\LstCommentStyle{#2}}}}67 \newcommand{\CRT}{\global\columnposn=\gcolumnposn}68 69 % allow escape sequence in lstinline70 %\usepackage{etoolbox}71 %\patchcmd{\lsthk@TextStyle}{\let\lst@DefEsc\@empty}{}{}{\errmessage{failed to patch}}72 57 73 58 \usepackage{pslatex} % reduce size of san serif font … … 244 229 \usepackage{listings} % format program code 245 230 \usepackage{lstlang} 246 247 \newcommand{\CFADefaults}{% 231 \makeatletter 232 233 \newcommand{\LstBasicStyle}[1]{{\lst@basicstyle{#1}}} 234 \newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}} 235 \newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}} 236 237 \newlength{\gcolumnposn} % temporary hack because lstlisting does not handle tabs correctly 238 \newlength{\columnposn} 239 \setlength{\gcolumnposn}{2.75in} 240 \setlength{\columnposn}{\gcolumnposn} 241 
\newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\lst@basicstyle{\LstCommentStyle{#2}}}} 242 \newcommand{\CRT}{\global\columnposn=\gcolumnposn} 243 244 % allow escape sequence in lstinline 245 %\usepackage{etoolbox} 246 %\patchcmd{\lsthk@TextStyle}{\let\lst@DefEsc\@empty}{}{}{\errmessage{failed to patch}} 247 248 % allow adding to lst literate 249 \def\addToLiterate#1{\protect\edef\lst@literate{\unexpanded\expandafter{\lst@literate}\unexpanded{#1}}} 250 \lst@Key{add to literate}{}{\addToLiterate{#1}} 251 \makeatother 252 253 \newcommand{\CFAStyle}{% 248 254 \lstset{ 249 language=CFA,250 255 columns=fullflexible, 251 256 basicstyle=\linespread{0.9}\sf, % reduce line spacing and use sanserif font … … 262 267 belowskip=3pt, 263 268 % replace/adjust listing characters that look bad in sanserif 264 literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0. 8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1269 literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.75ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1 265 270 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1 266 271 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex\textgreater}2, 267 moredelim=**[is][\color{red}]{?}{?}, % red highlighting ?...? (registered trademark symbol) emacs: C-q M-. 272 }% lstset 273 }% CFAStyle 274 275 \ifdefined\CFALatin% extra Latin-1 escape characters 276 \lstnewenvironment{cfa}[1][]{ 277 \lstset{ 278 language=CFA, 279 moredelim=**[is][\color{red}]{®}{®}, % red highlighting ®...® (registered trademark symbol) emacs: C-q M-. 
268 280 moredelim=**[is][\color{blue}]{ß}{ß}, % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_ 269 281 moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-" 270 282 moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^ 283 % replace/adjust listing characters that look bad in sanserif 284 add to literate={`}{\ttfamily\upshape\hspace*{-0.1ex}`}1 271 285 }% lstset 272 }% CFADefaults 273 \newcommand{\CFAStyle}{% 274 \CFADefaults 286 \lstset{#1} 287 }{} 275 288 % inline code ©...© (copyright symbol) emacs: C-q M-) 276 289 \lstMakeShortInline© % single-character for \lstinline 277 }% CFAStyle 278 279 \lstnewenvironment{cfa}[1][] 280 {\CFADefaults\lstset{#1}} 281 {} 290 \else% regular ASCI characters 291 \lstnewenvironment{cfa}[1][]{ 292 \lstset{ 293 language=CFA, 294 escapechar=\$, % LaTeX escape in CFA code 295 moredelim=**[is][\color{red}]{@}{@}, % red highlighting @...@ 296 }% lstset 297 \lstset{#1} 298 }{} 299 % inline code @...@ (at symbol) 300 \lstMakeShortInline@ % single-character for \lstinline 301 \fi% 282 302 283 303 % Local Variables: % -
doc/LaTeXmacros/lstlang.sty
r33c3ded r223a633 8 8 %% Created On : Sat May 13 16:34:42 2017 9 9 %% Last Modified By : Peter A. Buhr 10 %% Last Modified On : Tue Jan 8 14:40:33 201911 %% Update Count : 2 110 %% Last Modified On : Wed Sep 23 22:40:04 2020 11 %% Update Count : 24 12 12 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 13 13 … … 115 115 auto, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__, __const, __const__, 116 116 coroutine, disable, dtype, enable, exception, __extension__, fallthrough, fallthru, finally, 117 __float80, float80, __float128, float128, forall, ftype, _Generic, _Imaginary, __imag, __imag__,117 __float80, float80, __float128, float128, forall, ftype, generator, _Generic, _Imaginary, __imag, __imag__, 118 118 inline, __inline, __inline__, __int128, int128, __label__, monitor, mutex, _Noreturn, one_t, or, 119 otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, thread,119 otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, suspend, thread, 120 120 _Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__, 121 121 virtual, __volatile, __volatile__, waitfor, when, with, zero_t, … … 125 125 126 126 % C++ programming language 127 \lstdefinelanguage{C++}[ANSI]{C++}{} 127 \lstdefinelanguage{C++}[ANSI]{C++}{ 128 morekeywords={nullptr,} 129 } 128 130 129 131 % uC++ programming language, based on ANSI C++ -
doc/bibliography/pl.bib
r33c3ded r223a633 1005 1005 key = {Cforall Benchmarks}, 1006 1006 author = {{\textsf{C}{$\mathbf{\forall}$} Benchmarks}}, 1007 howpublished= {\href{https:// plg.uwaterloo.ca/~cforall/doc/CforallConcurrentBenchmarks.tar}{https://\-plg.uwaterloo.ca/\-$\sim$cforall/\-doc/\-CforallConcurrentBenchmarks.tar}},1007 howpublished= {\href{https://github.com/cforall/ConcurrentBenchmarks_SPE20}{https://\-github.com/\-cforall/\-ConcurrentBenchmarks\_SPE20}}, 1008 1008 } 1009 1009 … … 1973 1973 title = {Cooperating Sequential Processes}, 1974 1974 institution = {Technological University}, 1975 address = {Eindhoven, Neth erlands},1975 address = {Eindhoven, Neth.}, 1976 1976 year = 1965, 1977 1977 note = {Reprinted in \cite{Genuys68} pp. 43--112.} -
doc/papers/concurrency/Paper.tex
r33c3ded r223a633 224 224 {} 225 225 \lstnewenvironment{C++}[1][] % use C++ style 226 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`} ,#1}\lstset{#1}}226 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}} 227 227 {} 228 228 \lstnewenvironment{uC++}[1][] 229 {\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`} ,#1}\lstset{#1}}229 {\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}} 230 230 {} 231 231 \lstnewenvironment{Go}[1][] 232 {\lstset{language=Golang,moredelim=**[is][\protect\color{red}]{`}{`} ,#1}\lstset{#1}}232 {\lstset{language=Golang,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}} 233 233 {} 234 234 \lstnewenvironment{python}[1][] 235 {\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`} ,#1}\lstset{#1}}235 {\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}} 236 236 {} 237 237 \lstnewenvironment{java}[1][] 238 {\lstset{language=java,moredelim=**[is][\protect\color{red}]{`}{`} ,#1}\lstset{#1}}238 {\lstset{language=java,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}} 239 239 {} 240 240 … … 284 284 285 285 \begin{document} 286 \linenumbers % comment out to turn off line numbering286 %\linenumbers % comment out to turn off line numbering 287 287 288 288 \maketitle … … 450 450 \hline 451 451 stateful & thread & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\ 452 \hline 453 \hline 452 \hline 453 \hline 454 454 No & No & \textbf{1}\ \ \ @struct@ & \textbf{2}\ \ \ @mutex@ @struct@ \\ 455 \hline 455 \hline 456 456 Yes (stackless) & No & \textbf{3}\ \ \ @generator@ & \textbf{4}\ \ \ @mutex@ @generator@ \\ 457 \hline 457 \hline 458 458 Yes (stackful) & No & \textbf{5}\ \ \ @coroutine@ & \textbf{6}\ \ \ @mutex@ @coroutine@ \\ 459 \hline 459 \hline 460 460 No & Yes & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\ 461 \hline 461 \hline 462 462 Yes (stackless) & Yes & \textbf{9}\ \ \ 
{\color{red}rejected} & \textbf{10}\ \ \ {\color{red}rejected} \\ 463 \hline 463 \hline 464 464 Yes (stackful) & Yes & \textbf{11}\ \ \ @thread@ & \textbf{12}\ \ @mutex@ @thread@ \\ 465 465 \end{tabular} … … 2896 2896 \label{s:RuntimeStructureCluster} 2897 2897 2898 A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue .2898 A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue~\cite{Buhr90a}. 2899 2899 The term \newterm{virtual processor} is introduced as a synonym for kernel thread to disambiguate between user and kernel thread. 2900 2900 From the language perspective, a virtual processor is an actual processor (core). … … 2992 2992 \end{cfa} 2993 2993 where CPU time in nanoseconds is from the appropriate language clock. 2994 Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language. 2994 Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language; 2995 each @N@ appears after the experiment name in the following tables. 2995 2996 The total time is divided by @N@ to obtain the average time for a benchmark. 2996 2997 Each benchmark experiment is run 13 times and the average appears in the table. 2998 For languages with a runtime JIT (Java, Node.js, Python), a single half-hour long experiment is run to check stability; 2999 all long-experiment results are statistically equivalent, \ie median/average/standard-deviation correlate with the short-experiment results, indicating the short experiments reached a steady state. 
2997 3000 All omitted tests for other languages are functionally identical to the \CFA tests and available online~\cite{CforallConcurrentBenchmarks}. 2998 % tar --exclude-ignore=exclude -cvhf benchmark.tar benchmark2999 % cp -p benchmark.tar /u/cforall/public_html/doc/concurrent_benchmark.tar3000 3001 3001 3002 \paragraph{Creation} … … 3006 3007 3007 3008 \begin{multicols}{2} 3008 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}} 3009 \begin{cfa} 3010 @coroutine@ MyCoroutine {}; 3009 \begin{cfa}[xleftmargin=0pt] 3010 `coroutine` MyCoroutine {}; 3011 3011 void ?{}( MyCoroutine & this ) { 3012 3012 #ifdef EAGER … … 3016 3016 void main( MyCoroutine & ) {} 3017 3017 int main() { 3018 BENCH( for ( N ) { @MyCoroutine c;@} )3018 BENCH( for ( N ) { `MyCoroutine c;` } ) 3019 3019 sout | result; 3020 3020 } … … 3030 3030 3031 3031 \begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}} 3032 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3033 \CFA generator & 0.6 & 0.6 & 0.0 \\ 3034 \CFA coroutine lazy & 13.4 & 13.1 & 0.5 \\ 3035 \CFA coroutine eager & 144.7 & 143.9 & 1.5 \\ 3036 \CFA thread & 466.4 & 468.0 & 11.3 \\ 3037 \uC coroutine & 155.6 & 155.7 & 1.7 \\ 3038 \uC thread & 523.4 & 523.9 & 7.7 \\ 3039 Python generator & 123.2 & 124.3 & 4.1 \\ 3040 Node.js generator & 33.4 & 33.5 & 0.3 \\ 3041 Goroutine thread & 751.0 & 750.5 & 3.1 \\ 3042 Rust tokio thread & 1860.0 & 1881.1 & 37.6 \\ 3043 Rust thread & 53801.0 & 53896.8 & 274.9 \\ 3044 Java thread & 120274.0 & 120722.9 & 2356.7 \\ 3045 Pthreads thread & 31465.5 & 31419.5 & 140.4 3032 \multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3033 \CFA generator (1B) & 0.6 & 0.6 & 0.0 \\ 3034 \CFA coroutine lazy (100M) & 13.4 & 13.1 & 0.5 \\ 3035 \CFA coroutine eager (10M) & 144.7 & 143.9 & 1.5 \\ 3036 \CFA thread (10M) & 466.4 & 468.0 & 11.3 \\ 3037 \uC 
coroutine (10M) & 155.6 & 155.7 & 1.7 \\ 3038 \uC thread (10M) & 523.4 & 523.9 & 7.7 \\ 3039 Python generator (10M) & 123.2 & 124.3 & 4.1 \\ 3040 Node.js generator (10M) & 33.4 & 33.5 & 0.3 \\ 3041 Goroutine thread (10M) & 751.0 & 750.5 & 3.1 \\ 3042 Rust tokio thread (10M) & 1860.0 & 1881.1 & 37.6 \\ 3043 Rust thread (250K) & 53801.0 & 53896.8 & 274.9 \\ 3044 Java thread (250K) & 119256.0 & 119679.2 & 2244.0 \\ 3045 % Java thread (1 000 000) & 123100.0 & 123052.5 & 751.6 \\ 3046 Pthreads thread (250K) & 31465.5 & 31419.5 & 140.4 3046 3047 \end{tabular} 3047 3048 \end{multicols} … … 3052 3053 Internal scheduling is measured using a cycle of two threads signalling and waiting. 3053 3054 Figure~\ref{f:schedint} shows the code for \CFA, with results in Table~\ref{t:schedint}. 3054 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects. 3055 Java scheduling is significantly greater because the benchmark explicitly creates multiple threads in order to prevent the JIT from making the program sequential, \ie removing all locking. 3055 Note, the \CFA incremental cost for bulk acquire is a fixed cost for small numbers of mutex objects. 3056 User-level threading has one kernel thread, eliminating contention between the threads (direct handoff of the kernel thread). 3057 Kernel-level threading has two kernel threads allowing some contention. 
3056 3058 3057 3059 \begin{multicols}{2} 3058 \ lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}3059 \begin{cfa} 3060 \setlength{\tabcolsep}{3pt} 3061 \begin{cfa}[xleftmargin=0pt] 3060 3062 volatile int go = 0; 3061 @condition c;@ 3062 @monitor@M {} m1/*, m2, m3, m4*/;3063 void call( M & @mutex p1/*, p2, p3, p4*/@) {3064 @signal( c );@3065 } 3066 void wait( M & @mutex p1/*, p2, p3, p4*/@) {3063 `condition c;` 3064 `monitor` M {} m1/*, m2, m3, m4*/; 3065 void call( M & `mutex p1/*, p2, p3, p4*/` ) { 3066 `signal( c );` 3067 } 3068 void wait( M & `mutex p1/*, p2, p3, p4*/` ) { 3067 3069 go = 1; // continue other thread 3068 for ( N ) { @wait( c );@} );3070 for ( N ) { `wait( c );` } ); 3069 3071 } 3070 3072 thread T {}; … … 3091 3093 3092 3094 \begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}} 3093 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3094 \CFA @signal@, 1 monitor & 364.4 & 364.2 & 4.4 \\ 3095 \CFA @signal@, 2 monitor & 484.4 & 483.9 & 8.8 \\ 3096 \CFA @signal@, 4 monitor & 709.1 & 707.7 & 15.0 \\ 3097 \uC @signal@ monitor & 328.3 & 327.4 & 2.4 \\ 3098 Rust cond. variable & 7514.0 & 7437.4 & 397.2 \\ 3099 Java @notify@ monitor & 9623.0 & 9654.6 & 236.2 \\ 3100 Pthreads cond. variable & 5553.7 & 5576.1 & 345.6 3095 \multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3096 \CFA @signal@, 1 monitor (10M) & 364.4 & 364.2 & 4.4 \\ 3097 \CFA @signal@, 2 monitor (10M) & 484.4 & 483.9 & 8.8 \\ 3098 \CFA @signal@, 4 monitor (10M) & 709.1 & 707.7 & 15.0 \\ 3099 \uC @signal@ monitor (10M) & 328.3 & 327.4 & 2.4 \\ 3100 Rust cond. variable (1M) & 7514.0 & 7437.4 & 397.2 \\ 3101 Java @notify@ monitor (1M) & 8717.0 & 8774.1 & 471.8 \\ 3102 % Java @notify@ monitor (100 000 000) & 8634.0 & 8683.5 & 330.5 \\ 3103 Pthreads cond. 
variable (1M) & 5553.7 & 5576.1 & 345.6 3101 3104 \end{tabular} 3102 3105 \end{multicols} … … 3107 3110 External scheduling is measured using a cycle of two threads calling and accepting the call using the @waitfor@ statement. 3108 3111 Figure~\ref{f:schedext} shows the code for \CFA with results in Table~\ref{t:schedext}. 3109 Note, the incremental cost of bulk acquire for \CFA, which is largelya fixed cost for small numbers of mutex objects.3112 Note, the \CFA incremental cost for bulk acquire is a fixed cost for small numbers of mutex objects. 3110 3113 3111 3114 \begin{multicols}{2} 3112 \ lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}3115 \setlength{\tabcolsep}{5pt} 3113 3116 \vspace*{-16pt} 3114 \begin{cfa} 3115 @monitor@M {} m1/*, m2, m3, m4*/;3116 void call( M & @mutex p1/*, p2, p3, p4*/@) {}3117 void wait( M & @mutex p1/*, p2, p3, p4*/@) {3118 for ( N ) { @waitfor( call : p1/*, p2, p3, p4*/ );@}3117 \begin{cfa}[xleftmargin=0pt] 3118 `monitor` M {} m1/*, m2, m3, m4*/; 3119 void call( M & `mutex p1/*, p2, p3, p4*/` ) {} 3120 void wait( M & `mutex p1/*, p2, p3, p4*/` ) { 3121 for ( N ) { `waitfor( call : p1/*, p2, p3, p4*/ );` } 3119 3122 } 3120 3123 thread T {}; … … 3133 3136 \columnbreak 3134 3137 3135 \vspace*{-1 6pt}3138 \vspace*{-18pt} 3136 3139 \captionof{table}{External-scheduling comparison (nanoseconds)} 3137 3140 \label{t:schedext} 3138 3141 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}} 3139 \multicolumn{1}{@{} c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\3140 \CFA @waitfor@, 1 monitor & 367.1 & 365.3 & 5.0 \\3141 \CFA @waitfor@, 2 monitor & 463.0 & 464.6 & 7.1 \\3142 \CFA @waitfor@, 4 monitor & 689.6 & 696.2 & 21.5 \\3143 \uC \lstinline[language=uC++]|_Accept| monitor & 328.2 & 329.1 & 3.4 \\3144 Go \lstinline[language=Golang]|select| channel & 365.0 & 365.5 & 1.23142 \multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} 
& \multicolumn{1}{c@{}}{Std Dev} \\ 3143 \CFA @waitfor@, 1 monitor (10M) & 367.1 & 365.3 & 5.0 \\ 3144 \CFA @waitfor@, 2 monitor (10M) & 463.0 & 464.6 & 7.1 \\ 3145 \CFA @waitfor@, 4 monitor (10M) & 689.6 & 696.2 & 21.5 \\ 3146 \uC \lstinline[language=uC++]|_Accept| monitor (10M) & 328.2 & 329.1 & 3.4 \\ 3147 Go \lstinline[language=Golang]|select| channel (10M) & 365.0 & 365.5 & 1.2 3145 3148 \end{tabular} 3146 3149 \end{multicols} … … 3155 3158 3156 3159 \begin{multicols}{2} 3157 \ lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}3158 \begin{cfa} 3159 @monitor@M {} m1/*, m2, m3, m4*/;3160 call( M & @mutex p1/*, p2, p3, p4*/@) {}3160 \setlength{\tabcolsep}{3pt} 3161 \begin{cfa}[xleftmargin=0pt] 3162 `monitor` M {} m1/*, m2, m3, m4*/; 3163 call( M & `mutex p1/*, p2, p3, p4*/` ) {} 3161 3164 int main() { 3162 3165 BENCH( for( N ) call( m1/*, m2, m3, m4*/ ); ) … … 3173 3176 \label{t:mutex} 3174 3177 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}} 3175 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3176 test-and-test-set lock & 19.1 & 18.9 & 0.4 \\ 3177 \CFA @mutex@ function, 1 arg. & 48.3 & 47.8 & 0.9 \\ 3178 \CFA @mutex@ function, 2 arg. & 86.7 & 87.6 & 1.9 \\ 3179 \CFA @mutex@ function, 4 arg. & 173.4 & 169.4 & 5.9 \\ 3180 \uC @monitor@ member rtn. & 54.8 & 54.8 & 0.1 \\ 3181 Goroutine mutex lock & 34.0 & 34.0 & 0.0 \\ 3182 Rust mutex lock & 33.0 & 33.2 & 0.8 \\ 3183 Java synchronized method & 31.0 & 31.0 & 0.0 \\ 3184 Pthreads mutex Lock & 31.0 & 31.1 & 0.4 3178 \multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3179 test-and-test-set lock (50M) & 19.1 & 18.9 & 0.4 \\ 3180 \CFA @mutex@ function, 1 arg. (50M) & 48.3 & 47.8 & 0.9 \\ 3181 \CFA @mutex@ function, 2 arg. (50M) & 86.7 & 87.6 & 1.9 \\ 3182 \CFA @mutex@ function, 4 arg. 
(50M) & 173.4 & 169.4 & 5.9 \\ 3183 \uC @monitor@ member rtn. (50M) & 54.8 & 54.8 & 0.1 \\ 3184 Goroutine mutex lock (50M) & 34.0 & 34.0 & 0.0 \\ 3185 Rust mutex lock (50M) & 33.0 & 33.2 & 0.8 \\ 3186 Java synchronized method (50M) & 31.0 & 30.9 & 0.5 \\ 3187 % Java synchronized method (10 000 000 000) & 31.0 & 30.2 & 0.9 \\ 3188 Pthreads mutex Lock (50M) & 31.0 & 31.1 & 0.4 3185 3189 \end{tabular} 3186 3190 \end{multicols} … … 3201 3205 % To: "Peter A. Buhr" <pabuhr@plg2.cs.uwaterloo.ca> 3202 3206 % Date: Fri, 24 Jan 2020 13:49:18 -0500 3203 % 3207 % 3204 3208 % I can also verify that the previous version, which just tied a bunch of promises together, *does not* go back to the 3205 3209 % event loop at all in the current version of Node. Presumably they're taking advantage of the fact that the ordering of … … 3211 3215 3212 3216 \begin{multicols}{2} 3213 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}} 3214 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 3215 @coroutine@ C {}; 3216 void main( C & ) { for () { @suspend;@ } } 3217 \begin{cfa}[xleftmargin=0pt] 3218 `coroutine` C {}; 3219 void main( C & ) { for () { `suspend;` } } 3217 3220 int main() { // coroutine test 3218 3221 C c; 3219 BENCH( for ( N ) { @resume( c );@} )3222 BENCH( for ( N ) { `resume( c );` } ) 3220 3223 sout | result; 3221 3224 } 3222 3225 int main() { // thread test 3223 BENCH( for ( N ) { @yield();@} )3226 BENCH( for ( N ) { `yield();` } ) 3224 3227 sout | result; 3225 3228 } … … 3234 3237 \label{t:ctx-switch} 3235 3238 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}} 3236 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3237 C function & 1.8 & 1.8 & 0.0 \\ 3238 \CFA generator & 1.8 & 2.0 & 0.3 \\ 3239 \CFA coroutine & 32.5 & 32.9 & 0.8 \\ 3240 \CFA thread & 93.8 & 93.6 & 2.2 \\ 3241 \uC coroutine & 50.3 & 50.3 & 0.2 \\ 3242 \uC thread & 97.3 & 97.4 & 1.0 \\ 3243 Python generator & 40.9 & 41.3 & 1.5 
\\ 3244 Node.js await & 1852.2 & 1854.7 & 16.4 \\ 3245 Node.js generator & 33.3 & 33.4 & 0.3 \\ 3246 Goroutine thread & 143.0 & 143.3 & 1.1 \\ 3247 Rust async await & 32.0 & 32.0 & 0.0 \\ 3248 Rust tokio thread & 143.0 & 143.0 & 1.7 \\ 3249 Rust thread & 332.0 & 331.4 & 2.4 \\ 3250 Java thread & 405.0 & 415.0 & 17.6 \\ 3251 Pthreads thread & 334.3 & 335.2 & 3.9 3239 \multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 3240 C function (10B) & 1.8 & 1.8 & 0.0 \\ 3241 \CFA generator (5B) & 1.8 & 2.0 & 0.3 \\ 3242 \CFA coroutine (100M) & 32.5 & 32.9 & 0.8 \\ 3243 \CFA thread (100M) & 93.8 & 93.6 & 2.2 \\ 3244 \uC coroutine (100M) & 50.3 & 50.3 & 0.2 \\ 3245 \uC thread (100M) & 97.3 & 97.4 & 1.0 \\ 3246 Python generator (100M) & 40.9 & 41.3 & 1.5 \\ 3247 Node.js await (5M) & 1852.2 & 1854.7 & 16.4 \\ 3248 Node.js generator (100M) & 33.3 & 33.4 & 0.3 \\ 3249 Goroutine thread (100M) & 143.0 & 143.3 & 1.1 \\ 3250 Rust async await (100M) & 32.0 & 32.0 & 0.0 \\ 3251 Rust tokio thread (100M) & 143.0 & 143.0 & 1.7 \\ 3252 Rust thread (25M) & 332.0 & 331.4 & 2.4 \\ 3253 Java thread (100M) & 405.0 & 415.0 & 17.6 \\ 3254 % Java thread ( 100 000 000) & 413.0 & 414.2 & 6.2 \\ 3255 % Java thread (5 000 000 000) & 415.0 & 415.2 & 6.1 \\ 3256 Pthreads thread (25M) & 334.3 & 335.2 & 3.9 3252 3257 \end{tabular} 3253 3258 \end{multicols} … … 3258 3263 Languages using 1:1 threading based on pthreads can at best meet or exceed, due to language overhead, the pthread results. 3259 3264 Note, pthreads has a fast zero-contention mutex lock checked in user space. 3260 Languages with M:N threading have better performance than 1:1 because there is no operating-system interactions. 3265 Languages with M:N threading have better performance than 1:1 because there is no operating-system interactions (context-switching or locking). 
3266 As well, for locking experiments, M:N threading has less contention if only one kernel thread is used. 3261 3267 Languages with stackful coroutines have higher cost than stackless coroutines because of stack allocation and context switching; 3262 3268 however, stackful \uC and \CFA coroutines have approximately the same performance as stackless Python and Node.js generators. 3263 3269 The \CFA stackless generator is approximately 25 times faster for suspend/resume and 200 times faster for creation than stackless Python and Node.js generators. 3270 The Node.js context-switch is costly when asynchronous await must enter the event engine because a promise is not fulfilled. 3271 Finally, the benchmark results correlate across programming languages with and without JIT, indicating the JIT has completed any runtime optimizations. 3264 3272 3265 3273 … … 3319 3327 3320 3328 The authors recognize the design assistance of Aaron Moss, Rob Schluntz, Andrew Beach, and Michael Brooks; David Dice for commenting and helping with the Java benchmarks; and Gregor Richards for helping with the Node.js benchmarks. 3321 This research is funded by a grant fromWaterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.3329 This research is funded by the NSERC/Waterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada. 3322 3330 3323 3331 {% -
doc/papers/concurrency/annex/local.bib
r33c3ded r223a633 59 59 @manual{Cpp-Transactions, 60 60 keywords = {C++, Transactional Memory}, 61 title = {Tech nical Specificationfor C++ Extensions for Transactional Memory},61 title = {Tech. Spec. for C++ Extensions for Transactional Memory}, 62 62 organization= {International Standard ISO/IEC TS 19841:2015 }, 63 63 publisher = {American National Standards Institute}, -
doc/papers/concurrency/mail2
r33c3ded r223a633 959 959 Software: Practice and Experience Editorial Office 960 960 961 962 963 Date: Wed, 2 Sep 2020 20:55:34 +0000 964 From: Richard Jones <onbehalfof@manuscriptcentral.com> 965 Reply-To: R.E.Jones@kent.ac.uk 966 To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca 967 Subject: Software: Practice and Experience - Decision on Manuscript ID 968 SPE-19-0219.R2 969 970 02-Sep-2020 971 972 Dear Dr Buhr, 973 974 Many thanks for submitting SPE-19-0219.R2 entitled "Advanced Control-flow and Concurrency in Cforall" to Software: Practice and Experience. The paper has now been reviewed and the comments of the referees are included at the bottom of this letter. I apologise for the length of time it has taken to get these. 975 976 Both reviewers consider this paper to be close to acceptance. However, before I can accept this paper, I would like you address the comments of Reviewer 2, particularly with regard to the description of the adaptation Java harness to deal with warmup. I would expect to see a convincing argument that the computation has reached a steady state. I would also like you to provide the values for N for each benchmark run. This should be very straightforward for you to do. There are a couple of papers on steady state that you may wish to consult (though I am certainly not pushing my own work). 977 978 1) Barrett, Edd; Bolz-Tereick, Carl Friedrich; Killick, Rebecca; Mount, Sarah and Tratt, Laurence. Virtual Machine Warmup Blows Hot and Cold. OOPSLA 2017. https://doi.org/10.1145/3133876 979 Virtual Machines (VMs) with Just-In-Time (JIT) compilers are traditionally thought to execute programs in two phases: the initial warmup phase determines which parts of a program would most benefit from dynamic compilation, before JIT compiling those parts into machine code; subsequently the program is said to be at a steady state of peak performance. 
Measurement methodologies almost always discard data collected during the warmup phase such that reported measurements focus entirely on peak performance. We introduce a fully automated statistical approach, based on changepoint analysis, which allows us to determine if a program has reached a steady state and, if so, whether that represents peak performance or not. Using this, we show that even when run in the most controlled of circumstances, small, deterministic, widely studied microbenchmarks often fail to reach a steady state of peak performance on a variety of common VMs. Repeating our experiment on 3 different machines, we found that at most 43.5% of pairs consistently reach a steady state of peak performance. 980 981 2) Kalibera, Tomas and Jones, Richard. Rigorous Benchmarking in Reasonable Time. ISMM 2013. https://doi.org/10.1145/2555670.2464160 982 Experimental evaluation is key to systems research. Because modern systems are complex and non-deterministic, good experimental methodology demands that researchers account for uncertainty. To obtain valid results, they are expected to run many iterations of benchmarks, invoke virtual machines (VMs) several times, or even rebuild VM or benchmark binaries more than once. All this repetition costs time to complete experiments. Currently, many evaluations give up on sufficient repetition or rigorous statistical methods, or even run benchmarks only in training sizes. The results reported often lack proper variation estimates and, when a small difference between two systems is reported, some are simply unreliable.In contrast, we provide a statistically rigorous methodology for repetition and summarising results that makes efficient use of experimentation time. Time efficiency comes from two key observations. First, a given benchmark on a given platform is typically prone to much less non-determinism than the common worst-case of published corner-case studies. 
Second, repetition is most needed where most uncertainty arises (whether between builds, between executions or between iterations). We capture experimentation cost with a novel mathematical model, which we use to identify the number of repetitions at each level of an experiment necessary and sufficient to obtain a given level of precision.We present our methodology as a cookbook that guides researchers on the number of repetitions they should run to obtain reliable results. We also show how to present results with an effect size confidence interval. As an example, we show how to use our methodology to conduct throughput experiments with the DaCapo and SPEC CPU benchmarks on three recent platforms. 983 984 You have 42 days from the date of this email to submit your revision. If you are unable to complete the revision within this time, please contact me to request a short extension. 985 986 You can upload your revised manuscript and submit it through your Author Center. Log into https://mc.manuscriptcentral.com/spe and enter your Author Center, where you will find your manuscript title listed under "Manuscripts with Decisions". 987 988 When submitting your revised manuscript, you will be able to respond to the comments made by the referee(s) in the space provided. You can use this space to document any changes you make to the original manuscript. 989 990 If you would like help with English language editing, or other article preparation support, Wiley Editing Services offers expert help with English Language Editing, as well as translation, manuscript formatting, and figure formatting at www.wileyauthors.com/eeo/preparation. You can also check out our resources for Preparing Your Article for general guidance about writing and preparing your manuscript at www.wileyauthors.com/eeo/prepresources. 991 992 Once again, thank you for submitting your manuscript to Software: Practice and Experience. I look forward to receiving your revision. 
993 994 Sincerely, 995 Richard 996 997 Prof. Richard Jones 998 Editor, Software: Practice and Experience 999 R.E.Jones@kent.ac.uk 1000 1001 Referee(s)' Comments to Author: 1002 1003 Reviewing: 1 1004 1005 Comments to the Author 1006 Overall, I felt that this draft was an improvement on previous drafts and I don't have further changes to request. 1007 1008 I appreciated the new language to clarify the relationship of external and internal scheduling, for example, as well as the new measurements of Rust tokio. Also, while I still believe that the choice between thread/generator/coroutine and so forth could be made crisper and clearer, the current draft of Section 2 did seem adequate to me in terms of specifying the considerations that users would have to take into account to make the choice. 1009 1010 1011 Reviewing: 2 1012 1013 Comments to the Author 1014 First: let me apologise for the delay on this review. I'll blame the global pandemic combined with my institution's senior management's counterproductive decisions for taking up most of my time and all of my energy. 1015 1016 At this point, reading the responses, I think we've been around the course enough times that further iteration is unlikely to really improve the paper any further, so I'm happy to recommend acceptance. My main comments are that there were some good points in the responses to *all* the reviews and I strongly encourage the authors to incorporate those discursive responses into the final paper so they may benefit readers as well as reviewers. I agree with the recommendations of reviewer #2 that the paper could usefully be split in to two, which I think I made to a previous revision, but I'm happy to leave that decision to the Editor. 1017 1018 Finally, the paper needs to describe how the Java harness was adapted to deal with warmup; why the computation has warmed up and reached a steady state - similarly for js and Python. The tables should also give the "N" chosen for each benchmark run. 
1019 1020 minor points 1021 * don't start sentences with "However" 1022 * most downloaded isn't an "Award" 1023 1024 1025 1026 Date: Thu, 1 Oct 2020 05:34:29 +0000 1027 From: Richard Jones <onbehalfof@manuscriptcentral.com> 1028 Reply-To: R.E.Jones@kent.ac.uk 1029 To: pabuhr@uwaterloo.ca 1030 Subject: Revision reminder - SPE-19-0219.R2 1031 1032 01-Oct-2020 1033 1034 Dear Dr Buhr 1035 1036 SPE-19-0219.R2 1037 1038 This is a reminder that your opportunity to revise and re-submit your manuscript will expire 14 days from now. If you require more time please contact me directly and I may grant an extension to this deadline, otherwise the option to submit a revision online, will not be available. 1039 1040 If your article is of potential interest to the general public, (which means it must be timely, groundbreaking, interesting and impact on everyday society) then please e-mail ejp@wiley.co.uk explaining the public interest side of the research. Wiley will then investigate the potential for undertaking a global press campaign on the article. 1041 1042 I look forward to receiving your revision. 1043 1044 Sincerely, 1045 1046 Prof. Richard Jones 1047 Editor, Software: Practice and Experience 1048 1049 https://mc.manuscriptcentral.com/spe 1050 1051 1052 1053 Date: Tue, 6 Oct 2020 15:29:41 +0000 1054 From: Mayank Roy Chowdhury <onbehalfof@manuscriptcentral.com> 1055 Reply-To: speoffice@wiley.com 1056 To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca 1057 Subject: SPE-19-0219.R3 successfully submitted 1058 1059 06-Oct-2020 1060 1061 Dear Dr Buhr, 1062 1063 Your manuscript entitled "Advanced Control-flow and Concurrency in Cforall" has been successfully submitted online and is presently being given full consideration for publication in Software: Practice and Experience. 1064 1065 Your manuscript number is SPE-19-0219.R3. Please mention this number in all future correspondence regarding this submission. 
1066 1067 You can view the status of your manuscript at any time by checking your Author Center after logging into https://mc.manuscriptcentral.com/spe. If you have difficulty using this site, please click the 'Get Help Now' link at the top right corner of the site. 1068 1069 1070 Thank you for submitting your manuscript to Software: Practice and Experience. 1071 1072 Sincerely, 1073 1074 Software: Practice and Experience Editorial Office 1075 -
doc/refrat/refrat.tex
r33c3ded r223a633 11 11 %% Created On : Wed Apr 6 14:52:25 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Wed Jan 31 17:30:23 201814 %% Update Count : 1 0813 %% Last Modified On : Mon Oct 5 09:02:53 2020 14 %% Update Count : 110 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 30 30 \usepackage{upquote} % switch curled `'" to straight 31 31 \usepackage{calc} 32 \usepackage{xspace}33 32 \usepackage{varioref} % extended references 34 \usepackage{listings} % format program code35 33 \usepackage[flushmargin]{footmisc} % support label/reference in footnote 36 34 \usepackage{latexsym} % \Box glyph 37 35 \usepackage{mathptmx} % better math font with "times" 38 36 \usepackage[usenames]{color} 39 \input{common} % common CFA document macros 40 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 41 \usepackage{breakurl} 42 \renewcommand{\UrlFont}{\small\sf} 43 44 \usepackage[pagewise]{lineno} 45 \renewcommand{\linenumberfont}{\scriptsize\sffamily} 46 \usepackage[firstpage]{draftwatermark} 47 \SetWatermarkLightness{0.9} 48 49 % Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore 50 % removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR 51 % AFTER HYPERREF. 
52 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 53 54 \setlength{\topmargin}{-0.45in} % move running title into header 55 \setlength{\headsep}{0.25in} 56 57 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 58 59 \CFAStyle % use default CFA format-style 60 \lstnewenvironment{C++}[1][] % use C++ style 61 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®}#1}} 62 {} 63 37 \newcommand{\CFALatin}{} 64 38 % inline code ©...© (copyright symbol) emacs: C-q M-) 65 39 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-. … … 69 43 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^ 70 44 % math escape $...$ (dollar symbol) 45 \input{common} % common CFA document macros 46 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 47 \usepackage{breakurl} 48 \renewcommand{\UrlFont}{\small\sf} 49 50 \usepackage[pagewise]{lineno} 51 \renewcommand{\linenumberfont}{\scriptsize\sffamily} 52 \usepackage[firstpage]{draftwatermark} 53 \SetWatermarkLightness{0.9} 54 55 % Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore 56 % removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR 57 % AFTER HYPERREF. 58 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 59 60 \setlength{\topmargin}{-0.45in} % move running title into header 61 \setlength{\headsep}{0.25in} 71 62 72 63 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 73 64 65 \CFAStyle % use default CFA format-style 66 \lstnewenvironment{C++}[1][] % use C++ style 67 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}} 68 {} 69 70 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 71 74 72 % Names used in the document. 
75 \newcommand{\Version}{\input{ ../../version}}73 \newcommand{\Version}{\input{build/version}} 76 74 \newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}} 77 75 \newcommand{\Emph}[2][red]{{\color{#1}\textbf{\emph{#2}}}} -
doc/theses/andrew_beach_MMath/thesis.tex
r33c3ded r223a633 34 34 \usepackage[toc,abbreviations]{glossaries-extra} 35 35 36 % Main glossary entries -- definitions of relevant terminology 37 \newglossaryentry{computer} 38 { 39 name=computer, 40 description={A programmable machine that receives input data, 41 stores and manipulates the data, and provides 42 formatted output} 43 } 44 45 % Nomenclature glossary entries -- New definitions, or unusual terminology 46 \newglossary*{nomenclature}{Nomenclature} 47 \newglossaryentry{dingledorf} 48 { 49 type=nomenclature, 50 name=dingledorf, 51 description={A person of supposed average intelligence who makes incredibly 52 brainless misjudgments} 53 } 54 55 % List of Abbreviations (abbreviations are from the glossaries-extra package) 56 \newabbreviation{aaaaz}{AAAAZ}{American Association of Amature Astronomers 57 and Zoologists} 58 59 % List of Symbols 60 \newglossary*{symbols}{List of Symbols} 61 \newglossaryentry{rvec} 62 { 63 name={$\mathbf{v}$}, 64 sort={label}, 65 type=symbols, 66 description={Random vector: a location in n-dimensional Cartesian space, where 67 each dimensional component is determined by a random process} 68 } 36 % Define all the glossaries. 37 \input{glossaries} 69 38 70 39 % Generate the glossaries defined above. -
doc/theses/fangren_yu_COOP_S20/Makefile
r33c3ded r223a633 46 46 # File Dependencies # 47 47 48 49 48 ${DOCUMENT} : ${BASE}.ps 50 49 ps2pdf $< -
doc/theses/fangren_yu_COOP_S20/Report.tex
r33c3ded r223a633 1 \documentclass[twoside,1 2pt]{article}1 \documentclass[twoside,11pt]{article} 2 2 3 3 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% … … 11 11 \usepackage[labelformat=simple,aboveskip=0pt,farskip=0pt]{subfig} 12 12 \renewcommand{\thesubfigure}{\alph{subfigure})} 13 \usepackage[flushmargin]{footmisc} % support label/reference in footnote 13 14 \usepackage{latexsym} % \Box glyph 14 15 \usepackage{mathptmx} % better math font with "times" 16 \usepackage[toc]{appendix} % article does not have appendix 15 17 \usepackage[usenames]{color} 16 18 \input{common} % common CFA document macros 17 19 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 18 20 \usepackage{breakurl} 21 \urlstyle{sf} 22 23 % reduce spacing 24 \setlist[itemize]{topsep=5pt,parsep=0pt}% global 25 \setlist[enumerate]{topsep=5pt,parsep=0pt}% global 19 26 20 27 \usepackage[pagewise]{lineno} … … 26 33 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 27 34 \newcommand{\NOTE}{\textbf{NOTE}} 35 \newcommand{\TODO}[1]{{\color{Purple}#1}} 28 36 29 37 \setlength{\topmargin}{-0.45in} % move running title into header … … 32 40 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 33 41 34 \CFA Defaults42 \CFAStyle % CFA code-style for all languages 35 43 \lstset{ 36 language=C++, % make C++ the default language 37 escapechar=\$, % LaTeX escape in CFA code 38 moredelim=**[is][\color{red}]{`}{`}, 44 language=C++,moredelim=**[is][\color{red}]{@}{@} % make C++ the default language 39 45 }% lstset 40 \lstMakeShortInline@%41 46 \lstnewenvironment{C++}[1][] % use C++ style 42 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`},#1}} 43 {} 47 {\lstset{language=C++,moredelim=**[is][\color{red}]{@}{@}}\lstset{#1}}{} 44 48 45 49 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% … … 84 88 \section{Overview} 85 89 86 cfa-cc is the reference compiler 
for the \CFA programming language, which is a non- 87 object-oriented extension to C. 88 \CFA attempts to introduce productive modern programming language features to C 89 while maintaining as much backward-compatibility as possible, so that most existing C 90 programs can seamlessly work with \CFA. 91 92 Since the \CFA project was dated back to the early 2000s, and only restarted in the past 93 few years, there is a significant amount of legacy code in the current compiler codebase, 94 with little proper documentation available. This becomes a difficulty while developing new 95 features based on the previous implementations, and especially while diagnosing 96 problems. 97 98 Currently, the \CFA team is also facing another problem: bad compiler performance. For 99 the development of a new programming language, writing a standard library is an 100 important part. The incompetence of the compiler causes building the library files to take 101 tens of minutes, making iterative development and testing almost impossible. There is 102 ongoing effort to rewrite the core data structure of the compiler to overcome the 103 performance issue, but many bugs may appear during the work, and lack of documentation 104 makes debugging extremely difficult. 105 106 This developer's reference will be continuously improved and eventually cover the 107 compiler codebase. For now, the focus is mainly on the parts being rewritten, and also the 108 performance bottleneck, namely the resolution algorithm. It is aimed to provide new 109 developers to the project enough guidance and clarify the purposes and behavior of certain 110 functions which are not mentioned in the previous \CFA research papers. 90 @cfa-cc@ is the reference compiler for the \CFA programming language, which is a non-object-oriented extension to C. 
91 \CFA attempts to introduce productive modern programming language features to C while maintaining as much backward-compatibility as possible, so that most existing C programs can seamlessly work with \CFA. 92 93 Since the \CFA project dates back to the early 2000s, and only restarted in the past few years, there is a significant amount of legacy code in the current compiler codebase with little documentation. 94 The lack of documentation makes it difficult to develop new features from the current implementation and diagnose problems. 95 96 Currently, the \CFA team is also facing poor compiler performance. 97 For the development of a new programming language, writing standard libraries is an important component. 98 The slow compiler causes building of the library files to take tens of minutes, making iterative development and testing almost impossible. 99 There is an ongoing effort to rewrite the core data-structure of the compiler to overcome the performance issue, but many bugs have appeared during this work, and lack of documentation is hampering debugging. 100 101 This developer's reference manual begins the documentation and should be continuously im\-proved until it eventually covers the entire compiler codebase. 102 For now, the focus is mainly on the parts being rewritten, and also the primary performance bottleneck, namely the resolution algorithm. 103 Its aim is to provide new project developers with guidance in understanding the codebase, and clarify the purpose and behaviour of certain functions that are not mentioned in the previous \CFA research papers~\cite{Bilson03,Ditchfield92,Moss19}. 111 104 112 105 113 106 \section{Compiler Framework} 114 107 108 \CFA source code is first transformed into an abstract syntax tree (AST) by the parser before being analyzed by the compiler. 
109 110 115 111 \subsection{AST Representation} 116 112 117 Source code input is first transformed into abstract syntax tree (AST) representation by the 118 parser before analyzed by the compiler. 119 120 There are 4 major categories of AST nodes used by the compiler, along with some derived 121 structures. 122 123 \subsubsection{Declaration nodes} 113 114 There are 4 major categories of AST nodes used by the compiler, along with some derived structures. 115 116 \subsubsection{Declaration Nodes} 124 117 125 118 A declaration node represents either of: 126 119 \begin{itemize} 127 120 \item 128 Type declaration: struct, union, typedef or type parameter (see Appendix A.3)129 \item 130 Variable declaration131 \item 132 Function declaration121 type declaration: @struct@, @union@, @typedef@ or type parameter (see \VRef[Appendix]{s:KindsTypeParameters}) 122 \item 123 variable declaration 124 \item 125 function declaration 133 126 \end{itemize} 134 127 Declarations are introduced by standard C declarations, with the usual scoping rules. 135 In addition, declarations can also be introduced by the forall clause (which is the origin 136 of \CFA's name): 128 In addition, declarations can also be qualified by the \lstinline[language=CFA]@forall@ clause (which is the origin of \CFA's name): 137 129 \begin{cfa} 138 forall ( <$\emph{TypeParameterList}$> | <$\emph{AssertionList}$>)130 forall ( <$\emph{TypeParameterList}$> | <$\emph{AssertionList}$> ) 139 131 $\emph{declaration}$ 140 132 \end{cfa} 141 Type parameters in \CFA are similar to \CC template type parameters. The \CFA142 declaration133 Type parameters in \CFA are similar to \CC template type parameters. 134 The \CFA declaration 143 135 \begin{cfa} 144 136 forall (dtype T) ... 145 137 \end{cfa} 146 behaves similarly asthe \CC template declaration138 behaves similarly to the \CC template declaration 147 139 \begin{C++} 148 140 template <typename T> ... 
149 141 \end{C++} 150 142 151 Assertions are a distinctive feature of \CFA: contrary to the \CC template where 152 arbitrary functions and operators can be used in a template definition, in a \CFA 153 parametric function, operations on parameterized types must be declared in assertions. 154 143 Assertions are a distinctive feature of \CFA, similar to \emph{interfaces} in D and Go, and \emph{traits} in Rust. 144 Contrary to the \CC template where arbitrary functions and operators can be used in a template definition, in a \CFA parametric function, operations on parameterized types must be declared in assertions. 155 145 Consider the following \CC template: 156 146 \begin{C++} 157 template <typename T> int foo(T t) {158 return bar(t) + baz(t);147 @template@<typename T> T foo( T t ) { 148 return t + t * t; 159 149 } 160 150 \end{C++} 161 Unless bar and baz are also parametric functions taking any argument type, they must be 162 declared in the assertions, or otherwise the code will not compile: 151 where there are no explicit requirements on the type @T@. 152 Therefore, the \CC compiler must deduce what operators are required during textual (macro) expansion of the template at each usage. 153 As a result, templates cannot be separately compiled. 154 \CFA assertions specify restrictions on type parameters: 163 155 \begin{cfa} 164 forall (dtype T | { int bar(T); int baz(t); }) int foo (T t) {165 return bar(t) + baz(t);156 forall( dtype T | @{ T ?+?( T, T ); T ?*?( T, T ) }@ ) T foo( T t ) { 157 return t + t * t; 166 158 } 167 159 \end{cfa} 168 Assertions are written using the usual function declaration syntax. The scope of type 169 parameters and assertions is the following declaration. 170 171 \subsubsection{Type nodes} 172 173 A type node represents the type of an object or expression. 174 Named types reference the corresponding type declarations. The type of a function is its 175 function pointer type (same as standard C). 
176 With the addition of type parameters, named types may contain a list of parameter values 177 (actual parameter types). 178 179 \subsubsection{Statement nodes} 180 181 Statement nodes represent the statements in the program, including basic expression 182 statements, control flows and blocks. 160 Assertions are written using the usual \CFA function declaration syntax. 161 Only types with operators ``@+@'' and ``@*@'' work with this function, and the function prototype is sufficient to allow separate compilation. 162 163 Type parameters and assertions are used in the following compiler data-structures. 164 165 166 \subsubsection{Type Nodes} 167 168 Type nodes represent the type of an object or expression. 169 Named types reference the corresponding type declarations. 170 The type of a function is its function pointer type (same as standard C). 171 With the addition of type parameters, named types may contain a list of parameter values (actual parameter types). 172 173 174 \subsubsection{Statement Nodes} 175 176 Statement nodes represent the executable statements in the program, including basic expression statements, control flows and blocks. 183 177 Local declarations (within a block statement) are represented as declaration statements. 
184 178 185 \subsubsection{Expression nodes} 186 187 Some expressions are represented differently in the compiler before and after resolution 188 stage:179 180 \subsubsection{Expression Nodes} 181 182 Some expressions are represented differently before and after the resolution stage: 189 183 \begin{itemize} 190 184 \item 191 Name expressions: NameExpr pre-resolution, VariableExpr post-resolution 192 \item 193 Member expressions: UntypedMemberExpr pre-resolution, MemberExpr post-resolution 194 \item 195 Function call expressions (including overloadable operators): UntypedExpr pre-resolution, ApplicationExpr post-resolution 185 Name expressions: @NameExpr@ pre-resolution, @VariableExpr@ post-resolution 186 \item 187 Member expressions: @UntypedMemberExpr@ pre-resolution, @MemberExpr@ post-resolution 188 \item 189 \begin{sloppypar} 190 Function call expressions (including overloadable operators): @UntypedExpr@ pre-resolution, @ApplicationExpr@ post-resolution 191 \end{sloppypar} 196 192 \end{itemize} 197 The pre-resolution representation s contain only the symbols. Post-resolution results link198 them to the actual variable and function declarations.193 The pre-resolution representation contains only the symbols. 194 Post-resolution links them to the actual variable and function declarations. 199 195 200 196 201 197 \subsection{Compilation Passes} 202 198 203 Compilation steps are implemented as passes, which follows a general structural recursion 204 pattern on the syntax tree. 205 206 The basic work flow of compilation passes follows preorder and postorder traversal on 207 tree data structure, implemented with visitor pattern, and can be loosely described with 208 the following pseudocode: 209 \begin{C++} 210 Pass::visit (node_t node) { 211 previsit(node); 212 if (visit_children) 199 Compilation steps are implemented as passes, which follows a general structural recursion pattern on the syntax tree. 
200 201 The basic workflow of compilation passes follows preorder and postorder traversal on the AST data-structure, implemented with visitor pattern, and can be loosely described with the following pseudocode: 202 \begin{C++} 203 Pass::visit( node_t node ) { 204 previsit( node ); 205 if ( visit_children ) 213 206 for each child of node: 214 child.accept( this);215 postvisit( node);207 child.accept( this ); 208 postvisit( node ); 216 209 } 217 210 \end{C++} 218 Operations in previsit() happen in preorder (top to bottom) and operations in 219 postvisit() happen in postorder (bottom to top). The precise order of recursive 220 operations on child nodes can be found in @Common/PassVisitor.impl.h@ (old) and 221 @AST/Pass.impl.hpp@ (new). 222 Implementations of compilation passes need to follow certain conventions: 211 Operations in @previsit@ happen in preorder (top to bottom) and operations in @postvisit@ happen in postorder (bottom to top). 212 The precise order of recursive operations on child nodes can be found in @Common/PassVisitor.impl.h@ (old) and @AST/Pass.impl.hpp@ (new). 213 214 Implementations of compilation passes follow certain conventions: 223 215 \begin{itemize} 224 216 \item 225 Passes \textbf{should not} directly override the visit method (Non-virtual Interface 226 principle); if a pass desires different recursion behavior, it should set 227 @visit_children@ to false and perform recursive calls manually within previsit or 228 postvisit procedures. To enable this option, inherit from @WithShortCircuiting@ mixin. 229 \item 230 previsit may mutate the node but \textbf{must not} change the node type or return null. 231 \item 232 postvisit may mutate the node, reconstruct it to a different node type, or delete it by 233 returning null. 
217 Passes \textbf{should not} directly override the visit method (Non-virtual Interface principle); 218 if a pass desires different recursion behaviour, it should set @visit_children@ to false and perform recursive calls manually within previsit or postvisit procedures. 219 To enable this option, inherit from the @WithShortCircuiting@ mixin. 220 \item 221 previsit may mutate the node but \textbf{must not} change the node type or return @nullptr@. 222 \item 223 postvisit may mutate the node, reconstruct it to a different node type, or delete it by returning @nullptr@. 234 224 \item 235 225 If the previsit or postvisit method is not defined for a node type, the step is skipped. 236 If the return type is declared as void, the original node is returned by default. These237 behaviors are controlled by template specialization rules; see 238 @Common/PassVisitor.proto.h@ (old) and @AST/Pass.proto.hpp@ (new) for details.226 If the return type is declared as @void@, the original node is returned by default. 227 These behaviours are controlled by template specialization rules; 228 see @Common/PassVisitor.proto.h@ (old) and @AST/@ @Pass.proto.hpp@ (new) for details. 239 229 \end{itemize} 240 230 Other useful mixin classes for compilation passes include: 241 231 \begin{itemize} 242 232 \item 243 WithGuards allows saving values of variables and restore automatically upon exiting 244 the current node. 245 \item 246 WithVisitorRef creates a wrapped entity of current pass (the actual argument 247 passed to recursive calls internally) for explicit recursion, usually used together 248 with WithShortCircuiting. 249 \item 250 WithSymbolTable gives a managed symbol table with built-in scoping rule handling 251 (\eg on entering and exiting a block statement) 233 @WithGuards@ allows saving and restoring variable values automatically upon entering/exiting the current node. 
234 \item 235 @WithVisitorRef@ creates a wrapped entity for the current pass (the actual argument passed to recursive calls internally) for explicit recursion, usually used together with @WithShortCircuiting@. 236 \item 237 @WithSymbolTable@ gives a managed symbol table with built-in scoping-rule handling (\eg on entering and exiting a block statement) 252 238 \end{itemize} 253 \NOTE: If a pass extends the functionality of another existing pass, due to \CC overloading 254 resolution rules, it \textbf{must} explicitly introduce the inherited previsit and postvisit procedures 255 to its own scope, or otherwise they will not be picked up by template resolution: 239 \NOTE: If a pass extends the functionality of another existing pass, due to \CC overloading resolution rules, it \textbf{must} explicitly introduce the inherited previsit and postvisit procedures to its own scope, or otherwise they are not picked up by template resolution: 256 240 \begin{C++} 257 241 class Pass2: public Pass1 { 258 using Pass1::previsit;259 using Pass1::postvisit;242 @using Pass1::previsit;@ 243 @using Pass1::postvisit;@ 260 244 // new procedures 261 245 } … … 263 247 264 248 265 \subsection{Data Structure Change WIP (new-ast)} 266 267 It has been observed that excessive copying of syntax tree structures accounts for a 268 majority of computation cost and significantly slows down the compiler. In the previous 269 implementation of the syntax tree, every internal node has a unique parent; therefore all 270 copies are required to duplicate everything down to the bottom. A new, experimental 271 re-implementation of the syntax tree (source under directory AST/ hereby referred to as 272 ``new-ast'') attempts to overcome this issue with a functional approach that allows sharing 273 of common sub-structures and only makes copies when necessary. 274 275 The core of new-ast is a customized implementation of smart pointers, similar to 276 @std::shared_ptr@ and @std::weak_ptr@ in \CC standard library. 
Reference counting is 277 used to detect sharing and allows optimization. For a purely functional (a.k.a. immutable) 278 data structure, all mutations are modelled by shallow copies along the path of mutation. 249 \subsection{Data Structure Change (new-ast)} 250 251 It has been observed that excessive copying of syntax tree structures accounts for a majority of computation cost and significantly slows down the compiler. 252 In the previous implementation of the syntax tree, every internal node has a unique parent; 253 therefore all copies are required to duplicate the entire subtree. 254 A new, experimental re-implementation of the syntax tree (source under directory @AST/@ hereby referred to as ``new-ast'') attempts to overcome this issue with a functional approach that allows sharing of common sub-structures and only makes copies when necessary. 255 256 The core of new-ast is a customized implementation of smart pointers, similar to @std::shared_ptr@ and @std::weak_ptr@ in the \CC standard library. 257 Reference counting is used to detect sharing and allow certain optimizations. 258 For a purely functional (immutable) data-structure, all mutations are modelled by shallow copies along the path of mutation. 259 With reference counting optimization, unique nodes are allowed to be mutated in place. 280 This however, may potentially introduce some complications and bugs; a few issues are 281 discussed near the end of this section. 282 283 \subsubsection{Source: AST/Node.hpp} 284 285 class @ast::Node@ is the base class of all new-ast node classes, which implements 286 reference counting mechanism. Two different counters are recorded: ``strong'' reference 287 count for number of nodes semantically owning it; ``weak'' reference count for number of 288 nodes holding a mere reference and only need to observe changes. 289 class @ast::ptr_base@ is the smart pointer implementation and also takes care of 290 resource management. 
291 292 Direct access through the smart pointer is read-only. A mutable access should be obtained 293 by calling shallowCopy or mutate as below. 294 295 Currently, the weak pointers are only used to reference declaration nodes from a named 296 type, or a variable expression. Since declaration nodes are intended to denote unique 297 entities in the program, weak pointers always point to unique (unshared) nodes. This may 298 change in the future, and weak references to shared nodes may introduce some problems; 260 This however, may potentially introduce some complications and bugs; 261 a few issues are discussed near the end of this section. 262 263 264 \subsubsection{Source: \lstinline{AST/Node.hpp}} 265 266 Class @ast::Node@ is the base class of all new-ast node classes, which implements reference counting mechanism. 267 Two different counters are recorded: ``strong'' reference count for number of nodes semantically owning it; 268 ``weak'' reference count for number of nodes holding a mere reference and only need to observe changes. 269 Class @ast::ptr_base@ is the smart pointer implementation and also takes care of resource management. 270 271 Direct access through the smart pointer is read-only. 272 A mutable access should be obtained by calling @shallowCopy@ or mutate as below. 273 274 Currently, the weak pointers are only used to reference declaration nodes from a named type, or a variable expression. 275 Since declaration nodes are intended to denote unique entities in the program, weak pointers always point to unique (unshared) nodes. 276 This property may change in the future, and weak references to shared nodes may introduce some problems; 299 277 see mutate function below. 300 278 301 All node classes should always use smart pointers in the structure and should not use raw 302 pointers. 303 279 All node classes should always use smart pointers in structure definitions versus raw pointers. 
280 Function 304 281 \begin{C++} 305 282 void ast::Node::increment(ref_type ref) 306 283 \end{C++} 307 Increments this node's strong or weak reference count. 284 increments this node's strong or weak reference count. 285 Function 308 286 \begin{C++} 309 287 void ast::Node::decrement(ref_type ref, bool do_delete = true) 310 288 \end{C++} 311 Decrements this node's strong or weak reference count. If strong reference count reaches 312 zero, the node is deleted by default. 313 \NOTE: Setting @do_delete@ to false may result in a detached node. Subsequent code should 314 manually delete the node or assign it to a strong pointer to prevent memory leak. 289 decrements this node's strong or weak reference count. 290 If strong reference count reaches zero, the node is deleted. 291 \NOTE: Setting @do_delete@ to false may result in a detached node. 292 Subsequent code should manually delete the node or assign it to a strong pointer to prevent memory leak. 293 315 294 Reference counting functions are internally called by @ast::ptr_base@. 295 Function 316 296 \begin{C++} 317 297 template<typename node_t> 318 298 node_t * shallowCopy(const node_t * node) 319 299 \end{C++} 320 Returns a mutable, shallow copy of node: all child pointers are pointing to the same child 321 nodes. 300 returns a mutable, shallow copy of node: all child pointers are pointing to the same child nodes. 301 Function 322 302 \begin{C++} 323 303 template<typename node_t> 324 304 node_t * mutate(const node_t * node) 325 305 \end{C++} 326 If node is unique (strong reference count is 1), returns a mutable pointer to the same node. 327 Otherwise, returns shallowCopy(node). 328 It is an error to mutate a shared node that is weak-referenced. Currently this does not 329 happen. The problem may appear once weak pointers to shared nodes (\eg expression 330 nodes) are used; special care will be needed. 331 332 \NOTE: This naive uniqueness check may not be sufficient in some cases. 
A discussion of the 333 issue is presented at the end of this section. 306 returns a mutable pointer to the same node, if the node is unique (strong reference count is 1); 307 otherwise, it returns @shallowCopy(node)@. 308 It is an error to mutate a shared node that is weak-referenced. 309 Currently this does not happen. 310 A problem may appear once weak pointers to shared nodes (\eg expression nodes) are used; 311 special care is needed. 312 313 \NOTE: This naive uniqueness check may not be sufficient in some cases. 314 A discussion of the issue is presented at the end of this section. 315 Functions 334 316 \begin{C++} 335 317 template<typename node_t, typename parent_t, typename field_t, typename assn_t> 336 const node_t * mutate_field(const node_t * node, field_t parent_t::* field, assn_t && val)318 const node_t * mutate_field(const node_t * node, field_t parent_t::* field, assn_t && val) 337 319 \end{C++} 338 320 \begin{C++} … … 342 324 field_t && val) 343 325 \end{C++} 344 Helpers for mutating a field on a node using pointer to member (creates shallow copy 345 when necessary). 346 347 \subsubsection{Issue: Undetected sharing}348 349 The @mutate@ behavio r described above has a problem: deeper shared nodes may be326 are helpers for mutating a field on a node using pointer to member (creates shallow copy when necessary). 327 328 329 \subsubsection{Issue: Undetected Sharing} 330 331 The @mutate@ behaviour described above has a problem: deeper shared nodes may be 350 332 mistakenly considered as unique. \VRef[Figure]{f:DeepNodeSharing} shows how the problem could arise: 351 333 \begin{figure} … … 355 337 \label{f:DeepNodeSharing} 356 338 \end{figure} 357 Suppose that we are working on the tree rooted at P1, which 358 is logically the chain P1-A-B and P2 is irrelevant, and then 359 mutate(B) is called. The algorithm considers B as unique since 360 it is only directly owned by A. 
However, the other tree P2-A-B 361 indirectly shares the node B and is therefore wrongly mutated. 362 363 To partly address this problem, if the mutation is called higher up the tree, a chain 364 mutation helper can be used: 365 366 \subsubsection{Source: AST/Chain.hpp} 367 339 Given the tree rooted at P1, which is logically the chain P1-A-B, and P2 is irrelevant, assume @mutate(B)@ is called. 340 The algorithm considers B as unique since it is only directly owned by A. 341 However, the other tree P2-A-B indirectly shares the node B and is therefore wrongly mutated. 342 343 To partly address this problem, if the mutation is called higher up the tree, a chain mutation helper can be used. 344 345 \subsubsection{Source: \lstinline{AST/Chain.hpp}} 346 347 Function 368 348 \begin{C++} 369 349 template<typename node_t, Node::ref_type ref_t> 370 350 auto chain_mutate(ptr_base<node_t, ref_t> & base) 371 351 \end{C++} 372 This function returns a chain mutator handle which takes pointer-to-member to go down 373 the tree while creating shallow copies as necessary; see @struct _chain_mutator@ in the 374 source code for details. 375 376 For example, in the above diagram, if mutation of B is wanted while at P1, the call using 377 @chain_mutate@ looks like the following: 352 returns a chain mutator handle that takes pointer-to-member to go down the tree, while creating shallow copies as necessary; 353 see @struct _chain_mutator@ in the source code for details. 354 355 For example, in the above diagram, if mutation of B is wanted while at P1, the call using @chain_mutate@ looks like the following: 378 356 \begin{C++} 379 357 chain_mutate(P1.a)(&A.b) = new_value_of_b; 380 358 \end{C++} 381 Note that if some node in chain mutate is shared (therefore shallow copied), it implies that 382 every node further down will also be copied, thus correctly executing the functional 383 mutation algorithm. 
This example code creates copies of both A and B and performs 384 mutation on the new nodes, so that the other tree P2-A-B is untouched. 385 However, if a pass traverses down to node B and performs mutation, for example, in 386 @postvisit(B)@, information on sharing higher up is lost. Since the new-ast structure is only in 387 experimental use with the resolver algorithm, which mostly rebuilds the tree bottom-up, 388 this issue does not actually happen. It should be addressed in the future when other 389 compilation passes are migrated to new-ast and many of them contain procedural 390 mutations, where it might cause accidental mutations to other logically independent trees 391 (\eg common sub-expression) and become a bug. 392 393 394 \vspace*{20pt} % FIX ME, spacing problem with this heading ??? 359 \NOTE: if some node in chain mutate is shared (therefore shallow copied), it implies that every node further down is also copied, thus correctly executing the functional mutation algorithm. 360 This example code creates copies of both A and B and performs mutation on the new nodes, so that the other tree P2-A-B is untouched. 361 However, if a pass traverses down to node B and performs mutation, for example, in @postvisit(B)@, information on sharing higher up is lost. 362 Since the new-ast structure is only in experimental use with the resolver algorithm, which mostly rebuilds the tree bottom-up, this issue does not actually happen. 363 It should be addressed in the future when other compilation passes are migrated to new-ast and many of them contain procedural mutations, where it might cause accidental mutations to other logically independent trees (\eg common sub-expression) and become a bug. 364 365 395 366 \section{Compiler Algorithm Documentation} 396 367 397 This documentation currently covers most of the resolver, data structures used in variable 398 and expression resolution, and a few directly related passes. 
Later passes involving code 399 generation is not included yet; documentation for those will be done afterwards. 368 This compiler algorithm documentation covers most of the resolver, data structures used in variable and expression resolution, and a few directly related passes. 369 Later passes involving code generation are not included yet; 370 documentation for those will be done later. 371 400 372 401 373 \subsection{Symbol Table} 402 374 403 \NOTE: For historical reasons, the symbol table data structure was called ``indexer'' in the 404 old implementation. Hereby we will be using the name SymbolTable everywhere. 405 The symbol table stores a mapping from names to declarations and implements a similar 406 name space separation rule, and the same scoping rules in standard C.\footnote{ISO/IEC 9899:1999, Sections 6.2.1 and 6.2.3} The difference in 407 name space rule is that typedef aliases are no longer considered ordinary identifiers. 408 In addition to C tag types (struct, union, enum), \CFA introduces another tag type, trait, 409 which is a named collection of assertions. 410 411 \subsubsection{Source: AST/SymbolTable.hpp} 412 413 \subsubsection{Source: SymTab/Indexer.h} 414 375 \NOTE: For historical reasons, the symbol-table data-structure is called @indexer@ in the old implementation. 376 Hereby, the name is changed to @SymbolTable@. 377 The symbol table stores a mapping from names to declarations, implements a similar name-space separation rule, and provides the same scoping rules as standard C.\footnote{ISO/IEC 9899:1999, Sections 6.2.1 and 6.2.3.} 378 The difference in name-space rule is that @typedef@ aliases are no longer considered ordinary identifiers. 379 In addition to C tag-types (@struct@, @union@, @enum@), \CFA introduces another tag type, @trait@, which is a named collection of assertions. 
380 381 382 \subsubsection{Source: \lstinline{AST/SymbolTable.hpp}} 383 384 Function 415 385 \begin{C++} 416 386 SymbolTable::addId(const DeclWithType * decl) 417 387 \end{C++} 418 Since \CFA allows overloading of variables and functions, ordinary identifier names need 419 to be mangled. The mangling scheme is closely based on the Itanium \CC ABI,\footnote{\url{https://itanium-cxx-abi.github.io/cxx-abi/abi.html}, Section 5.1} while 420 making adaptations to \CFA specific features, mainly assertions and overloaded variables 421 by type. Naming conflicts are handled by mangled names; lookup by name returns a list of 422 declarations with the same literalidentifier name.423 388 provides name mangling of identifiers, since \CFA allows overloading of variables and functions. 389 The mangling scheme is closely based on the Itanium \CC ABI,\footnote{\url{https://itanium-cxx-abi.github.io/cxx-abi/abi.html}, Section 5.1} while making adaptations to \CFA specific features, mainly assertions and overloaded variables by type. 390 391 Naming conflicts are handled by mangled names; 392 lookup by name returns a list of declarations with the same identifier name. 393 Functions 424 394 \begin{C++} 425 395 SymbolTable::addStruct(const StructDecl * decl) … … 428 398 SymbolTable::addTrait(const TraitDecl * decl) 429 399 \end{C++} 430 Adds a tag type declaration to the symbol table. 400 add a tag-type declaration to the symbol table. 401 Function 431 402 \begin{C++} 432 403 SymbolTable::addType(const NamedTypeDecl * decl) 433 404 \end{C++} 434 Adds a typedef alias to the symbol table. 435 436 \textbf{C Incompatibility Note}: Since Cforall allows using struct, union and enum type names 437 without the keywords, typedef names and tag type names cannot be disambiguated by 438 syntax rules. Currently the compiler puts them together and disallows collision. The 439 following program is valid C but not valid Cforall: 405 adds a @typedef@ alias to the symbol table. 
406 407 \textbf{C Incompatibility Note}: Since \CFA allows using @struct@, @union@ and @enum@ type-names without a prefix keyword, as in \CC, @typedef@ names and tag-type names cannot be disambiguated by syntax rules. 408 Currently the compiler puts them together and disallows collision. 409 The following program is valid C but invalid \CFA (and \CC): 440 410 \begin{C++} 441 411 struct A {}; 412 typedef int A; // gcc: ok, cfa: Cannot redefine typedef A 413 struct A sa; // C disambiguates via struct prefix 414 A ia; 415 \end{C++} 416 In practice, such usage is extremely rare, and hence, this change (as in \CC) has minimal impact on existing C programs. 417 The declaration 418 \begin{C++} 419 struct A {}; 420 typedef struct A A; // A is an alias for struct A 421 A a; 422 struct A b; 423 \end{C++} 424 is not an error because the alias name is identical to the original. 425 Finally, the following program is allowed in \CFA: 426 \begin{C++} 442 427 typedef int A; 443 // gcc: ok, cfa: Cannot redefine typedef A 444 \end{C++} 445 In actual practices however, such usage is extremely rare, and typedef struct A A; is 446 not considered an error, but silently discarded. Therefore, we expect this change to have 447 minimal impact on existing C programs. 448 Meanwhile, the following program is allowed in Cforall: 449 \begin{C++} 450 typedef int A; 451 void A(); 428 void A(); // name mangled 452 429 // gcc: A redeclared as different kind of symbol, cfa: ok 453 430 \end{C++} 431 because the function name is mangled. 432 454 433 455 434 \subsection{Type Environment and Unification} 456 435 457 The core of parametric type resolution algorithm. 458 Type Environment organizes type parameters in \textbf{equivalent classes} and maps them to 459 actual types. Unification is the algorithm that takes two (possibly parametric) types and 460 parameter mappings and attempts to produce a common type by matching the type 461 environments. 
436 The following core ideas underlie the parametric type-resolution algorithm. 437 A type environment organizes type parameters into \textbf{equivalent classes} and maps them to actual types. 438 Unification is the algorithm that takes two (possibly parametric) types and parameter mappings, and attempts to produce a common type by matching information in the type environments. 462 439 463 440 The unification algorithm is recursive in nature and runs in two different modes internally: 464 441 \begin{itemize} 465 442 \item 466 \textbf{Exact} unification mode requires equivalent parameters to match perfectly; 467 \item 468 \textbf{Inexact} unification mode allows equivalent parameters to be converted to a 469 common type. 443 Exact unification mode requires equivalent parameters to match perfectly. 444 \item 445 Inexact unification mode allows equivalent parameters to be converted to a common type. 470 446 \end{itemize} 471 For a pair of matching parameters (actually, their equivalent classes), if either side is open 472 (not bound to a concrete type yet), they are simply combined. 473 474 Within inexact mode, types are allowed to differ on their cv-qualifiers; additionally, if a 475 type never appear either in parameter list or as the base type of a pointer, it may also be 476 widened (i.e. safely converted). As Cforall currently does not implement subclassing similar 477 to object-oriented languages, widening conversions are on primitive types only, for 478 example the conversion from int to long. 479 480 The need for two unification modes come from the fact that parametric types are 481 considered compatible only if all parameters are exactly the same (not just compatible). 482 Pointer types also behaves similarly; in fact, they may be viewed as a primitive kind of 483 parametric types. @int*@ and @long*@ are different types, just like @vector(int)@ and 484 @vector(long)@ are, for the parametric type @vector(T)@. 
485 486 The resolver should use the following ``@public@'' functions:\footnote{ 487 Actual code also tracks assertions on type parameters; those extra arguments are omitted here for 488 conciseness.} 489 490 491 \subsubsection{Source: ResolvExpr/Unify.cc} 492 493 \begin{C++} 494 bool unify(const Type *type1, const Type *type2, TypeEnvironment &env, 495 OpenVarSet &openVars, const SymbolTable &symtab, Type *&commonType) 496 \end{C++} 497 Attempts to unify @type1@ and @type2@ with current type environment. 498 499 If operation succeeds, @env@ is modified by combining the equivalence classes of matching 500 parameters in @type1@ and @type2@, and their common type is written to commonType. 501 502 If operation fails, returns false. 503 \begin{C++} 504 bool typesCompatible(const Type * type1, const Type * type2, const 505 SymbolTable &symtab, const TypeEnvironment &env) 506 bool typesCompatibleIgnoreQualifiers(const Type * type1, const Type * 507 type2, const SymbolTable &symtab, const TypeEnvironment &env) 508 \end{C++} 509 510 Determines if type1 and type2 can possibly be the same type. The second version ignores 511 the outermost cv-qualifiers if present.\footnote{ 512 In const \lstinline@int * const@, only the second \lstinline@const@ is ignored.} 513 514 The call has no side effect. 515 516 \NOTE: No attempts are made to widen the types (exact unification is used), although the 517 function names may suggest otherwise. E.g. @typesCompatible(int, long)@ returns false. 447 For a pair of matching parameters (actually, their equivalent classes), if either side is open (not bound to a concrete type yet), they are combined. 448 449 Within the inexact mode, types are allowed to differ on their cv-qualifiers (\eg @const@, @volatile@, \etc); 450 additionally, if a type never appears either in a parameter list or as the base type of a pointer, it may also be widened (\ie safely converted). 
451 As \CFA currently does not implement subclassing as in object-oriented languages, widening conversions are only on the primitive types, \eg conversion from @int@ to @long int@. 452 453 The need for two unification modes comes from the fact that parametric types are considered compatible only if all parameters are exactly the same (not just compatible). 454 Pointer types also behave similarly; 455 in fact, they may be viewed as a primitive kind of parametric type. 456 @int *@ and @long *@ are different types, just like @vector(int)@ and @vector(long)@ are, for the parametric type @*(T)@ / @vector(T)@, respectively. 457 458 The resolver uses the following @public@ functions:\footnote{ 459 Actual code also tracks assertions on type parameters; those extra arguments are omitted here for conciseness.} 460 461 462 \subsubsection{Source: \lstinline{ResolvExpr/Unify.cc}} 463 464 Function 465 \begin{C++} 466 bool unify(const Type * type1, const Type * type2, TypeEnvironment & env, 467 OpenVarSet & openVars, const SymbolTable & symtab, Type *& commonType) 468 \end{C++} 469 returns a boolean indicating if the unification succeeds or fails after attempting to unify @type1@ and @type2@ within current type environment. 470 If the unify succeeds, @env@ is modified by combining the equivalence classes of matching parameters in @type1@ and @type2@, and their common type is written to @commonType@. 471 If the unify fails, nothing changes. 472 Functions 473 \begin{C++} 474 bool typesCompatible(const Type * type1, const Type * type2, const SymbolTable & symtab, 475 const TypeEnvironment & env) 476 bool typesCompatibleIgnoreQualifiers(const Type * type1, const Type * type2, 477 const SymbolTable & symtab, const TypeEnvironment & env) 478 \end{C++} 479 return a boolean indicating if types @type1@ and @type2@ can possibly be the same type. 
480 The second version ignores the outermost cv-qualifiers if present.\footnote{ 481 In \lstinline@const int * const@, only the second \lstinline@const@ is ignored.} 482 These functions have no side effects. 483 484 \NOTE: No attempt is made to widen the types (exact unification is used), although the function names may suggest otherwise, \eg @typesCompatible(int, long)@ returns false. 518 485 519 486 520 487 \subsection{Expression Resolution} 521 488 522 The design of the current version of expression resolver is outlined in the Ph.D. Thesis from 523 Aaron Moss~\cite{Moss19}. 524 489 The design of the current version of expression resolver is outlined in the Ph.D.\ thesis by Aaron Moss~\cite{Moss19}. 525 490 A summary of the resolver algorithm for each expression type is presented below. 526 491 527 All overloadable operators are modelled as function calls. For a function call,528 interpretations of the function and arguments are found recursively. Then the following 529 steps produce a filtered list of valid interpretations:492 All overloadable operators are modelled as function calls. 493 For a function call, interpretations of the function and arguments are found recursively. 494 Then the following steps produce a filtered list of valid interpretations: 530 495 \begin{enumerate} 531 496 \item 532 From all possible combinations of interpretations of the function and arguments, 533 those where argument types may be converted to function parameter types are 534 considered valid. 497 From all possible combinations of interpretations of the function and arguments, those where argument types may be converted to function parameter types are considered valid. 535 498 \item 536 499 Valid interpretations with the minimum sum of argument costs are kept. 537 500 \item 538 Argument costs are then discarded; the actual cost for the function call expression is 539 the sum of conversion costs from the argument types to parameter types. 
540 \item 541 For each return type, the interpretations with satisfiable assertions are then sorted 542 by actual cost computed in step 3. If for a given type, the minimum cost 543 interpretations are not unique, it is said that for that return type the interpretation 544 is ambiguous. If the minimum cost interpretation is unique but contains an 545 ambiguous argument, it is also considered ambiguous. 501 \label{p:argcost} 502 Argument costs are then discarded; the actual cost for the function call expression is the sum of conversion costs from the argument types to parameter types. 503 \item 504 \label{p:returntype} 505 For each return type, the interpretations with satisfiable assertions are then sorted by actual cost computed in step~\ref{p:argcost}. 506 If for a given type, the minimum cost interpretations are not unique, that return type is ambiguous. 507 If the minimum cost interpretation is unique but contains an ambiguous argument, it is also ambiguous. 546 508 \end{enumerate} 547 Therefore, for each return type, the resolver produces either of:509 Therefore, for each return type, the resolver produces: 548 510 \begin{itemize} 549 511 \item 550 No alternatives551 \item 552 Asingle valid alternative553 \item 554 An ambiguous alternative512 no alternatives 513 \item 514 a single valid alternative 515 \item 516 an ambiguous alternative 555 517 \end{itemize} 556 Note that an ambiguous alternative may be discarded at the parent expressions because a 557 different return type matches better for the parent expressions. 558 559 The non-overloadable expressions in Cforall are: cast expressions, address-of (unary @&@) 560 expressions, short-circuiting logical expressions (@&&@, @||@) and ternary conditional 561 expression (@?:@). 562 563 For a cast expression, the convertible argument types are kept. Then the result is selected 564 by lowest argument cost, and further by lowest conversion cost to target type. 
If the lowest 565 cost is still not unique, or an ambiguous argument interpretation is selected, the cast 566 expression is ambiguous. In an expression statement, the top level expression is implicitly 567 cast to void. 518 \NOTE: an ambiguous alternative may be discarded at the parent expressions because a different return type matches better for the parent expressions. 519 520 The \emph{non}-overloadable expressions in \CFA are: cast expressions, address-of (unary @&@) expressions, short-circuiting logical expressions (@&&@, @||@) and ternary conditional expression (@?:@). 521 522 For a cast expression, the convertible argument types are kept. 523 Then the result is selected by lowest argument cost, and further by lowest conversion cost to target type. 524 If the lowest cost is still not unique or an ambiguous argument interpretation is selected, the cast expression is ambiguous. 525 In an expression statement, the top level expression is implicitly cast to @void@. 568 526 569 527 For an address-of expression, only lvalue results are kept and the minimum cost is selected. 570 528 571 For logical expressions @&&@ and @||@, arguments are implicitly cast to bool, and follow the rule 572 of cast expression as above. 573 574 For the ternary conditional expression, the condition is implicitly cast to bool, and the 575 branch expressions must have compatible types. Each pair of compatible branch 576 expression types produce a possible interpretation, and the cost is defined as the sum of 577 expression costs plus the sum of conversion costs to the common type. 578 579 TODO: Write a specification for expression costs. 529 For logical expressions @&&@ and @||@, arguments are implicitly cast to @bool@, and follow the rules for cast expressions above. 530 531 For the ternary conditional expression, the condition is implicitly cast to @bool@, and the branch expressions must have compatible types. 
532 Each pair of compatible branch expression types produces a possible interpretation, and the cost is defined as the sum of the expression costs plus the sum of conversion costs to the common type. 533 534 535 \subsection{Conversion and Application Cost} 536 537 There were some unclear parts in the previous documentation in the cost system, as described in the Moss thesis~\cite{Moss19}, section 4.1.2. 538 Some clarifications are presented in this section. 539 540 \begin{enumerate} 541 \item 542 Conversion to a type denoted by parameter may incur additional cost if the match is not exact. 543 For example, if a function is declared to accept @(T, T)@ and receives @(int, long)@, @T@ is deduced as @long@ and an additional widening conversion cost is added for @int@ to @T@. 544 545 \item 546 The specialization level of a function is the sum of the least depth of an appearance of a type parameter (counting pointers, references and parameterized types), plus the number of assertions. 547 A higher specialization level is favoured if argument conversion costs are equal. 548 549 \item 550 Coercion of pointer types is only allowed in explicit cast expressions; 551 the only allowed implicit pointer casts are adding qualifiers to the base type and cast to @void*@, and these count as safe conversions. 552 Note that implicit cast from @void *@ to other pointer types is no longer valid, as opposed to standard C. 553 \end{enumerate} 580 554 581 555 582 556 \subsection{Assertion Satisfaction} 583 557 584 The resolver tries to satisfy assertions on expressions only when it is needed: either while 585 selecting from multiple alternatives of a same result type for a function call (step 4 of 586 resolving function calls), or upon reaching the top level of an expression statement. 587 588 Unsatisfiable alternatives are discarded. 
Satisfiable alternatives receive \textbf{implicit 589 parameters}: in Cforall, parametric functions are designed such that they can be compiled 590 separately, as opposed to \CC templates which are only compiled at instantiation. Given a 591 parametric function definition: 558 The resolver tries to satisfy assertions on expressions only when it is needed: either while selecting from multiple alternatives of a same result type for a function call (step \ref{p:returntype} of resolving function calls) or upon reaching the top level of an expression statement. 559 560 Unsatisfiable alternatives are discarded. 561 Satisfiable alternatives receive \textbf{implicit parameters}: in \CFA, parametric functions may be separately compiled, as opposed to \CC templates which are only compiled at instantiation. 562 Given the parametric function-definition: 592 563 \begin{C++} 593 564 forall (otype T | {void foo(T);}) 594 565 void bar (T t) { foo(t); } 595 566 \end{C++} 596 The function bar does not know which @foo@ to call when compiled without knowing the call 597 site, so it requests a function pointer to be passed as an extra argument. At the call site, 598 implicit parameters are automatically inserted by the compiler. 599 600 \textbf{TODO}: Explain how recursive assertion satisfaction and polymorphic recursion work. 601 567 the function @bar@ does not know which @foo@ to call when compiled without knowing the call site, so it requests a function pointer to be passed as an extra argument. 568 At the call site, implicit parameters are automatically inserted by the compiler. 569 570 Implementation of implicit parameters is discussed in \VRef[Appendix]{s:ImplementationParametricFunctions}. 602 571 603 572 \section{Tests} … … 605 574 \subsection{Test Suites} 606 575 607 Automatic test suites are located under the @tests/@ directory. 
A test case consists of an 608 input CFA source file (name ending with @.cfa@), and an expected output file located 609 in @.expect/@ directory relative to the source file, with the same file name ending with @.txt@. 610 So a test named @tuple/tupleCast@ has the following files, for example: 576 Automatic test suites are located under the @tests/@ directory. 577 A test case consists of an input CFA source file (suffix @.cfa@), and an expected output file located in the @tests/.expect/@ directory, with the same file name ending with suffix @.txt@. 578 For example, the test named @tests/tuple/tupleCast.cfa@ has the following files: 611 579 \begin{C++} 612 580 tests/ 613 .. tuple/ 614 ...... .expect/ 615 .......... tupleCast.txt 616 ...... tupleCast.cfa 617 \end{C++} 618 If compilation fails, the error output is compared to the expect file. If compilation succeeds, 619 the built program is run and its output compared to the expect file. 620 To run the tests, execute the test script @test.py@ under the @tests/@ directory, with a list of 621 test names to be run, or @--all@ to run all tests. The test script reports test cases 622 fail/success, compilation time and program run time. 581 tuple/ 582 .expect/ 583 tupleCast.txt 584 tupleCast.cfa 585 \end{C++} 586 If compilation fails, the error output is compared to the expect file. 587 If the compilation succeeds but does not generate an executable, the compilation output is compared to the expect file. 588 If the compilation succeeds and generates an executable, the executable is run and its output is compared to the expect file. 589 To run the tests, execute the test script @test.py@ under the @tests/@ directory, with a list of test names to be run, or @--all@ (or @make all-tests@) to run all tests. 590 The test script reports test cases fail/success, compilation time and program run time. 591 To see all the options available for @test.py@, use the @--help@ option. 
623 592 624 593 625 594 \subsection{Performance Reports} 626 595 627 To turn on performance reports, pass @-S@ flag to the compiler. 628 629 3 kinds of performance reports are available: 596 To turn on performance reports, pass the @-XCFA -S@ flag to the compiler. 597 Three kinds of performance reports are available: 630 598 \begin{enumerate} 631 599 \item … … 639 607 @Common/Stats/Counter.h@. 640 608 \end{enumerate} 641 It is suggested to run performance tests with optimized build (@g++@ flag @-O3@) 642 609 It is suggested to run performance tests with optimization (@g++@ flag @-O3@). 610 611 612 \appendix 613 \section{Appendix} 614 615 \subsection{Kinds of Type Parameters} 616 \label{s:KindsTypeParameters} 617 618 A type parameter in a @forall@ clause has one of three kinds: 619 \begin{enumerate}[listparindent=0pt] 620 \item 621 @dtype@: any data type (built-in or user defined) that is not a concrete type. 622 623 A non-concrete type is an incomplete type such as an opaque type or pointer/reference with an implicit (pointer) size and implicitly generated reference and dereference operations. 624 \item 625 @otype@: any data type (built-in or user defined) that is a concrete type. 626 627 A concrete type is a complete type, \ie types that can be used to create a variable, which also implicitly asserts the existence of default and copy constructors, assignment, and destructor\footnote{\CFA implements the same automatic resource management (RAII) semantics as \CC.}. 628 % \item 629 % @ftype@: any function type. 630 % 631 % @ftype@ provides two purposes: 632 % \begin{itemize} 633 % \item 634 % Differentiate function pointer from data pointer because (in theory) some systems have different sizes for these pointers. 635 % \item 636 % Disallow a function pointer to match an overloaded data pointer, since variables and functions can have the same names. 637 % \end{itemize} 638 639 \item 640 @ttype@: tuple (variadic) type. 
641 642 Restricted to the type for the last parameter in a function, it provides a type-safe way to implement variadic functions. 643 Note however, that it has certain restrictions, as described in the implementation section below. 644 \end{enumerate} 645 646 647 \subsection{GNU C Nested Functions} 648 649 \CFA is designed to be mostly compatible with GNU C, an extension to ISO C99 and C11 standards. The \CFA compiler also implements some language features by GCC extensions, most notably nested functions. 650 651 In ISO C, function definitions are not allowed to be nested. GCC allows nested functions with full lexical scoping. The following example is taken from GCC documentation\footnote{\url{https://gcc.gnu.org/onlinedocs/gcc/Nested-Functions.html}}: 652 \begin{C++} 653 void bar( int * array, int offset, int size ) { 654 int access( int * array, int index ) { return array[index + offset]; } 655 int i; 656 /* ... */ 657 for ( i = 0; i < size; i++ ) 658 /* ... */ access (array, i) /* ... */ 659 } 660 \end{C++} 661 GCC nested functions behave identically to \CC lambda functions with default by-reference capture (stack-allocated, lifetime ends upon exiting the declared block), while also possible to be passed as arguments with standard function pointer types. 662 663 664 \subsection{Implementation of Parametric Functions} 665 \label{s:ImplementationParametricFunctions} 666 667 \CFA implements parametric functions using the implicit parameter approach: required assertions are passed to the callee by function pointers; 668 size of a parametric type must also be known if referenced directly (\ie not as a pointer). 
669 670 The implementation is similar to the one from Scala\footnote{\url{https://www.scala-lang.org/files/archive/spec/2.13/07-implicits.html}}, with some notable differences in resolution: 671 \begin{enumerate} 672 \item 673 All types, variables, and functions are candidates for implicit parameters. 674 \item 675 The parameter (assertion) name must match the actual declarations. 676 \end{enumerate} 677 678 For example, the \CFA function declaration 679 \begin{cfa} 680 forall( otype T | { int foo( T, int ); } ) 681 int bar(T); 682 \end{cfa} 683 after implicit parameter expansion, has the actual signature\footnote{\textbf{otype} also requires the type to have constructor and destructor, which are the first two function pointers preceding the one for \textbf{foo}.} 684 \begin{C++} 685 int bar( T, size_t, void (*)(T&), void (*)(T&), int (*)(T, int) ); 686 \end{C++} 687 The implicit parameter approach has an apparent issue: when the satisfying declaration is also parametric, it may require its own implicit parameters too. 688 That also causes the supplied implicit parameter to have a different \textbf{actual} type than the \textbf{nominal} type, so it cannot be passed directly. 689 Therefore, a wrapper with matching actual type must be created, and it is here where GCC nested functions are used internally by the compiler. 690 691 Consider the following program: 692 \begin{cfa} 693 int assertion(int); 694 695 forall( otype T | { int assertion(T); } ) 696 void foo(T); 697 698 forall(otype T | { void foo(T); } ) 699 void bar(T t) { 700 foo(t); 701 } 702 \end{cfa} 703 The \CFA compiler translates the program to non-parametric form\footnote{In the final code output, \lstinline@T@ needs to be replaced by an opaque type, and arguments must be accessed by a frame pointer offset table, due to the unknown sizes. 
The presented code here is simplified for better understanding.} 704 \begin{C++} 705 // ctor, dtor and size arguments are omitted 706 void foo(T, int (*)(T)); 707 708 void bar(T t, void (*foo)(T)) { 709 foo(t); 710 } 711 \end{C++} 712 However, when @bar(1)@ is called, @foo@ cannot be directly provided as an argument: 713 \begin{C++} 714 bar(1, foo); // WRONG: foo has different actual type 715 \end{C++} 716 and an additional step is required: 717 \begin{C++} 718 { 719 void _foo_wrapper(int t) { 720 foo( t, assertion ); 721 } 722 bar( 1, _foo_wrapper ); 723 } 724 \end{C++} 725 Nested assertions and implicit parameter creation may continue indefinitely. 726 This issue is a limitation of implicit parameter implementation. 727 In particular, polymorphic variadic recursion must be structural (\ie the number of arguments decreases in any possible recursive calls), otherwise code generation gets into an infinite loop. 728 The \CFA compiler sets a limit on assertion depth and reports an error if assertion resolution does not terminate within the limit (as for \lstinline[language=C++]@templates@ in \CC). 643 729 644 730 \bibliographystyle{plain} -
doc/user/Makefile
r33c3ded r223a633 55 55 56 56 ${DOCUMENT} : ${BASE}.ps 57 ps2pdf $<57 ps2pdf -dPDFSETTINGS=/prepress $< 58 58 59 59 ${BASE}.ps : ${BASE}.dvi -
doc/user/user.tex
r33c3ded r223a633 11 11 %% Created On : Wed Apr 6 14:53:29 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Fri Mar 6 13:34:52202014 %% Update Count : 39 2413 %% Last Modified On : Mon Oct 5 08:57:29 2020 14 %% Update Count : 3998 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 30 30 \usepackage{upquote} % switch curled `'" to straight 31 31 \usepackage{calc} 32 \usepackage{xspace}33 32 \usepackage{varioref} % extended references 34 \usepackage{listings} % format program code 33 \usepackage[labelformat=simple,aboveskip=0pt,farskip=0pt]{subfig} 34 \renewcommand{\thesubfigure}{\alph{subfigure})} 35 35 \usepackage[flushmargin]{footmisc} % support label/reference in footnote 36 36 \usepackage{latexsym} % \Box glyph 37 37 \usepackage{mathptmx} % better math font with "times" 38 38 \usepackage[usenames]{color} 39 \input{common} % common CFA document macros 40 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 41 \usepackage{breakurl} 42 43 \usepackage[pagewise]{lineno} 44 \renewcommand{\linenumberfont}{\scriptsize\sffamily} 45 \usepackage[firstpage]{draftwatermark} 46 \SetWatermarkLightness{0.9} 47 48 % Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore 49 % removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR 50 % AFTER HYPERREF. 
51 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 52 53 \setlength{\topmargin}{-0.45in} % move running title into header 54 \setlength{\headsep}{0.25in} 55 56 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 57 58 \CFAStyle % use default CFA format-style 59 \lstnewenvironment{C++}[1][] % use C++ style 60 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}} 61 {} 62 39 \newcommand{\CFALatin}{} 63 40 % inline code ©...© (copyright symbol) emacs: C-q M-) 64 41 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-. … … 68 45 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^ 69 46 % math escape $...$ (dollar symbol) 47 \input{common} % common CFA document macros 48 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 49 \usepackage{breakurl} 50 51 \renewcommand\footnoterule{\kern -3pt\rule{0.3\linewidth}{0.15pt}\kern 2pt} 52 53 \usepackage[pagewise]{lineno} 54 \renewcommand{\linenumberfont}{\scriptsize\sffamily} 55 \usepackage[firstpage]{draftwatermark} 56 \SetWatermarkLightness{0.9} 57 58 % Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore 59 % removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR 60 % AFTER HYPERREF. 
61 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 62 63 \setlength{\topmargin}{-0.45in} % move running title into header 64 \setlength{\headsep}{0.25in} 65 66 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 67 68 \CFAStyle % use default CFA format-style 69 \lstnewenvironment{C++}[1][] % use C++ style 70 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}} 71 {} 72 73 \newsavebox{\myboxA} 74 \newsavebox{\myboxB} 70 75 71 76 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% … … 79 84 \newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}} 80 85 \newcommand{\KWC}{K-W C\xspace} 81 82 \newsavebox{\LstBox}83 86 84 87 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% … … 253 256 254 257 The signature feature of \CFA is \emph{\Index{overload}able} \Index{parametric-polymorphic} functions~\cite{forceone:impl,Cormack90,Duggan96} with functions generalized using a ©forall© clause (giving the language its name): 255 \begin{ lstlisting}258 \begin{cfa} 256 259 ®forall( otype T )® T identity( T val ) { return val; } 257 260 int forty_two = identity( 42 ); §\C{// T is bound to int, forty\_two == 42}§ 258 \end{ lstlisting}261 \end{cfa} 259 262 % extending the C type system with parametric polymorphism and overloading, as opposed to the \Index*[C++]{\CC{}} approach of object-oriented extensions. 260 263 \CFA{}\hspace{1pt}'s polymorphism was originally formalized by \Index*{Glen Ditchfield}\index{Ditchfield, Glen}~\cite{Ditchfield92}, and first implemented by \Index*{Richard Bilson}\index{Bilson, Richard}~\cite{Bilson03}. 
… … 275 278 \begin{comment} 276 279 A simple example is leveraging the existing type-unsafe (©void *©) C ©bsearch© to binary search a sorted floating array: 277 \begin{ lstlisting}280 \begin{cfa} 278 281 void * bsearch( const void * key, const void * base, size_t dim, size_t size, 279 282 int (* compar)( const void *, const void * )); … … 284 287 double key = 5.0, vals[10] = { /* 10 sorted floating values */ }; 285 288 double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp ); §\C{// search sorted array}§ 286 \end{ lstlisting}289 \end{cfa} 287 290 which can be augmented simply with a polymorphic, type-safe, \CFA-overloaded wrappers: 288 \begin{ lstlisting}291 \begin{cfa} 289 292 forall( otype T | { int ?<?( T, T ); } ) T * bsearch( T key, const T * arr, size_t size ) { 290 293 int comp( const void * t1, const void * t2 ) { /* as above with double changed to T */ } … … 297 300 double * val = bsearch( 5.0, vals, 10 ); §\C{// selection based on return type}§ 298 301 int posn = bsearch( 5.0, vals, 10 ); 299 \end{ lstlisting}302 \end{cfa} 300 303 The nested function ©comp© provides the hidden interface from typed \CFA to untyped (©void *©) C, plus the cast of the result. 301 304 Providing a hidden ©comp© function in \CC is awkward as lambdas do not use C calling-conventions and template declarations cannot appear at block scope. … … 305 308 \CFA has replacement libraries condensing hundreds of existing C functions into tens of \CFA overloaded functions, all without rewriting the actual computations. 
306 309 For example, it is possible to write a type-safe \CFA wrapper ©malloc© based on the C ©malloc©: 307 \begin{ lstlisting}310 \begin{cfa} 308 311 forall( dtype T | sized(T) ) T * malloc( void ) { return (T *)malloc( sizeof(T) ); } 309 312 int * ip = malloc(); §\C{// select type and size from left-hand side}§ 310 313 double * dp = malloc(); 311 314 struct S {...} * sp = malloc(); 312 \end{ lstlisting}315 \end{cfa} 313 316 where the return type supplies the type/size of the allocation, which is impossible in most type systems. 314 317 \end{comment} … … 943 946 the same level as a ©case© clause; the target label may be case ©default©, but only associated 944 947 with the current ©switch©/©choose© statement. 945 946 947 \subsection{Loop Control}948 949 The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).950 \begin{itemize}951 \item952 The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M.953 \item954 An empty conditional implies comparison value of ©1© (true).955 \item956 A comparison N is implicit up-to exclusive range [0,N©®)®©.957 \item958 A comparison ©=© N is implicit up-to inclusive range [0,N©®]®©.959 \item960 The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N©®)®©.961 \item962 The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N©®]®©.963 \item964 The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M©®)®©.965 \item966 The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M©®]®©.967 \item968 ©0© is the implicit start value;969 \item970 ©1© is the implicit increment value.971 \item972 The up-to range uses operator ©+=© for increment;973 \item974 The down-to range uses operator ©-=© for decrement.975 \item976 ©@© means put nothing in this field.977 \item978 ©:© means start another index.979 \end{itemize}980 948 981 949 \begin{figure} … … 1086 1054 1087 1055 1056 
\subsection{Loop Control} 1057 1058 The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}). 1059 \begin{itemize} 1060 \item 1061 The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M. 1062 \item 1063 An empty conditional implies comparison value of ©1© (true). 1064 \item 1065 A comparison N is implicit up-to exclusive range [0,N©®)®©. 1066 \item 1067 A comparison ©=© N is implicit up-to inclusive range [0,N©®]®©. 1068 \item 1069 The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N©®)®©. 1070 \item 1071 The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N©®]®©. 1072 \item 1073 The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M©®)®©. 1074 \item 1075 The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M©®]®©. 1076 \item 1077 ©0© is the implicit start value; 1078 \item 1079 ©1© is the implicit increment value. 1080 \item 1081 The up-to range uses operator ©+=© for increment; 1082 \item 1083 The down-to range uses operator ©-=© for decrement. 1084 \item 1085 ©@© means put nothing in this field. 1086 \item 1087 ©:© means start another index. 1088 \end{itemize} 1089 1090 1088 1091 %\subsection{\texorpdfstring{Labelled \protect\lstinline@continue@ / \protect\lstinline@break@}{Labelled continue / break}} 1089 1092 \subsection{\texorpdfstring{Labelled \LstKeywordStyle{continue} / \LstKeywordStyle{break} Statement}{Labelled continue / break Statement}} … … 1095 1098 for ©break©, the target label can also be associated with a ©switch©, ©if© or compound (©{}©) statement. 1096 1099 \VRef[Figure]{f:MultiLevelExit} shows ©continue© and ©break© indicating the specific control structure, and the corresponding C program using only ©goto© and labels. 
1097 The innermost loop has 7exit points, which cause continuation or termination of one or more of the 7 \Index{nested control-structure}s.1100 The innermost loop has 8 exit points, which cause continuation or termination of one or more of the 7 \Index{nested control-structure}s. 1098 1101 1099 1102 \begin{figure} 1100 \begin{tabular}{@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}} 1101 \multicolumn{1}{@{\hspace{\parindentlnth}}c@{\hspace{\parindentlnth}}}{\textbf{\CFA}} & \multicolumn{1}{@{\hspace{\parindentlnth}}c}{\textbf{C}} \\ 1102 \begin{cfa} 1103 ®LC:® { 1104 ... §declarations§ ... 1105 ®LS:® switch ( ... ) { 1106 case 3: 1107 ®LIF:® if ( ... ) { 1108 ®LF:® for ( ... ) { 1109 ®LW:® while ( ... ) { 1110 ... break ®LC®; ... 1111 ... break ®LS®; ... 1112 ... break ®LIF®; ... 1113 ... continue ®LF;® ... 1114 ... break ®LF®; ... 1115 ... continue ®LW®; ... 1116 ... break ®LW®; ... 1117 } // while 1118 } // for 1119 } else { 1120 ... break ®LIF®; ... 1121 } // if 1122 } // switch 1103 \centering 1104 \begin{lrbox}{\myboxA} 1105 \begin{cfa}[tabsize=3] 1106 ®Compound:® { 1107 ®Try:® try { 1108 ®For:® for ( ... ) { 1109 ®While:® while ( ... ) { 1110 ®Do:® do { 1111 ®If:® if ( ... ) { 1112 ®Switch:® switch ( ... ) { 1113 case 3: 1114 ®break Compound®; 1115 ®break Try®; 1116 ®break For®; /* or */ ®continue For®; 1117 ®break While®; /* or */ ®continue While®; 1118 ®break Do®; /* or */ ®continue Do®; 1119 ®break If®; 1120 ®break Switch®; 1121 } // switch 1122 } else { 1123 ... ®break If®; ... // terminate if 1124 } // if 1125 } while ( ... ); // do 1126 } // while 1127 } // for 1128 } ®finally® { // always executed 1129 } // try 1123 1130 } // compound 1124 1131 \end{cfa} 1125 & 1126 \begin{cfa} 1132 \end{lrbox} 1133 1134 \begin{lrbox}{\myboxB} 1135 \begin{cfa}[tabsize=3] 1127 1136 { 1128 ... §declarations§ ... 1129 switch ( ... ) { 1130 case 3: 1131 if ( ... ) { 1132 for ( ... ) { 1133 while ( ... ) { 1134 ... goto ®LC®; ... 
1135 ... goto ®LS®; ... 1136 ... goto ®LIF®; ... 1137 ... goto ®LFC®; ... 1138 ... goto ®LFB®; ... 1139 ... goto ®LWC®; ... 1140 ... goto ®LWB®; ... 1141 ®LWC®: ; } ®LWB:® ; 1142 ®LFC:® ; } ®LFB:® ; 1143 } else { 1144 ... goto ®LIF®; ... 1145 } ®L3:® ; 1146 } ®LS:® ; 1147 } ®LC:® ; 1148 \end{cfa} 1149 & 1150 \begin{cfa} 1151 1152 1153 1154 1155 1156 1157 1158 // terminate compound 1159 // terminate switch 1160 // terminate if 1161 // continue loop 1162 // terminate loop 1163 // continue loop 1164 // terminate loop 1165 1166 1167 1168 // terminate if 1169 1170 1171 1172 \end{cfa} 1173 \end{tabular} 1137 1138 ®ForC:® for ( ... ) { 1139 ®WhileC:® while ( ... ) { 1140 ®DoC:® do { 1141 if ( ... ) { 1142 switch ( ... ) { 1143 case 3: 1144 ®goto Compound®; 1145 ®goto Try®; 1146 ®goto ForB®; /* or */ ®goto ForC®; 1147 ®goto WhileB®; /* or */ ®goto WhileC®; 1148 ®goto DoB®; /* or */ ®goto DoC®; 1149 ®goto If®; 1150 ®goto Switch®; 1151 } ®Switch:® ; 1152 } else { 1153 ... ®goto If®; ... // terminate if 1154 } ®If:®; 1155 } while ( ... 
); ®DoB:® ; 1156 } ®WhileB:® ; 1157 } ®ForB:® ; 1158 1159 1160 } ®Compound:® ; 1161 \end{cfa} 1162 \end{lrbox} 1163 1164 \subfloat[\CFA]{\label{f:CFibonacci}\usebox\myboxA} 1165 \hspace{2pt} 1166 \vrule 1167 \hspace{2pt} 1168 \subfloat[C]{\label{f:CFAFibonacciGen}\usebox\myboxB} 1174 1169 \caption{Multi-level Exit} 1175 1170 \label{f:MultiLevelExit} … … 1426 1421 try { 1427 1422 f(...); 1428 } catch( E e ; §boolean-predicate§ ) { §\C [8cm]{// termination handler}§1423 } catch( E e ; §boolean-predicate§ ) { §\C{// termination handler}§ 1429 1424 // recover and continue 1430 } catchResume( E e ; §boolean-predicate§ ) { §\C{// resumption handler} \CRT§1425 } catchResume( E e ; §boolean-predicate§ ) { §\C{// resumption handler}§ 1431 1426 // repair and return 1432 1427 } finally { … … 3491 3486 For implicit formatted input, the common case is reading a sequence of values separated by whitespace, where the type of an input constant must match the type of the input variable. 3492 3487 \begin{cquote} 3493 \begin{lrbox}{\ LstBox}3488 \begin{lrbox}{\myboxA} 3494 3489 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 3495 3490 int x; double y; char z; … … 3497 3492 \end{lrbox} 3498 3493 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{3em}}l@{}} 3499 \multicolumn{1}{@{}l@{}}{\usebox\ LstBox} \\3494 \multicolumn{1}{@{}l@{}}{\usebox\myboxA} \\ 3500 3495 \multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CFA}} & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CC}} & \multicolumn{1}{c}{\textbf{Python}} \\ 3501 3496 \begin{cfa}[aboveskip=0pt,belowskip=0pt] … … 6672 6667 For example, an initial alignment and fill capability are preserved during a resize copy so the copy has the same alignment and extended storage is filled. 6673 6668 Without sticky properties it is dangerous to use ©realloc©, resulting in an idiom of manually performing the reallocation to maintain correctness. 
6669 \begin{cfa} 6670 6671 \end{cfa} 6674 6672 6675 6673 \CFA memory management extends allocation to support constructors for initialization of allocated storage, \eg in … … 6721 6719 6722 6720 // §\CFA§ safe general allocation, fill, resize, alignment, array 6723 T * alloc( void );§\indexc{alloc}§ 6724 T * alloc( size_t dim ); 6725 T * alloc( T ptr[], size_t dim ); 6726 T * alloc_set( char fill );§\indexc{alloc_set}§ 6727 T * alloc_set( T fill ); 6728 T * alloc_set( size_t dim, char fill ); 6729 T * alloc_set( size_t dim, T fill ); 6730 T * alloc_set( size_t dim, const T fill[] ); 6731 T * alloc_set( T ptr[], size_t dim, char fill ); 6732 6733 T * alloc_align( size_t align ); 6734 T * alloc_align( size_t align, size_t dim ); 6735 T * alloc_align( T ptr[], size_t align ); // aligned realloc array 6736 T * alloc_align( T ptr[], size_t align, size_t dim ); // aligned realloc array 6737 T * alloc_align_set( size_t align, char fill ); 6738 T * alloc_align_set( size_t align, T fill ); 6739 T * alloc_align_set( size_t align, size_t dim, char fill ); 6740 T * alloc_align_set( size_t align, size_t dim, T fill ); 6741 T * alloc_align_set( size_t align, size_t dim, const T fill[] ); 6742 T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); 6721 T * alloc( void );§\indexc{alloc}§ §\C[3.5in]{// variable, T size}§ 6722 T * alloc( size_t dim ); §\C{// array[dim], T size elements}§ 6723 T * alloc( T ptr[], size_t dim ); §\C{// realloc array[dim], T size elements}§ 6724 6725 T * alloc_set( char fill );§\indexc{alloc_set}§ §\C{// variable, T size, fill bytes with value}§ 6726 T * alloc_set( T fill ); §\C{// variable, T size, fill with value}§ 6727 T * alloc_set( size_t dim, char fill ); §\C{// array[dim], T size elements, fill bytes with value}§ 6728 T * alloc_set( size_t dim, T fill ); §\C{// array[dim], T size elements, fill elements with value}§ 6729 T * alloc_set( size_t dim, const T fill[] ); §\C{// array[dim], T size elements, fill elements with array}§ 6730 
T * alloc_set( T ptr[], size_t dim, char fill ); §\C{// realloc array[dim], T size elements, fill bytes with value}§ 6731 6732 T * alloc_align( size_t align ); §\C{// aligned variable, T size}§ 6733 T * alloc_align( size_t align, size_t dim ); §\C{// aligned array[dim], T size elements}§ 6734 T * alloc_align( T ptr[], size_t align ); §\C{// realloc new aligned array}§ 6735 T * alloc_align( T ptr[], size_t align, size_t dim ); §\C{// realloc new aligned array[dim]}§ 6736 6737 T * alloc_align_set( size_t align, char fill ); §\C{// aligned variable, T size, fill bytes with value}§ 6738 T * alloc_align_set( size_t align, T fill ); §\C{// aligned variable, T size, fill with value}§ 6739 T * alloc_align_set( size_t align, size_t dim, char fill ); §\C{// aligned array[dim], T size elements, fill bytes with value}§ 6740 T * alloc_align_set( size_t align, size_t dim, T fill ); §\C{// aligned array[dim], T size elements, fill elements with value}§ 6741 T * alloc_align_set( size_t align, size_t dim, const T fill[] ); §\C{// aligned array[dim], T size elements, fill elements with array}§ 6742 T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); §\C{// realloc new aligned array[dim], fill new bytes with value}§ 6743 6743 6744 6744 // §\CFA§ safe initialization/copy, i.e., implicit size specification
Note: See TracChangeset
for help on using the changeset viewer.