Changes in / [b5563e1:a9b1b0c]
- Files:
-
- 1 deleted
- 11 edited
-
doc/papers/concurrency/Makefile (modified) (1 diff)
-
doc/papers/concurrency/Paper.tex (modified) (154 diffs)
-
doc/papers/concurrency/annex/local.bib (modified) (3 diffs)
-
doc/papers/concurrency/style/cfa-format.tex (modified) (3 diffs)
-
doc/papers/general/Paper.tex (modified) (9 diffs)
-
src/Parser/ExpressionNode.cc (modified) (6 diffs)
-
src/Parser/lex.ll (modified) (11 diffs)
-
src/Parser/parser.yy (modified) (3 diffs)
-
src/libcfa/Makefile.am (modified) (2 diffs)
-
src/libcfa/Makefile.in (modified) (2 diffs)
-
src/libcfa/time (deleted)
-
src/tests/coroutine/fibonacci.c (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
doc/papers/concurrency/Makefile
rb5563e1 ra9b1b0c 4 4 Figures = figures 5 5 Macros = AMA/AMA-stix/ama 6 TeXLIB = .: annex:../../LaTeXmacros:${Macros}:${Build}:../../bibliography:6 TeXLIB = .:style:annex:../../LaTeXmacros:${Macros}:${Build}:../../bibliography: 7 7 LaTeX = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${Build} 8 8 BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex -
doc/papers/concurrency/Paper.tex
rb5563e1 ra9b1b0c 1 % inline code ©...© (copyright symbol) emacs: C-q M-) 2 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-. 3 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_ 4 % green highlighting ¢...¢ (cent symbol) emacs: C-q M-" 5 % LaTex escape §...§ (section symbol) emacs: C-q M-' 6 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^ 7 % math escape $...$ (dollar symbol) 8 1 9 \documentclass[AMA,STIX1COL]{WileyNJD-v2} 2 10 … … 12 20 13 21 % Latex packages used in the document. 22 23 \usepackage[T1]{fontenc} % allow Latin1 (extended ASCII) characters 24 \usepackage{textcomp} 25 \usepackage[latin1]{inputenc} 26 14 27 \usepackage{epic,eepic} 15 28 \usepackage{xspace} … … 21 34 \usepackage{siunitx} 22 35 \sisetup{ binary-units=true } 23 %\input{style} % bespoke macros used in the document36 \input{style} % bespoke macros used in the document 24 37 25 38 \hypersetup{breaklinks=true} … … 38 51 % Names used in the document. 39 52 40 \newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}\xspace} % Cforall symbolic name 41 \newcommand{\CFA}{\protect\CFAIcon} % safe for section/caption 42 \newcommand{\CFL}{\textrm{Cforall}\xspace} % Cforall symbolic name 43 \newcommand{\Celeven}{\textrm{C11}\xspace} % C11 symbolic name 44 \newcommand{\CC}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}\xspace} % C++ symbolic name 45 \newcommand{\CCeleven}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}11\xspace} % C++11 symbolic name 46 \newcommand{\CCfourteen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}14\xspace} % C++14 symbolic name 47 \newcommand{\CCseventeen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}17\xspace} % C++17 symbolic name 48 \newcommand{\CCtwenty}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}20\xspace} % C++20 symbolic name 49 \newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name 50 51 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 53 \newcommand{\CS}{C\raisebox{-0.9ex}{\large$^\sharp$}\xspace} 52 54 53 55 \newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}} … … 60 62 \newcommand{\TODO}{{\Textbf{TODO}}} 61 63 64 65 \newsavebox{\LstBox} 66 62 67 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 64 % Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore65 % removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR66 % AFTER HYPERREF.67 %\DeclareTextCommandDefault{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}}68 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}69 70 \makeatletter71 % parindent is relative, i.e., toggled on/off in environments like itemize, so store the value for72 % use rather than use \parident directly.73 \newlength{\parindentlnth}74 \setlength{\parindentlnth}{\parindent}75 76 \newcommand{\LstBasicStyle}[1]{{\lst@basicstyle{\lst@basicstyle{#1}}}}77 \newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}78 \newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}79 80 \newlength{\gcolumnposn} % temporary hack because lstlisting does not handle tabs correctly81 \newlength{\columnposn}82 \setlength{\gcolumnposn}{3.5in}83 \setlength{\columnposn}{\gcolumnposn}84 \newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\lst@basicstyle{\LstCommentStyle{#2}}}}85 \newcommand{\CRT}{\global\columnposn=\gcolumnposn}86 87 % Denote newterms in particular font and index them without particular font and in lowercase, e.g., \newterm{abc}.88 % The option parameter provides an index term different from the new term, e.g., \newterm[\texttt{abc}]{abc}89 % The star version does not lowercase the index information, e.g., \newterm*{IBM}.90 \newcommand{\newtermFontInline}{\emph}91 \newcommand{\newterm}{\@ifstar\@snewterm\@newterm}92 \newcommand{\@newterm}[2][\@empty]{\lowercase{\def\temp{#2}}{\newtermFontInline{#2}}\ifx#1\@empty\index{\temp}\else\index{#1@{\protect#2}}\fi}93 \newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi}94 95 % Latin abbreviation96 \newcommand{\abbrevFont}{\textit} % set empty for no italics97 \newcommand{\EG}{\abbrevFont{e}.\abbrevFont{g}.}98 \newcommand*{\eg}{%99 \@ifnextchar{,}{\EG}%100 {\@ifnextchar{:}{\EG}%101 {\EG,\xspace}}%102 }%103 \newcommand{\IE}{\abbrevFont{i}.\abbrevFont{e}.}104 \newcommand*{\ie}{%105 \@ifnextchar{,}{\IE}%106 {\@ifnextchar{:}{\IE}%107 {\IE,\xspace}}%108 }%109 \newcommand{\ETC}{\abbrevFont{etc}}110 \newcommand*{\etc}{%111 \@ifnextchar{.}{\ETC}%112 {\ETC.\xspace}%113 }%114 \newcommand{\ETAL}{\abbrevFont{et}~\abbrevFont{al}}115 \renewcommand*{\etal}{%116 \@ifnextchar{.}{\protect\ETAL}%117 {\protect\ETAL.\xspace}%118 }%119 \newcommand{\VIZ}{\abbrevFont{viz}}120 \newcommand*{\viz}{%121 \@ifnextchar{.}{\VIZ}%122 {\VIZ.\xspace}%123 }%124 \makeatother125 126 \newenvironment{cquote}{%127 \list{}{\lstset{resetmargins=true,aboveskip=0pt,belowskip=0pt}\topsep=3pt\parsep=0pt\leftmargin=\parindentlnth\rightmargin\leftmargin}%128 \item\relax129 }{%130 \endlist131 }% cquote132 133 % CFA programming language, based on ANSI C (with some gcc additions)134 \lstdefinelanguage{CFA}[ANSI]{C}{135 morekeywords={136 _Alignas, _Alignof, __alignof, __alignof__, asm, __asm, __asm__, _At, __attribute,137 __attribute__, auto, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__,138 __const, __const__, disable, dtype, enable, exception, __extension__, fallthrough, fallthru,139 finally, forall, ftype, _Generic, _Imaginary, inline, __label__, lvalue, _Noreturn, one_t,140 otype, restrict, _Static_assert, throw, throwResume, trait, try, ttype, typeof, __typeof,141 __typeof__, virtual, with, zero_t},142 morekeywords=[2]{143 _Atomic, coroutine, is_coroutine, is_monitor, is_thread, monitor, mutex, nomutex, or,144 resume, suspend, thread, _Thread_local, waitfor, when, yield},145 moredirectives={defined,include_next}%146 }147 148 \lstset{149 language=CFA,150 columns=fullflexible,151 basicstyle=\linespread{0.9}\sf, % reduce line spacing and use sanserif font152 stringstyle=\tt, % use typewriter font153 tabsize=5, % N space tabbing154 xleftmargin=\parindentlnth, % indent code to paragraph indentation155 %mathescape=true, % LaTeX math escape in CFA code $...$156 escapechar=\$, % LaTeX escape in CFA code157 keepspaces=true, %158 showstringspaces=false, % do not show spaces with cup159 showlines=true, % show blank lines at end of code160 aboveskip=4pt, % spacing above/below code block161 belowskip=3pt,162 % replace/adjust listing characters that look bad in sanserif163 literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1164 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1165 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex{\textgreater}}2,166 moredelim=**[is][\color{red}]{`}{`},167 }% lstset168 169 % uC++ programming language, based on ANSI C++170 \lstdefinelanguage{uC++}[ANSI]{C++}{171 morekeywords={172 _Accept, _AcceptReturn, _AcceptWait, _Actor, _At, _CatchResume, _Cormonitor, _Coroutine, _Disable,173 _Else, _Enable, _Event, _Finally, _Monitor, _Mutex, _Nomutex, _PeriodicTask, _RealTimeTask,174 _Resume, _Select, _SporadicTask, _Task, _Timeout, _When, _With, _Throw},175 }176 \lstdefinelanguage{Golang}{177 morekeywords=[1]{package,import,func,type,struct,return,defer,panic,recover,select,var,const,iota,},178 morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,int32,int64,179 bool,float32,float64,complex64,complex128,byte,rune,uintptr, error,interface},180 morekeywords=[3]{map,slice,make,new,nil,len,cap,copy,close,true,false,delete,append,real,imag,complex,chan,},181 morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,else,default,},182 morekeywords=[5]{Println,Printf,Error,},183 sensitive=true,184 morecomment=[l]{//},185 morecomment=[s]{/*}{*/},186 morestring=[b]',187 morestring=[b]",188 morestring=[s]{`}{`},189 }190 191 \lstnewenvironment{cfa}[1][]192 {\lstset{#1}}193 {}194 \lstnewenvironment{C++}[1][] % use C++ style195 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}196 {}197 \lstnewenvironment{uC++}[1][]198 {\lstset{#1}}199 {}200 \lstnewenvironment{Go}[1][]201 {\lstset{#1}}202 {}203 204 % inline code @...@205 \lstMakeShortInline@%206 207 68 208 69 \title{\texorpdfstring{Concurrency in \protect\CFA}{Concurrency in Cforall}} … … 220 81 \abstract[Summary]{ 221 82 \CFA is a modern, polymorphic, \emph{non-object-oriented} extension of the C programming language. 222 This paper discusses the design of the concurrency and parallelism features in \CFA, and the concurrent runtime-system .83 This paper discusses the design of the concurrency and parallelism features in \CFA, and the concurrent runtime-system that supports them. 223 84 These features are created from scratch as ISO C lacks concurrency, relying largely on pthreads. 224 85 Coroutines and lightweight (user) threads are introduced into the language. … … 226 87 A unique contribution is allowing multiple monitors to be safely acquired simultaneously. 227 88 All features respect the expectations of C programmers, while being fully integrate with the \CFA polymorphic type-system and other language features. 228 Finally, experimental results are presented to compare the performance of the new features with similar mechanisms inother concurrent programming-languages.89 Finally, experimental results are presented to validate several of the new features with other concurrent programming-languages. 229 90 }% 230 91 … … 243 104 % ====================================================================== 244 105 245 This paper provides a minimal concurrency \newterm{API} that is simple, efficient and can be used to build other concurrency features. 246 While the simplest concurrency system is a thread and a lock, this low-level approach is hard to master. 247 An easier approach for programmers is to support higher-level constructs as the basis of concurrency. 248 Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{Hochstein05}. 249 Examples of high-level approaches are task based~\cite{TBB}, message passing~\cite{Erlang,MPI}, and implicit threading~\cite{OpenMP}. 250 251 The terminology used in this paper is as follows. 252 A \newterm{thread} is a fundamental unit of execution that runs a sequence of code and requires a stack to maintain state. 253 Multiple simultaneous threads gives rise to \newterm{concurrency}, which requires locking to ensure safe access to shared data. 254 % Correspondingly, concurrency is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, etc.) concurrent threads are introduced. 255 \newterm{Locking}, and by extension locks, are defined as a mechanism to prevent progress of threads to provide safety. 256 \newterm{Parallelism} is running multiple threads simultaneously. 257 Parallelism implies \emph{actual} simultaneous execution, where concurrency only requires \emph{apparent} simultaneous execution. 258 As such, parallelism is only observable in differences in performance, which is observed through differences in timing. 259 260 Hence, there are two problems to be solved in the design of concurrency for a programming language: concurrency and parallelism. 106 This paper provides a minimal concurrency \textbf{api} that is simple, efficient and can be reused to build higher-level features. 107 The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. 108 An easier approach for users is to support higher-level constructs as the basis of concurrency. 109 Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. 110 Examples are task based, message passing and implicit threading. 111 The high-level approach and its minimal \textbf{api} are tested in a dialect of C, called \CFA. 112 Furthermore, the proposed \textbf{api} doubles as an early definition of the \CFA language and library. 113 This paper also provides an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator. 114 115 There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. 261 116 While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. 262 Concurrency tools handle mutual exclusion and synchronization, while parallelism tools handle performance, cost and resource utilization. 263 264 The proposed concurrency API is implemented in a dialect of C, called \CFA. 265 The paper discusses how the language features are added to the \CFA translator with respect to parsing, semantic, and type checking, and the corresponding high-perforamnce runtime-library to implement the concurrency features. 117 Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization. 118 119 In the context of this paper, a \textbf{thread} is a fundamental unit of execution that runs a sequence of code, generally on a program stack. 120 Having multiple simultaneous threads gives rise to concurrency and generally requires some kind of locking mechanism to ensure proper execution. 121 Correspondingly, \textbf{concurrency} is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, etc.) concurrent threads are introduced. 122 Accordingly, \textbf{locking} (and by extension locks) are defined as a mechanism that prevents the progress of certain threads in order to avoid problems due to concurrency. 123 Finally, in this paper \textbf{parallelism} is distinct from concurrency and is defined as running multiple threads simultaneously. 124 More precisely, parallelism implies \emph{actual} simultaneous execution as opposed to concurrency which only requires \emph{apparent} simultaneous execution. 125 As such, parallelism is only observable in the differences in performance or, more generally, differences in timing. 266 126 267 127 % ====================================================================== … … 279 139 Interestingly, while \CFA is not an object-oriented language, lacking the concept of a receiver (e.g., {\tt this}), it does have some notion of objects\footnote{C defines the term objects as : ``region of data storage in the execution environment, the contents of which can represent 280 140 values''~\cite[3.15]{C11}}, most importantly construction and destruction of objects. 281 Most of the following code examples can be found on the \CFA website~\cite{ Cforall}.282 283 141 Most of the following code examples can be found on the \CFA website~\cite{www-cfa}. 142 143 % ====================================================================== 284 144 \subsection{References} 285 145 286 146 Like \CC, \CFA introduces rebind-able references providing multiple dereferencing as an alternative to pointers. 287 147 In regards to concurrency, the semantic difference between pointers and references are not particularly relevant, but since this document uses mostly references, here is a quick overview of the semantics: 288 \begin{cfa }148 \begin{cfacode} 289 149 int x, *p1 = &x, **p2 = &p1, ***p3 = &p2, 290 150 &r1 = x, &&r2 = r1, &&&r3 = r2; 291 ***p3 = 3; $\C{// change x}$292 r3 = 3; $\C{// change x, ***r3}$293 **p3 = ...; $\C{// change p1}$294 *p3 = ...; $\C{// change p2}$295 int y, z, & ar[3] = {x, y, z}; $\C{// initialize array of references}$296 typeof( ar[1]) p; $\C{// is int, referenced object type}$297 typeof(&ar[1]) q; $\C{// is int \&, reference type}$298 sizeof( ar[1]) == sizeof(int); $\C{// is true, referenced object size}$299 sizeof(&ar[1]) == sizeof(int *); $\C{// is true, reference size}$300 \end{cfa }151 ***p3 = 3; //change x 152 r3 = 3; //change x, ***r3 153 **p3 = ...; //change p1 154 *p3 = ...; //change p2 155 int y, z, & ar[3] = {x, y, z}; //initialize array of references 156 typeof( ar[1]) p; //is int, referenced object type 157 typeof(&ar[1]) q; //is int &, reference type 158 sizeof( ar[1]) == sizeof(int); //is true, referenced object size 159 sizeof(&ar[1]) == sizeof(int *); //is true, reference size 160 \end{cfacode} 301 161 The important take away from this code example is that a reference offers a handle to an object, much like a pointer, but which is automatically dereferenced for convenience. 302 162 … … 307 167 As well, \CFA uses the return type as part of the selection criteria, as in Ada~\cite{Ada}. 308 168 For routines with multiple parameters and returns, the selection is complex. 309 \begin{cfa }310 // selection based on type and number of parameters311 void f(void); $\C{// (1)}$312 void f(char); $\C{// (2)}$313 void f(int, double); $\C{// (3)}$314 f(); $\C{// select (1)}$315 f('a'); $\C{// select (2)}$316 f(3, 5.2); $\C{// select (3)}$317 318 // selection based on type and number of returns319 char f(int); $\C{// (1)}$320 double f(int); $\C{// (2)}$321 char c = f(3); $\C{// select (1)}$322 double d = f(4); $\C{// select (2)}$323 \end{cfa }169 \begin{cfacode} 170 //selection based on type and number of parameters 171 void f(void); //(1) 172 void f(char); //(2) 173 void f(int, double); //(3) 174 f(); //select (1) 175 f('a'); //select (2) 176 f(3, 5.2); //select (3) 177 178 //selection based on type and number of returns 179 char f(int); //(1) 180 double f(int); //(2) 181 char c = f(3); //select (1) 182 double d = f(4); //select (2) 183 \end{cfacode} 324 184 This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. 325 185 Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent name clashes. 326 As seen in section \ref{basics}, routine @main@is an example that benefits from overloading.186 As seen in section \ref{basics}, routine \code{main} is an example that benefits from overloading. 327 187 328 188 % ====================================================================== … … 330 190 Overloading also extends to operators. 331 191 The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation appear, e.g.: 332 \begin{cfa }333 int ++? (int op); $\C{// unary prefix increment}$334 int ?++ (int op); $\C{// unary postfix increment}$335 int ?+? (int op1, int op2); $\C{// binary plus}$336 int ?<=?(int op1, int op2); $\C{// binary less than}$337 int ?=? (int & op1, int op2); $\C{// binary assignment}$338 int ?+=?(int & op1, int op2); $\C{// binary plus-assignment}$192 \begin{cfacode} 193 int ++? (int op); //unary prefix increment 194 int ?++ (int op); //unary postfix increment 195 int ?+? (int op1, int op2); //binary plus 196 int ?<=?(int op1, int op2); //binary less than 197 int ?=? (int & op1, int op2); //binary assignment 198 int ?+=?(int & op1, int op2); //binary plus-assignment 339 199 340 200 struct S {int i, j;}; 341 S ?+?(S op1, S op2) { $\C{// add two structures}$201 S ?+?(S op1, S op2) { //add two structures 342 202 return (S){op1.i + op2.i, op1.j + op2.j}; 343 203 } 344 204 S s1 = {1, 2}, s2 = {2, 3}, s3; 345 s3 = s1 + s2; $\C{// compute sum: s3 == {2, 5}}$346 \end{cfa }205 s3 = s1 + s2; //compute sum: s3 == {2, 5} 206 \end{cfacode} 347 207 While concurrency does not use operator overloading directly, this feature is more important as an introduction for the syntax of constructors. 348 208 … … 351 211 Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. 352 212 Since \CFA relies heavily on the lifetime of objects, constructors and destructors is a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors: 353 \begin{cfa }213 \begin{cfacode} 354 214 struct S { 355 215 size_t size; 356 216 int * ia; 357 217 }; 358 void ?{}(S & s, int asize) { $\C{// constructor operator}$359 s.size = asize; $\C{// initialize fields}$218 void ?{}(S & s, int asize) { //constructor operator 219 s.size = asize; //initialize fields 360 220 s.ia = calloc(size, sizeof(S)); 361 221 } 362 void ^?{}(S & s) { $\C{// destructor operator}$363 free(ia); $\C{// de-initialization fields}$222 void ^?{}(S & s) { //destructor operator 223 free(ia); //de-initialization fields 364 224 } 365 225 int main() { 366 S x = {10}, y = {100}; $\C{// implicit calls: ?\{\}(x, 10), ?\{\}(y, 100)}$367 ... $\C{// use x and y}$368 ^x{}; ^y{}; $\C{// explicit calls to de-initialize}$369 x{20}; y{200}; $\C{// explicit calls to reinitialize}$370 ... $\C{// reuse x and y}$371 } $\C{// implicit calls: \^?\{\}(y), \^?\{\}(x)}$372 \end{cfa }226 S x = {10}, y = {100}; //implicit calls: ?{}(x, 10), ?{}(y, 100) 227 ... //use x and y 228 ^x{}; ^y{}; //explicit calls to de-initialize 229 x{20}; y{200}; //explicit calls to reinitialize 230 ... //reuse x and y 231 } //implicit calls: ^?{}(y), ^?{}(x) 232 \end{cfacode} 373 233 The language guarantees that every object and all their fields are constructed. 374 234 Like \CC, construction of an object is automatically done on allocation and destruction of the object is done on deallocation. 375 235 Allocation and deallocation can occur on the stack or on the heap. 376 \begin{cfa }236 \begin{cfacode} 377 237 { 378 struct S s = {10}; $\C{// allocation, call constructor}$238 struct S s = {10}; //allocation, call constructor 379 239 ... 380 } $\C{// deallocation, call destructor}$381 struct S * s = new(); $\C{// allocation, call constructor}$240 } //deallocation, call destructor 241 struct S * s = new(); //allocation, call constructor 382 242 ... 383 delete(s); $\C{// deallocation, call destructor}$384 \end{cfa }385 Note that like \CC, \CFA introduces @new@ and @delete@, which behave like @malloc@ and @free@ in addition to constructing and destructing objects, after calling @malloc@ and before calling @free@, respectively.243 delete(s); //deallocation, call destructor 244 \end{cfacode} 245 Note that like \CC, \CFA introduces \code{new} and \code{delete}, which behave like \code{malloc} and \code{free} in addition to constructing and destructing objects, after calling \code{malloc} and before calling \code{free}, respectively. 386 246 387 247 % ====================================================================== … … 389 249 \label{s:ParametricPolymorphism} 390 250 Routines in \CFA can also be reused for multiple types. 391 This capability is done using the @forall@clauses, which allow separately compiled routines to support generic usage over multiple types.251 This capability is done using the \code{forall} clauses, which allow separately compiled routines to support generic usage over multiple types. 392 252 For example, the following sum function works for any type that supports construction from 0 and addition: 393 \begin{cfa }394 // constraint type, 0 and +253 \begin{cfacode} 254 //constraint type, 0 and + 395 255 forall(otype T | { void ?{}(T *, zero_t); T ?+?(T, T); }) 396 256 T sum(T a[ ], size_t size) { 397 T total = 0; $\C{// construct T from 0}$257 T total = 0; //construct T from 0 398 258 for(size_t i = 0; i < size; i++) 399 total = total + a[i]; $\C{// select appropriate +}$259 total = total + a[i]; //select appropriate + 400 260 return total; 401 261 } 402 262 403 263 S sa[5]; 404 int i = sum(sa, 5); $\C{// use S's 0 construction and +}$405 \end{cfa }264 int i = sum(sa, 5); //use S's 0 construction and + 265 \end{cfacode} 406 266 407 267 Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. 408 268 Traits are named collection of constraints that can be used both instead and in addition to regular constraints: 409 \begin{cfa }269 \begin{cfacode} 410 270 trait summable( otype T ) { 411 void ?{}(T *, zero_t); $\C{// constructor from 0 literal}$412 T ?+?(T, T); $\C{// assortment of additions}$271 void ?{}(T *, zero_t); //constructor from 0 literal 272 T ?+?(T, T); //assortment of additions 413 273 T ?+=?(T *, T); 414 274 T ++?(T *); 415 275 T ?++(T *); 416 276 }; 417 forall( otype T | summable(T) ) $\C{// use trait}$277 forall( otype T | summable(T) ) //use trait 418 278 T sum(T a[], size_t size); 419 \end{cfa }420 421 Note that the type use for assertions can be either an @otype@ or a @dtype@.422 Types declared as @otype@refer to ``complete'' objects, i.e., objects with a size, a default constructor, a copy constructor, a destructor and an assignment operator.423 Using @dtype@,on the other hand, has none of these assumptions but is extremely restrictive, it only guarantees the object is addressable.279 \end{cfacode} 280 281 Note that the type use for assertions can be either an \code{otype} or a \code{dtype}. 282 Types declared as \code{otype} refer to ``complete'' objects, i.e., objects with a size, a default constructor, a copy constructor, a destructor and an assignment operator. 283 Using \code{dtype,} on the other hand, has none of these assumptions but is extremely restrictive, it only guarantees the object is addressable. 424 284 425 285 % ====================================================================== 426 286 \subsection{with Clause/Statement} 427 287 Since \CFA lacks the concept of a receiver, certain functions end up needing to repeat variable names often. 428 To remove this inconvenience, \CFA provides the @with@statement, which opens an aggregate scope making its fields directly accessible (like Pascal).429 \begin{cfa }288 To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal). 289 \begin{cfacode} 430 290 struct S { int i, j; }; 431 int mem(S & this) with (this) $\C{// with clause}$432 i = 1; $\C{// this->i}$433 j = 2; $\C{// this->j}$291 int mem(S & this) with (this) //with clause 292 i = 1; //this->i 293 j = 2; //this->j 434 294 } 435 295 int foo() { 436 296 struct S1 { ... } s1; 437 297 struct S2 { ... } s2; 438 with (s1) $\C{// with statement}$298 with (s1) //with statement 439 299 { 440 // access fields of s1 without qualification441 with (s2) $\C{// nesting}$300 //access fields of s1 without qualification 301 with (s2) //nesting 442 302 { 443 // access fields of s1 and s2 without qualification303 //access fields of s1 and s2 without qualification 444 304 } 445 305 } 446 with (s1, s2) $\C{// scopes open in parallel}$306 with (s1, s2) //scopes open in parallel 447 307 { 448 // access fields of s1 and s2 without qualification449 } 450 } 451 \end{cfa }452 453 For more information on \CFA see \cite{cforall-ug, Schluntz17,www-cfa}.308 //access fields of s1 and s2 without qualification 309 } 310 } 311 \end{cfacode} 312 313 For more information on \CFA see \cite{cforall-ug,rob-thesis,www-cfa}. 454 314 455 315 % ====================================================================== … … 479 339 Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows. 480 340 481 482 341 \section{\protect\CFA's Thread Building Blocks} 483 484 342 One of the important features that are missing in C is threading\footnote{While the C11 standard defines a ``threads.h'' header, it is minimal and defined as optional. 485 343 As such, library support for threading is far from widespread. 486 At the time of writing the paper, neither \ protect\lstinline|gcc| nor \protect\lstinline|clang| support ``threads.h'' in theirstandard libraries.}.344 At the time of writing the paper, neither \texttt{gcc} nor \texttt{clang} support ``threads.h'' in their respective standard libraries.}. 487 345 On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. 488 346 As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. 489 347 And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay. 490 348 491 492 349 \section{Coroutines: A Stepping Stone}\label{coroutine} 493 494 350 While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. \textbf{Coroutine}s are generalized routines which have predefined points where execution is suspended and can be resumed at a later time. 495 351 Therefore, they need to deal with context switches and other context-management operations. 496 352 This proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. 497 353 Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. 498 The core \textbf{api} of coroutines revolves around two features: independent call-stacks and @suspend@/@resume@.499 500 \begin{ figure}354 The core \textbf{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}. 355 356 \begin{table} 501 357 \begin{center} 502 \begin{tabular}{@{}lll@{}} 503 \multicolumn{1}{c}{\textbf{callback}} & \multicolumn{1}{c}{\textbf{output array}} & \multicolumn{1}{c}{\textbf{external state}} \\ 504 \begin{cfa} 505 void fib_func( 506 int n, void (* callback)( int ) 358 \begin{tabular}{c @{\hskip 0.025in}|@{\hskip 0.025in} c @{\hskip 0.025in}|@{\hskip 0.025in} c} 359 \begin{ccode}[tabsize=2] 360 //Using callbacks 361 void fibonacci_func( 362 int n, 363 void (*callback)(int) 507 364 ) { 508 int fn, f1 = 0, f2 = 1; 509 for ( int i = 0; i < n; i++ ) { 510 callback( f1 ); 511 fn = f1 + f2; 512 f1 = f2; f2 = fn; 513 } 514 } 365 int first = 0; 366 int second = 1; 367 int next, i; 368 for(i = 0; i < n; i++) 369 { 370 if(i <= 1) 371 next = i; 372 else { 373 next = f1 + f2; 374 f1 = f2; 375 f2 = next; 376 } 377 callback(next); 378 } 379 } 380 515 381 int main() { 516 void print_fib( int n ) { 517 printf( "%d\n", n ); 518 } 519 fib_func( 10, print_fib ); 520 } 521 522 \end{cfa} 523 & 524 \begin{cfa} 525 void fib_array( 526 int n, int * array 382 void print_fib(int n) { 383 printf("%d\n", n); 384 } 385 386 fibonacci_func( 387 10, print_fib 388 ); 389 390 391 392 } 393 \end{ccode}&\begin{ccode}[tabsize=2] 394 //Using output array 395 void fibonacci_array( 396 int n, 397 int* array 527 398 ) { 528 int fn, f1 = 0, f2 = 1; 529 for ( int i = 0; i < n; i++ ) { 530 array[i] = f1; 531 fn = f1 + f2; 532 f1 = f2; f2 = fn; 533 } 534 } 399 int f1 = 0; int f2 = 1; 400 int next, i; 401 for(i = 0; i < n; i++) 402 { 403 if(i <= 1) 404 next = i; 405 else { 406 next = f1 + f2; 407 f1 = f2; 408 f2 = next; 409 } 410 array[i] = next; 411 } 412 } 413 414 535 415 int main() { 536 416 int a[10]; 537 fib_array( 10, a ); 538 for ( int i = 0; i < 10; i++ ) { 539 printf( "%d\n", a[i] ); 540 } 541 } 542 \end{cfa} 543 & 544 \begin{cfa} 545 546 typedef struct { int f1, f2; } Fib; 547 int fib_state( 548 Fib * fib 417 418 fibonacci_func( 419 10, a 420 ); 421 422 for(int i=0;i<10;i++){ 423 printf("%d\n", a[i]); 424 } 425 426 } 427 \end{ccode}&\begin{ccode}[tabsize=2] 428 //Using external state 429 typedef struct { 430 int f1, f2; 431 } Iterator_t; 432 433 int fibonacci_state( 434 Iterator_t* it 549 435 ) { 550 int ret = fib->f1; 551 int fn = fib->f1 + fib->f2; 552 fib->f2 = fib->f1; fib->f1 = fn; 553 return ret; 554 } 436 int f; 437 f = it->f1 + it->f2; 438 it->f2 = it->f1; 439 it->f1 = max(f,1); 440 return f; 441 } 442 443 444 445 446 447 448 555 449 int main() { 556 Fib fib = { 0, 1 }; 557 558 for ( int i = 0; i < 10; i++ ) { 559 printf( "%d\n", fib_state( &fib ) ); 560 } 561 } 562 \end{cfa} 450 Iterator_t it={0,0}; 451 452 for(int i=0;i<10;i++){ 453 printf("%d\n", 454 fibonacci_state( 455 &it 456 ); 457 ); 458 } 459 460 } 461 \end{ccode} 563 462 \end{tabular} 564 463 \end{center} 565 \caption{ Fibonacci Implementations in C}566 \label{lst:fib -c}567 \end{ figure}464 \caption{Different implementations of a Fibonacci sequence generator in C.} 465 \label{lst:fibonacci-c} 466 \end{table} 568 467 569 468 A good example of a problem made easier with coroutines is generators, e.g., generating the Fibonacci sequence. … … 575 474 Listing \ref{lst:fibonacci-cfa} is an example of a solution to the Fibonacci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the next generation. 576 475 This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. 577 Indeed, this version is as easy to use as the @fibonacci_state@ solution, while the implementation is very similar to the @fibonacci_func@example.476 Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the implementation is very similar to the \code{fibonacci_func} example. 578 477 579 478 \begin{figure} 580 \begin{cfa} 581 coroutine Fibonacci { int fn; }; $\C{// used for communication}$ 582 583 void ?{}( Fibonacci & fib ) with( fib ) { fn = 0; } $\C{// constructor}$ 584 585 void main( Fibonacci & fib ) with( fib ) { $\C{// main called on first resume}$ 586 int fn1, fn2; $\C{// retained between resumes}$ 587 fn = 0; fn1 = fn; $\C{// 1st case}$ 588 suspend(); $\C{// restart last resume}$ 589 fn = 1; fn2 = fn1; fn1 = fn; $\C{// 2nd case}$ 590 suspend(); $\C{// restart last resume}$ 479 \begin{cfacode}[caption={Implementation of Fibonacci using coroutines},label={lst:fibonacci-cfa}] 480 coroutine Fibonacci { 481 int fn; //used for communication 482 }; 483 484 void ?{}(Fibonacci& this) { //constructor 485 this.fn = 0; 486 } 487 488 //main automatically called on first resume 489 void main(Fibonacci& this) with (this) { 490 int fn1, fn2; //retained between resumes 491 fn = 0; 492 fn1 = fn; 493 suspend(this); //return to last resume 494 495 fn = 1; 496 fn2 = fn1; 497 fn1 = fn; 498 suspend(this); //return to last resume 499 591 500 for ( ;; ) { 592 fn = fn1 + fn2; fn2 = fn1; fn1 = fn; $\C{// general case}$ 593 suspend(); $\C{// restart last resume}$ 594 } 595 } 596 int next( Fibonacci & fib ) with( fib ) { 597 resume( fib ); $\C{// restart last suspend}$ 598 return fn; 599 } 600 int main() { 501 fn = fn1 + fn2; 502 fn2 = fn1; 503 fn1 = fn; 504 suspend(this); //return to last resume 505 } 506 } 507 508 int next(Fibonacci& this) { 509 resume(this); //transfer to last suspend 510 return this.fn; 511 } 512 513 void main() { //regular program main 601 514 Fibonacci f1, f2; 602 for ( int i = 1; i <= 10; i ++) {515 for ( int i = 1; i <= 10; i += 1 ) { 603 516 sout | next( f1 ) | next( f2 ) | endl; 604 517 } 605 518 } 606 \end{cfa} 607 \caption{Coroutine Fibonacci } 608 \label{lst:fibonacci-cfa} 519 \end{cfacode} 609 520 \end{figure} 610 521 611 Listing \ref{lst:fmt-line} shows the @Format@coroutine for restructuring text into groups of character blocks of fixed size.522 Listing \ref{lst:fmt-line} shows the \code{Format} coroutine for restructuring text into groups of character blocks of fixed size. 612 523 The example takes advantage of resuming coroutines in the constructor to simplify the code and highlights the idea that interesting control flow can occur in the constructor. 613 524 614 525 \begin{figure} 615 \begin{cfa }[tabsize=3,caption={Formatting text into lines of 5 blocks of 4 characters.},label={lst:fmt-line}]616 // format characters into blocks of 4 and groups of 5 blocks per line526 \begin{cfacode}[tabsize=3,caption={Formatting text into lines of 5 blocks of 4 characters.},label={lst:fmt-line}] 527 //format characters into blocks of 4 and groups of 5 blocks per line 617 528 coroutine Format { 618 char ch; // used for communication619 int g, b; // global because used in destructor529 char ch; //used for communication 530 int g, b; //global because used in destructor 620 531 }; 621 532 622 533 void ?{}(Format& fmt) { 623 resume( fmt ); // prime (start) coroutine534 resume( fmt ); //prime (start) coroutine 624 535 } 625 536 … … 630 541 631 542 void main(Format& fmt) with fmt { 632 for ( ;; ) { // for as many characters633 for(g = 0; g < 5; g++) { // groups of 5 blocks634 for(b = 0; b < 4; fb++) { // blocks of 4 characters543 for ( ;; ) { //for as many characters 544 for(g = 0; g < 5; g++) { //groups of 5 blocks 545 for(b = 0; b < 4; fb++) { //blocks of 4 characters 635 546 suspend(); 636 sout | ch; // print character547 sout | ch; //print character 637 548 } 638 sout | " "; // print block separator549 sout | " "; //print block separator 639 550 } 640 sout | endl; // print group separator551 sout | endl; //print group separator 641 552 } 642 553 } … … 650 561 Format fmt; 651 562 char ch; 652 Eof: for ( ;; ) { // read until end of file653 sin | ch; // read one character654 if(eof(sin)) break Eof; // eof ?655 prt(fmt, ch); // push character for formatting656 } 657 } 658 \end{cfa }563 Eof: for ( ;; ) { //read until end of file 564 sin | ch; //read one character 565 if(eof(sin)) break Eof; //eof ? 566 prt(fmt, ch); //push character for formatting 567 } 568 } 569 \end{cfacode} 659 570 \end{figure} 660 571 … … 671 582 For example, the following code, while looking benign, can run into undefined behaviour because of thunks: 672 583 673 \begin{cfa }674 // async: Runs function asynchronously on another thread584 \begin{cfacode} 585 //async: Runs function asynchronously on another thread 675 586 forall(otype T) 676 587 extern void async(void (*func)(T*), T* obj); … … 681 592 void bar() { 682 593 int a; 683 async(noop, &a); // start thread running noop with argument a684 } 685 \end{cfa }594 async(noop, &a); //start thread running noop with argument a 595 } 596 \end{cfacode} 686 597 687 598 The generated C code\footnote{Code trimmed down for brevity} creates a local thunk to hold type information: 688 599 689 \begin{c fa}600 \begin{ccode} 690 601 extern void async(/* omitted */, void (*func)(void*), void* obj); 691 602 … … 701 612 async(/* omitted */, ((void (*)(void*))(&_thunk0)), (&a)); 702 613 } 703 \end{c fa}704 The problem in this example is a storage management issue, the function pointer @_thunk0@is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behaviour; i.e., the stack-based thunk being destroyed before it can be used.614 \end{ccode} 615 The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behaviour; i.e., the stack-based thunk being destroyed before it can be used. 705 616 This challenge is an extension of challenges that come with second-class routines. 706 617 Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. … … 710 621 One solution to this challenge is to use composition/containment, where coroutine fields are added to manage the coroutine. 711 622 712 \begin{cfa }623 \begin{cfacode} 713 624 struct Fibonacci { 714 int fn; // used for communication715 coroutine c; // composition625 int fn; //used for communication 626 coroutine c; //composition 716 627 }; 717 628 … … 722 633 void ?{}(Fibonacci& this) { 723 634 this.fn = 0; 724 // Call constructor to initialize coroutine635 //Call constructor to initialize coroutine 725 636 (this.c){myMain}; 726 637 } 727 \end{cfa }638 \end{cfacode} 728 639 The downside of this approach is that users need to correctly construct the coroutine handle before using it. 729 640 Like any other objects, the user must carefully choose construction order to prevent usage of objects not yet constructed. … … 734 645 The next alternative is to use language support to annotate coroutines as follows: 735 646 736 \begin{cfa }647 \begin{cfacode} 737 648 coroutine Fibonacci { 738 int fn; // used for communication649 int fn; //used for communication 739 650 }; 740 \end{cfa }741 The @coroutine@keyword means the compiler can find and inject code where needed.651 \end{cfacode} 652 The \code{coroutine} keyword means the compiler can find and inject code where needed. 742 653 The downside of this approach is that it makes coroutine a special case in the language. 743 654 Users wanting to extend coroutines or build their own for various reasons can only do so in ways offered by the language. … … 750 661 For coroutines as for threads, many implementations are based on routine pointers or function objects~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}. 751 662 For example, Boost implements coroutines in terms of four functor object types: 752 \begin{cfa }663 \begin{cfacode} 753 664 asymmetric_coroutine<>::pull_type 754 665 asymmetric_coroutine<>::push_type 755 666 symmetric_coroutine<>::call_type 756 667 symmetric_coroutine<>::yield_type 757 \end{cfa }758 Often, the canonical threading paradigm in languages is based on function pointers, @pthread@being one of the most well-known examples.668 \end{cfacode} 669 Often, the canonical threading paradigm in languages is based on function pointers, \texttt{pthread} being one of the most well-known examples. 759 670 The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. 760 671 Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little. 761 672 762 A variation of this would be to use a simple function pointer in the same way @pthread@does for threads:763 \begin{cfa }673 A variation of this would be to use a simple function pointer in the same way \texttt{pthread} does for threads: 674 \begin{cfacode} 764 675 void foo( coroutine_t cid, void* arg ) { 765 676 int* value = (int*)arg; 766 // Coroutine body677 //Coroutine body 767 678 } 768 679 … … 772 683 coroutine_resume( &cid ); 773 684 } 774 \end{cfa }685 \end{cfacode} 775 686 This semantics is more common for thread interfaces but coroutines work equally well. 776 687 As discussed in section \ref{threads}, this approach is superseded by static approaches in terms of expressivity. … … 779 690 780 691 Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. 781 This approach defines a coroutine as anything that satisfies the trait @is_coroutine@(as defined below) and is used as a coroutine.782 783 \begin{cfa }692 This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} (as defined below) and is used as a coroutine. 693 694 \begin{cfacode} 784 695 trait is_coroutine(dtype T) { 785 696 void main(T& this); … … 789 700 forall( dtype T | is_coroutine(T) ) void suspend(T&); 790 701 forall( dtype T | is_coroutine(T) ) void resume (T&); 791 \end{cfa }792 This ensures that an object is not a coroutine until @resume@is called on the object.793 Correspondingly, any object that is passed to @resume@ is a coroutine since it must satisfy the @is_coroutine@trait to compile.794 The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the @get_coroutine@routine.795 The \CFA keyword @coroutine@simply has the effect of implementing the getter and forward declarations required for users to implement the main routine.702 \end{cfacode} 703 This ensures that an object is not a coroutine until \code{resume} is called on the object. 704 Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. 705 The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. 706 The \CFA keyword \code{coroutine} simply has the effect of implementing the getter and forward declarations required for users to implement the main routine. 796 707 797 708 \begin{center} 798 709 \begin{tabular}{c c c} 799 \begin{cfa }[tabsize=3]710 \begin{cfacode}[tabsize=3] 800 711 coroutine MyCoroutine { 801 712 int someValue; 802 713 }; 803 \end{cfa } & == & \begin{cfa}[tabsize=3]714 \end{cfacode} & == & \begin{cfacode}[tabsize=3] 804 715 struct MyCoroutine { 805 716 int someValue; … … 815 726 816 727 void main(struct MyCoroutine* this); 817 \end{cfa }728 \end{cfacode} 818 729 \end{tabular} 819 730 \end{center} … … 825 736 Both user and kernel threads are supported, where user threads are the concurrency mechanism and kernel threads are the parallel mechanism. 826 737 User threads offer a flexible and lightweight interface. 827 A thread can be declared using a struct declaration @thread@as follows:828 829 \begin{cfa }738 A thread can be declared using a struct declaration \code{thread} as follows: 739 740 \begin{cfacode} 830 741 thread foo {}; 831 \end{cfa }742 \end{cfacode} 832 743 833 744 As for coroutines, the keyword is a thin wrapper around a \CFA trait: 834 745 835 \begin{cfa }746 \begin{cfacode} 836 747 trait is_thread(dtype T) { 837 748 void ^?{}(T & mutex this); … … 839 750 thread_desc* get_thread(T & this); 840 751 }; 841 \end{cfa }752 \end{cfacode} 842 753 843 754 Obviously, for this thread implementation to be useful it must run some user code. 844 755 Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). 845 However, this proposal considers that statically tying a @main@routine to a thread supersedes this approach.846 Since the @main@routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics to use overloading to declare mains for different threads (the normal main being the main of the initial thread).847 As such the @main@routine of a thread can be defined as848 \begin{cfa }756 However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. 757 Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics to use overloading to declare mains for different threads (the normal main being the main of the initial thread). 758 As such the \code{main} routine of a thread can be defined as 759 \begin{cfacode} 849 760 thread foo {}; 850 761 … … 852 763 sout | "Hello World!" | endl; 853 764 } 854 \end{cfa }855 856 In this example, threads of type @foo@ start execution in the @void main(foo &)@ routine, which prints @"Hello World!".@While this paper encourages this approach to enforce strongly typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity.765 \end{cfacode} 766 767 In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!".} While this paper encourages this approach to enforce strongly typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. 857 768 With the static semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously. 858 \begin{cfa }769 \begin{cfacode} 859 770 typedef void (*voidFunc)(int); 860 771 … … 870 781 871 782 void main(FuncRunner & this) { 872 // thread starts here and runs the function783 //thread starts here and runs the function 873 784 this.func( this.arg ); 874 785 } … … 882 793 return 0? 883 794 } 884 \end{cfa }795 \end{cfacode} 885 796 886 797 A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \textbf{api}. 887 798 888 799 Of course, for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. 889 While using an \textbf{api} such as @fork@ and @join@is relatively common in the literature, such an interface is unnecessary.890 Indeed, the simplest approach is to use \textbf{raii} principles and have threads @fork@ after the constructor has completed and @join@before the destructor runs.891 \begin{cfa }800 While using an \textbf{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. 801 Indeed, the simplest approach is to use \textbf{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs. 802 \begin{cfacode} 892 803 thread World; 893 804 … … 898 809 void main() { 899 810 World w; 900 // Thread forks here901 902 // Printing "Hello " and "World!" are run concurrently811 //Thread forks here 812 813 //Printing "Hello " and "World!" are run concurrently 903 814 sout | "Hello " | endl; 904 815 905 // Implicit join at end of scope906 } 907 \end{cfa }816 //Implicit join at end of scope 817 } 818 \end{cfacode} 908 819 909 820 This semantic has several advantages over explicit semantics: a thread is always started and stopped exactly once, users cannot make any programming errors, and it naturally scales to multiple threads meaning basic synchronization is very simple. 910 821 911 \begin{cfa }822 \begin{cfacode} 912 823 thread MyThread { 913 824 //... 914 825 }; 915 826 916 // main827 //main 917 828 void main(MyThread& this) { 918 829 //... … … 921 832 void foo() { 922 833 MyThread thrds[10]; 923 // Start 10 threads at the beginning of the scope834 //Start 10 threads at the beginning of the scope 924 835 925 836 DoStuff(); 926 837 927 // Wait for the 10 threads to finish928 } 929 \end{cfa }838 //Wait for the 10 threads to finish 839 } 840 \end{cfacode} 930 841 931 842 However, one of the drawbacks of this approach is that threads always form a tree where nodes must always outlive their children, i.e., they are always destroyed in the opposite order of construction because of C scoping rules. 932 843 This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created. 933 844 934 \begin{cfa }845 \begin{cfacode} 935 846 thread MyThread { 936 847 //... … … 944 855 MyThread* long_lived; 945 856 { 946 // Start a thread at the beginning of the scope857 //Start a thread at the beginning of the scope 947 858 MyThread short_lived; 948 859 949 // create another thread that will outlive the thread in this scope860 //create another thread that will outlive the thread in this scope 950 861 long_lived = new MyThread; 951 862 952 863 DoStuff(); 953 864 954 // Wait for the thread short_lived to finish865 //Wait for the thread short_lived to finish 955 866 } 956 867 DoMoreStuff(); 957 868 958 // Now wait for the long_lived to finish869 //Now wait for the long_lived to finish 959 870 delete long_lived; 960 871 } 961 \end{cfa }872 \end{cfacode} 962 873 963 874 … … 977 888 At the lowest level, concurrent paradigms are implemented as atomic operations and locks. 978 889 Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. 979 However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{H ochstein05}.890 However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}. 980 891 981 892 An approach that is worth mentioning because it is gaining in popularity is transactional memory~\cite{Herlihy93}. … … 998 909 Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to higher-level concurrency techniques, which sacrifice some performance in order to improve ease of use. 999 910 Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. 1000 For example, the \CC @std::atomic<T>@offers an easy way to express mutual-exclusion on a restricted set of operations (e.g., reading/writing large types atomically).911 For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g., reading/writing large types atomically). 1001 912 Another challenge with low-level locks is composability. 1002 913 Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. … … 1027 938 This concept is generally associated with object-oriented languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. 1028 939 The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it: 1029 \begin{cfa }940 \begin{cfacode} 1030 941 typedef /*some monitor type*/ monitor; 1031 942 int f(monitor & m); 1032 943 1033 944 int main() { 1034 monitor m; // Handle m1035 f(m); // Routine using handle1036 } 1037 \end{cfa }945 monitor m; //Handle m 946 f(m); //Routine using handle 947 } 948 \end{cfacode} 1038 949 1039 950 % ====================================================================== … … 1045 956 First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. 1046 957 This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. 1047 Therefore, monitors are non-copy-able objects ( @dtype@).958 Therefore, monitors are non-copy-able objects (\code{dtype}). 1048 959 1049 960 Another aspect to consider is when a monitor acquires its mutual exclusion. 1050 961 For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. 1051 Passthrough can occur for generic helper routines ( @swap@, @sort@, etc.) or specific helper routines like the following to implement an atomic counter:1052 1053 \begin{cfa }962 Passthrough can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter: 963 964 \begin{cfacode} 1054 965 monitor counter_t { /*...see section $\ref{data}$...*/ }; 1055 966 1056 void ?{}(counter_t & nomutex this); // constructor1057 size_t ++?(counter_t & mutex this); // increment1058 1059 // need for mutex is platform dependent1060 void ?{}(size_t * this, counter_t & mutex cnt); // conversion1061 \end{cfa }967 void ?{}(counter_t & nomutex this); //constructor 968 size_t ++?(counter_t & mutex this); //increment 969 970 //need for mutex is platform dependent 971 void ?{}(size_t * this, counter_t & mutex cnt); //conversion 972 \end{cfacode} 1062 973 This counter is used as follows: 1063 974 \begin{center} 1064 975 \begin{tabular}{c @{\hskip 0.35in} c @{\hskip 0.35in} c} 1065 \begin{cfa }1066 // shared counter976 \begin{cfacode} 977 //shared counter 1067 978 counter_t cnt1, cnt2; 1068 979 1069 // multiple threads access counter980 //multiple threads access counter 1070 981 thread 1 : cnt1++; cnt2++; 1071 982 thread 2 : cnt1++; cnt2++; … … 1073 984 ... 1074 985 thread N : cnt1++; cnt2++; 1075 \end{cfa }986 \end{cfacode} 1076 987 \end{tabular} 1077 988 \end{center} 1078 Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC template @std::atomic@.1079 1080 Here, the constructor ( @?{}@) uses the @nomutex@keyword to signify that it does not acquire the monitor mutual-exclusion when constructing.1081 This semantics is because an object not yet con structed should never be shared and therefore does not require mutual exclusion.989 Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC template \code{std::atomic}. 990 991 Here, the constructor (\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. 992 This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. 1082 993 Furthermore, it allows the implementation greater freedom when it initializes the monitor locking. 1083 The prefix increment operator uses @mutex@to protect the incrementing process from race conditions.1084 Finally, there is a conversion operator from @counter_t@ to @size_t@.1085 This conversion may or may not require the @mutex@ keyword depending on whether or not reading a @size_t@is an atomic operation.994 The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. 995 Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. 996 This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation. 1086 997 1087 998 For maximum usability, monitors use \textbf{multi-acq} semantics, which means a single thread can acquire the same monitor multiple times without deadlock. 1088 999 For example, listing \ref{fig:search} uses recursion and \textbf{multi-acq} to print values inside a binary tree. 1089 1000 \begin{figure} 1090 \begin{cfa }[caption={Recursive printing algorithm using \textbf{multi-acq}.},label={fig:search}]1001 \begin{cfacode}[caption={Recursive printing algorithm using \textbf{multi-acq}.},label={fig:search}] 1091 1002 monitor printer { ... }; 1092 1003 struct tree { … … 1101 1012 print(p, t->right); 1102 1013 } 1103 \end{cfa }1014 \end{cfacode} 1104 1015 \end{figure} 1105 1016 1106 Having both @mutex@ and @nomutex@keywords can be redundant, depending on the meaning of a routine having neither of these keywords.1107 For example, it is reasonable that it should default to the safest option ( @mutex@) when given a routine without qualifiers @void foo(counter_t & this)@, whereas assuming @nomutex@is unsafe and may cause subtle errors.1108 On the other hand, @nomutex@is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''.1017 Having both \code{mutex} and \code{nomutex} keywords can be redundant, depending on the meaning of a routine having neither of these keywords. 1018 For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. 1019 On the other hand, \code{nomutex} is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''. 1109 1020 Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. 1110 1021 Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. … … 1112 1023 Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. 1113 1024 Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. 1114 For this reason, \CFA only has the @mutex@ keyword and uses no keyword to mean @nomutex@.1115 1116 The next semantic decision is to establish when @mutex@may be used as a type qualifier.1025 For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}. 1026 1027 The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. 1117 1028 Consider the following declarations: 1118 \begin{cfa }1029 \begin{cfacode} 1119 1030 int f1(monitor & mutex m); 1120 1031 int f2(const monitor & mutex m); … … 1122 1033 int f4(monitor * mutex m []); 1123 1034 int f5(graph(monitor *) & mutex m); 1124 \end{cfa }1035 \end{cfacode} 1125 1036 The problem is to identify which object(s) should be acquired. 1126 1037 Furthermore, each object needs to be acquired only once. 1127 In the case of simple routines like @f1@ and @f2@it is easy to identify an exhaustive list of objects to acquire on entry.1128 Adding indirections ( @f3@) still allows the compiler and programmer to identify which object is acquired.1129 However, adding in arrays ( @f4@) makes it much harder.1038 In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. 1039 Adding indirections (\code{f3}) still allows the compiler and programmer to identify which object is acquired. 1040 However, adding in arrays (\code{f4}) makes it much harder. 1130 1041 Array lengths are not necessarily known in C, and even then, making sure objects are only acquired once becomes none-trivial. 1131 This problem can be extended to absurd limits like @f5@, which uses a graph of monitors.1042 This problem can be extended to absurd limits like \code{f5}, which uses a graph of monitors. 1132 1043 To make the issue tractable, this project imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with at most one level of indirection (ignoring potential qualifiers). 1133 Also note that while routine @f3@ can be supported, meaning that monitor @**m@is acquired, passing an array to this routine would be type-safe and yet result in undefined behaviour because only the first element of the array is acquired.1044 Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is acquired, passing an array to this routine would be type-safe and yet result in undefined behaviour because only the first element of the array is acquired. 1134 1045 However, this ambiguity is part of the C type-system with respects to arrays. 1135 For this reason, @mutex@is disallowed in the context where arrays may be passed:1136 \begin{cfa }1137 int f1(monitor & mutex m); // Okay : recommended case1138 int f2(monitor * mutex m); // Not Okay : Could be an array1139 int f3(monitor mutex m []); // Not Okay : Array of unknown length1140 int f4(monitor ** mutex m); // Not Okay : Could be an array1141 int f5(monitor * mutex m []); // Not Okay : Array of unknown length1142 \end{cfa }1046 For this reason, \code{mutex} is disallowed in the context where arrays may be passed: 1047 \begin{cfacode} 1048 int f1(monitor & mutex m); //Okay : recommended case 1049 int f2(monitor * mutex m); //Not Okay : Could be an array 1050 int f3(monitor mutex m []); //Not Okay : Array of unknown length 1051 int f4(monitor ** mutex m); //Not Okay : Could be an array 1052 int f5(monitor * mutex m []); //Not Okay : Array of unknown length 1053 \end{cfacode} 1143 1054 Note that not all array functions are actually distinct in the type system. 1144 1055 However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic. … … 1146 1057 Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to specify the object that acquires mutual-exclusion. 1147 1058 A consequence of this approach is that it extends naturally to multi-monitor calls. 1148 \begin{cfa }1059 \begin{cfacode} 1149 1060 int f(MonitorA & mutex a, MonitorB & mutex b); 1150 1061 … … 1152 1063 MonitorB b; 1153 1064 f(a,b); 1154 \end{cfa }1065 \end{cfacode} 1155 1066 While OO monitors could be extended with a mutex qualifier for multiple-monitor calls, no example of this feature could be found. 1156 1067 The capability to acquire multiple locks before entering a critical section is called \emph{\textbf{bulk-acq}}. … … 1160 1071 This consistent ordering means acquiring multiple monitors is safe from deadlock when using \textbf{bulk-acq}. 1161 1072 However, users can still force the acquiring order. 1162 For example, notice which routines use @mutex@/@nomutex@and how this affects acquiring order:1163 \begin{cfa }1164 void foo(A& mutex a, B& mutex b) { // acquire a & b1073 For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects acquiring order: 1074 \begin{cfacode} 1075 void foo(A& mutex a, B& mutex b) { //acquire a & b 1165 1076 ... 1166 1077 } 1167 1078 1168 void bar(A& mutex a, B& /*nomutex*/ b) { // acquire a1169 ... foo(a, b); ... // acquire b1170 } 1171 1172 void baz(A& /*nomutex*/ a, B& mutex b) { // acquire b1173 ... foo(a, b); ... // acquire a1174 } 1175 \end{cfa }1176 The \textbf{multi-acq} monitor lock allows a monitor lock to be acquired by both @bar@ or @baz@ and acquired again in @foo@.1177 In the calls to @bar@ and @baz@the monitors are acquired in opposite order.1079 void bar(A& mutex a, B& /*nomutex*/ b) { //acquire a 1080 ... foo(a, b); ... //acquire b 1081 } 1082 1083 void baz(A& /*nomutex*/ a, B& mutex b) { //acquire b 1084 ... foo(a, b); ... //acquire a 1085 } 1086 \end{cfacode} 1087 The \textbf{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. 1088 In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order. 1178 1089 1179 1090 However, such use leads to lock acquiring order problems. 1180 In the example above, the user uses implicit ordering in the case of function @foo@ but explicit ordering in the case of @bar@ and @baz@.1091 In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. 1181 1092 This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore undefined behaviour. 1182 1093 As shown~\cite{Lister77}, solving this problem requires: … … 1190 1101 1191 1102 For example, \textbf{multi-acq} and \textbf{bulk-acq} can be used together in interesting ways: 1192 \begin{cfa }1103 \begin{cfacode} 1193 1104 monitor bank { ... }; 1194 1105 … … 1199 1110 deposit( yourbank, me2you ); 1200 1111 } 1201 \end{cfa }1112 \end{cfacode} 1202 1113 This example shows a trivial solution to the bank-account transfer problem~\cite{BankTransfer}. 1203 1114 Without \textbf{multi-acq} and \textbf{bulk-acq}, the solution to this problem is much more involved and requires careful engineering. 1204 1115 1205 1206 \subsection{\protect\lstinline|mutex| statement} \label{mutex-stmt} 1207 1208 The call semantics discussed above have one software engineering issue: only a routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the @mutex@ statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. 1209 Table \ref{lst:mutex-stmt} shows an example of the @mutex@ statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. 1210 Beyond naming, the @mutex@ statement has no semantic difference from a routine call with @mutex@ parameters. 1116 \subsection{\code{mutex} statement} \label{mutex-stmt} 1117 1118 The call semantics discussed above have one software engineering issue: only a routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. 1119 Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. 1120 Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters. 1211 1121 1212 1122 \begin{table} 1213 1123 \begin{center} 1214 1124 \begin{tabular}{|c|c|} 1215 function call & @mutex@statement \\1125 function call & \code{mutex} statement \\ 1216 1126 \hline 1217 \begin{cfa }[tabsize=3]1127 \begin{cfacode}[tabsize=3] 1218 1128 monitor M {}; 1219 1129 void foo( M & mutex m1, M & mutex m2 ) { 1220 // critical section1130 //critical section 1221 1131 } 1222 1132 … … 1224 1134 foo( m1, m2 ); 1225 1135 } 1226 \end{cfa }&\begin{cfa}[tabsize=3]1136 \end{cfacode}&\begin{cfacode}[tabsize=3] 1227 1137 monitor M {}; 1228 1138 void bar( M & m1, M & m2 ) { 1229 1139 mutex(m1, m2) { 1230 // critical section1231 } 1232 } 1233 1234 1235 \end{cfa }1140 //critical section 1141 } 1142 } 1143 1144 1145 \end{cfacode} 1236 1146 \end{tabular} 1237 1147 \end{center} 1238 \caption{Regular call semantics vs. \ protect\lstinline|mutex|statement}1148 \caption{Regular call semantics vs. \code{mutex} statement} 1239 1149 \label{lst:mutex-stmt} 1240 1150 \end{table} … … 1249 1159 This data should be intrinsic to the monitor declaration to prevent any accidental use of data without its appropriate protection. 1250 1160 For example, here is a complete version of the counter shown in section \ref{call}: 1251 \begin{cfa }1161 \begin{cfacode} 1252 1162 monitor counter_t { 1253 1163 int value; … … 1262 1172 } 1263 1173 1264 // need for mutex is platform dependent here1174 //need for mutex is platform dependent here 1265 1175 void ?{}(int * this, counter_t & mutex cnt) { 1266 1176 *this = (int)cnt; 1267 1177 } 1268 \end{cfa }1269 1270 Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the @monitor@keyword.1178 \end{cfacode} 1179 1180 Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. 1271 1181 The monitor trait is: 1272 \begin{cfa }1182 \begin{cfacode} 1273 1183 trait is_monitor(dtype T) { 1274 1184 monitor_desc * get_monitor( T & ); 1275 1185 void ^?{}( T & mutex ); 1276 1186 }; 1277 \end{cfa }1278 Note that the destructor of a monitor must be a @mutex@routine to prevent deallocation while a thread is accessing the monitor.1279 As with any object, calls to a monitor, using @mutex@or otherwise, is undefined behaviour after the destructor has run.1187 \end{cfacode} 1188 Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. 1189 As with any object, calls to a monitor, using \code{mutex} or otherwise, is undefined behaviour after the destructor has run. 1280 1190 1281 1191 % ====================================================================== … … 1292 1202 First, here is a simple example of internal scheduling: 1293 1203 1294 \begin{cfa }1204 \begin{cfacode} 1295 1205 monitor A { 1296 1206 condition e; … … 1299 1209 void foo(A& mutex a1, A& mutex a2) { 1300 1210 ... 1301 // Wait for cooperation from bar()1211 //Wait for cooperation from bar() 1302 1212 wait(a1.e); 1303 1213 ... … … 1305 1215 1306 1216 void bar(A& mutex a1, A& mutex a2) { 1307 // Provide cooperation for foo()1217 //Provide cooperation for foo() 1308 1218 ... 1309 // Unblock foo1219 //Unblock foo 1310 1220 signal(a1.e); 1311 1221 } 1312 \end{cfa }1222 \end{cfacode} 1313 1223 There are two details to note here. 1314 First, @signal@is a delayed operation; it only unblocks the waiting thread when it reaches the end of the critical section.1224 First, \code{signal} is a delayed operation; it only unblocks the waiting thread when it reaches the end of the critical section. 1315 1225 This semantics is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. 1316 The alternative is to return immediately after the call to @signal@, which is significantly more restrictive.1317 Second, in \CFA, while it is common to store a @condition@ as a field of the monitor, a @condition@variable can be stored/created independently of a monitor.1318 Here routine @foo@ waits for the @signal@ from @bar@before making further progress, ensuring a basic ordering.1319 1320 An important aspect of the implementation is that \CFA does not allow barging, which means that once function @bar@ releases the monitor, @foo@is guaranteed to be the next thread to acquire the monitor (unless some other thread waited on the same condition).1226 The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. 1227 Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. 1228 Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering. 1229 1230 An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to be the next thread to acquire the monitor (unless some other thread waited on the same condition). 1321 1231 This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. 1322 1232 The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. … … 1330 1240 It is easy to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. 1331 1241 Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. 1332 Indeed, @wait@statements always use the implicit condition variable as parameters and explicitly name the monitors (A and B) associated with the condition.1242 Indeed, \code{wait} statements always use the implicit condition variable as parameters and explicitly name the monitors (A and B) associated with the condition. 1333 1243 Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. 1334 1244 The example below shows the simple case of having two threads (one for each column) and a single monitor A. … … 1336 1246 \begin{multicols}{2} 1337 1247 thread 1 1338 \begin{ cfa}1248 \begin{pseudo} 1339 1249 acquire A 1340 1250 wait A 1341 1251 release A 1342 \end{ cfa}1252 \end{pseudo} 1343 1253 1344 1254 \columnbreak 1345 1255 1346 1256 thread 2 1347 \begin{ cfa}1257 \begin{pseudo} 1348 1258 acquire A 1349 1259 signal A 1350 1260 release A 1351 \end{ cfa}1261 \end{pseudo} 1352 1262 \end{multicols} 1353 1263 One thread acquires before waiting (atomically blocking and releasing A) and the other acquires before signalling. 1354 It is important to note here that both @wait@ and @signal@must be called with the proper monitor(s) already acquired.1264 It is important to note here that both \code{wait} and \code{signal} must be called with the proper monitor(s) already acquired. 1355 1265 This semantic is a logical requirement for barging prevention. 1356 1266 1357 1267 A direct extension of the previous example is a \textbf{bulk-acq} version: 1358 1268 \begin{multicols}{2} 1359 \begin{ cfa}1269 \begin{pseudo} 1360 1270 acquire A & B 1361 1271 wait A & B 1362 1272 release A & B 1363 \end{ cfa}1273 \end{pseudo} 1364 1274 \columnbreak 1365 \begin{ cfa}1275 \begin{pseudo} 1366 1276 acquire A & B 1367 1277 signal A & B 1368 1278 release A & B 1369 \end{ cfa}1279 \end{pseudo} 1370 1280 \end{multicols} 1371 1281 \noindent This version uses \textbf{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. … … 1375 1285 1376 1286 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. 1377 For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a @wait@is made by a thread that holds more than one monitor.1378 For example, the following cfa-code runs into the nested-monitor problem:1287 For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. 1288 For example, the following pseudo-code runs into the nested-monitor problem: 1379 1289 \begin{multicols}{2} 1380 \begin{ cfa}1290 \begin{pseudo} 1381 1291 acquire A 1382 1292 acquire B … … 1384 1294 release B 1385 1295 release A 1386 \end{ cfa}1296 \end{pseudo} 1387 1297 1388 1298 \columnbreak 1389 1299 1390 \begin{ cfa}1300 \begin{pseudo} 1391 1301 acquire A 1392 1302 acquire B … … 1394 1304 release B 1395 1305 release A 1396 \end{ cfa}1306 \end{pseudo} 1397 1307 \end{multicols} 1398 \noindent The @wait@ only releases monitor @B@ so the signalling thread cannot acquire monitor @A@ to get to the @signal@.1399 Attempting release of all acquired monitors at the @wait@ introduces a different set of problems, such as releasing monitor @C@, which has nothing to do with the @signal@.1308 \noindent The \code{wait} only releases monitor \code{B} so the signalling thread cannot acquire monitor \code{A} to get to the \code{signal}. 1309 Attempting release of all acquired monitors at the \code{wait} introduces a different set of problems, such as releasing monitor \code{C}, which has nothing to do with the \code{signal}. 1400 1310 1401 1311 However, for monitors as for locks, it is possible to write a program using nesting without encountering any problems if nesting is done correctly. 1402 For example, the next cfa-code snippet acquires monitors {\sf A} then {\sf B} before waiting, while only acquiring {\sf B} when signalling, effectively avoiding the Nested Monitor Problem~\cite{Lister77}.1312 For example, the next pseudo-code snippet acquires monitors {\sf A} then {\sf B} before waiting, while only acquiring {\sf B} when signalling, effectively avoiding the Nested Monitor Problem~\cite{Lister77}. 1403 1313 1404 1314 \begin{multicols}{2} 1405 \begin{ cfa}1315 \begin{pseudo} 1406 1316 acquire A 1407 1317 acquire B … … 1409 1319 release B 1410 1320 release A 1411 \end{ cfa}1321 \end{pseudo} 1412 1322 1413 1323 \columnbreak 1414 1324 1415 \begin{ cfa}1325 \begin{pseudo} 1416 1326 1417 1327 acquire B … … 1419 1329 release B 1420 1330 1421 \end{ cfa}1331 \end{pseudo} 1422 1332 \end{multicols} 1423 1333 … … 1431 1341 1432 1342 A larger example is presented to show complex issues for \textbf{bulk-acq} and its implementation options are analyzed. 1433 Listing \ref{lst:int-bulk- cfa} shows an example where \textbf{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the cfa-code in listing \ref{lst:int-bulk-cfa}.1434 For the purpose of translating the given cfa-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., @mutex@ parameters, global variables, pointer parameters, or using locals with the @mutex@statement.1435 1436 \begin{figure} 1343 Listing \ref{lst:int-bulk-pseudo} shows an example where \textbf{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. 1344 For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex} statement. 1345 1346 \begin{figure}[!t] 1437 1347 \begin{multicols}{2} 1438 1348 Waiting thread 1439 \begin{ cfa}[numbers=left]1349 \begin{pseudo}[numbers=left] 1440 1350 acquire A 1441 // Code Section 11351 //Code Section 1 1442 1352 acquire A & B 1443 // Code Section 21353 //Code Section 2 1444 1354 wait A & B 1445 // Code Section 31355 //Code Section 3 1446 1356 release A & B 1447 // Code Section 41357 //Code Section 4 1448 1358 release A 1449 \end{ cfa}1359 \end{pseudo} 1450 1360 \columnbreak 1451 1361 Signalling thread 1452 \begin{ cfa}[numbers=left, firstnumber=10,escapechar=|]1362 \begin{pseudo}[numbers=left, firstnumber=10,escapechar=|] 1453 1363 acquire A 1454 // Code Section 51364 //Code Section 5 1455 1365 acquire A & B 1456 // Code Section 61366 //Code Section 6 1457 1367 |\label{line:signal1}|signal A & B 1458 // Code Section 71368 //Code Section 7 1459 1369 |\label{line:releaseFirst}|release A & B 1460 // Code Section 81370 //Code Section 8 1461 1371 |\label{line:lastRelease}|release A 1462 \end{ cfa}1372 \end{pseudo} 1463 1373 \end{multicols} 1464 \begin{cfa }[caption={Internal scheduling with \textbf{bulk-acq}},label={lst:int-bulk-cfa}]1465 \end{cfa }1374 \begin{cfacode}[caption={Internal scheduling with \textbf{bulk-acq}},label={lst:int-bulk-pseudo}] 1375 \end{cfacode} 1466 1376 \begin{center} 1467 \begin{cfa }[xleftmargin=.4\textwidth]1377 \begin{cfacode}[xleftmargin=.4\textwidth] 1468 1378 monitor A a; 1469 1379 monitor B b; 1470 1380 condition c; 1471 \end{cfa }1381 \end{cfacode} 1472 1382 \end{center} 1473 1383 \begin{multicols}{2} 1474 1384 Waiting thread 1475 \begin{cfa }1385 \begin{cfacode} 1476 1386 mutex(a) { 1477 // Code Section 11387 //Code Section 1 1478 1388 mutex(a, b) { 1479 // Code Section 21389 //Code Section 2 1480 1390 wait(c); 1481 // Code Section 31482 } 1483 // Code Section 41484 } 1485 \end{cfa }1391 //Code Section 3 1392 } 1393 //Code Section 4 1394 } 1395 \end{cfacode} 1486 1396 \columnbreak 1487 1397 Signalling thread 1488 \begin{cfa }1398 \begin{cfacode} 1489 1399 mutex(a) { 1490 // Code Section 51400 //Code Section 5 1491 1401 mutex(a, b) { 1492 // Code Section 61402 //Code Section 6 1493 1403 signal(c); 1494 // Code Section 71495 } 1496 // Code Section 81497 } 1498 \end{cfa }1404 //Code Section 7 1405 } 1406 //Code Section 8 1407 } 1408 \end{cfacode} 1499 1409 \end{multicols} 1500 \begin{cfa }[caption={Equivalent \CFA code for listing \ref{lst:int-bulk-cfa}},label={lst:int-bulk-cfa}]1501 \end{cfa }1410 \begin{cfacode}[caption={Equivalent \CFA code for listing \ref{lst:int-bulk-pseudo}},label={lst:int-bulk-cfa}] 1411 \end{cfacode} 1502 1412 \begin{multicols}{2} 1503 1413 Waiter 1504 \begin{ cfa}[numbers=left]1414 \begin{pseudo}[numbers=left] 1505 1415 acquire A 1506 1416 acquire A & B … … 1508 1418 release A & B 1509 1419 release A 1510 \end{ cfa}1420 \end{pseudo} 1511 1421 1512 1422 \columnbreak 1513 1423 1514 1424 Signaller 1515 \begin{ cfa}[numbers=left, firstnumber=6,escapechar=|]1425 \begin{pseudo}[numbers=left, firstnumber=6,escapechar=|] 1516 1426 acquire A 1517 1427 acquire A & B 1518 1428 signal A & B 1519 1429 release A & B 1520 |\label{line:secret}|// Secretly keep B here1430 |\label{line:secret}|//Secretly keep B here 1521 1431 release A 1522 // Wakeup waiter and transfer A & B1523 \end{ cfa}1432 //Wakeup waiter and transfer A & B 1433 \end{pseudo} 1524 1434 \end{multicols} 1525 \begin{cfa }[caption={Listing \ref{lst:int-bulk-cfa}, with delayed signalling comments},label={lst:int-secret}]1526 \end{cfa }1435 \begin{cfacode}[caption={Listing \ref{lst:int-bulk-pseudo}, with delayed signalling comments},label={lst:int-secret}] 1436 \end{cfacode} 1527 1437 \end{figure} 1528 1438 1529 The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk- cfa}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors.1530 The root of the problem is that \textbf{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous cfa-code.1531 When the signaller thread reaches the location where it should ``release @A & B@'' (listing \ref{lst:int-bulk-cfa} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor @B@to the waiting thread.1532 This ownership transfer is required in order to prevent barging into @B@ by another thread, since both the signalling and signalled threads still need monitor @A@.1439 The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. 1440 The root of the problem is that \textbf{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. 1441 When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. 1442 This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. 1533 1443 There are three options: 1534 1444 … … 1542 1452 1543 1453 However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. 1544 Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor @A@, using a different condition variable.1545 Because the third thread is signalled when secretly holding @B@, the goal becomes unreachable.1454 Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. 1455 Because the third thread is signalled when secretly holding \code{B}, the goal becomes unreachable. 1546 1456 Depending on the order of signals (listing \ref{lst:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen: 1547 1457 1548 \paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor @A@needs to be passed to thread $\beta$ when thread $\alpha$ is done with it.1549 \paragraph{Case 2: thread $\beta$ goes first.} In this case, the problem is that monitor @B@ needs to be retained and passed to thread $\alpha$ along with monitor @A@, which can be done directly or possibly using thread $\beta$ as an intermediate.1458 \paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor \code{A} needs to be passed to thread $\beta$ when thread $\alpha$ is done with it. 1459 \paragraph{Case 2: thread $\beta$ goes first.} In this case, the problem is that monitor \code{B} needs to be retained and passed to thread $\alpha$ along with monitor \code{A}, which can be done directly or possibly using thread $\beta$ as an intermediate. 1550 1460 \\ 1551 1461 … … 1561 1471 \begin{multicols}{3} 1562 1472 Thread $\alpha$ 1563 \begin{ cfa}[numbers=left, firstnumber=1]1473 \begin{pseudo}[numbers=left, firstnumber=1] 1564 1474 acquire A 1565 1475 acquire A & B … … 1567 1477 release A & B 1568 1478 release A 1569 \end{ cfa}1479 \end{pseudo} 1570 1480 \columnbreak 1571 1481 Thread $\gamma$ 1572 \begin{ cfa}[numbers=left, firstnumber=6, escapechar=|]1482 \begin{pseudo}[numbers=left, firstnumber=6, escapechar=|] 1573 1483 acquire A 1574 1484 acquire A & B … … 1577 1487 |\label{line:signal-a}|signal A 1578 1488 |\label{line:release-a}|release A 1579 \end{ cfa}1489 \end{pseudo} 1580 1490 \columnbreak 1581 1491 Thread $\beta$ 1582 \begin{ cfa}[numbers=left, firstnumber=12, escapechar=|]1492 \begin{pseudo}[numbers=left, firstnumber=12, escapechar=|] 1583 1493 acquire A 1584 1494 wait A 1585 1495 |\label{line:release-aa}|release A 1586 \end{ cfa}1496 \end{pseudo} 1587 1497 \end{multicols} 1588 \begin{cfa }[caption={Pseudo-code for the three thread example.},label={lst:dependency}]1589 \end{cfa }1498 \begin{cfacode}[caption={Pseudo-code for the three thread example.},label={lst:dependency}] 1499 \end{cfacode} 1590 1500 \begin{center} 1591 1501 \input{dependency} … … 1595 1505 \end{figure} 1596 1506 1597 In listing \ref{lst:int-bulk- cfa}, there is a solution that satisfies both barging prevention and mutual exclusion.1598 If ownership of both monitors is transferred to the waiter when the signaller releases @A & B@ and then the waiter transfers back ownership of @A@ back to the signaller when it releases it, then the problem is solved (@B@is no longer in use at this point).1507 In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. 1508 If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). 1599 1509 Dynamically finding the correct order is therefore the second possible solution. 1600 1510 The problem is effectively resolving a dependency graph of ownership requirements. … … 1604 1514 \begin{figure} 1605 1515 \begin{multicols}{2} 1606 \begin{ cfa}1516 \begin{pseudo} 1607 1517 acquire A 1608 1518 acquire B … … 1612 1522 release B 1613 1523 release A 1614 \end{ cfa}1524 \end{pseudo} 1615 1525 1616 1526 \columnbreak 1617 1527 1618 \begin{ cfa}1528 \begin{pseudo} 1619 1529 acquire A 1620 1530 acquire B … … 1624 1534 release B 1625 1535 release A 1626 \end{ cfa}1536 \end{pseudo} 1627 1537 \end{multicols} 1628 \begin{cfa }[caption={Extension to three monitors of listing \ref{lst:int-bulk-cfa}},label={lst:explosion}]1629 \end{cfa }1538 \begin{cfacode}[caption={Extension to three monitors of listing \ref{lst:int-bulk-pseudo}},label={lst:explosion}] 1539 \end{cfacode} 1630 1540 \end{figure} 1631 1541 … … 1636 1546 \subsubsection{Partial Signalling} \label{partial-sig} 1637 1547 Finally, the solution that is chosen for \CFA is to use partial signalling. 1638 Again using listing \ref{lst:int-bulk- cfa}, the partial signalling solution transfers ownership of monitor @B@ at lines \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor @A@.1548 Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at lines \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor \code{A}. 1639 1549 Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. 1640 1550 This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. … … 1644 1554 Using partial signalling, listing \ref{lst:dependency} can be solved easily: 1645 1555 \begin{itemize} 1646 \item When thread $\gamma$ reaches line \ref{line:release-ab} it transfers monitor @B@ to thread $\alpha$ and continues to hold monitor @A@.1647 \item When thread $\gamma$ reaches line \ref{line:release-a} it transfers monitor @A@to thread $\beta$ and wakes it up.1648 \item When thread $\beta$ reaches line \ref{line:release-aa} it transfers monitor @A@to thread $\alpha$ and wakes it up.1556 \item When thread $\gamma$ reaches line \ref{line:release-ab} it transfers monitor \code{B} to thread $\alpha$ and continues to hold monitor \code{A}. 1557 \item When thread $\gamma$ reaches line \ref{line:release-a} it transfers monitor \code{A} to thread $\beta$ and wakes it up. 1558 \item When thread $\beta$ reaches line \ref{line:release-aa} it transfers monitor \code{A} to thread $\alpha$ and wakes it up. 1649 1559 \end{itemize} 1650 1560 … … 1656 1566 \begin{table} 1657 1567 \begin{tabular}{|c|c|} 1658 @signal@ & @signal_block@\\1568 \code{signal} & \code{signal_block} \\ 1659 1569 \hline 1660 \begin{cfa}[tabsize=3] 1661 monitor DatingService { 1662 // compatibility codes 1570 \begin{cfacode}[tabsize=3] 1571 monitor DatingService 1572 { 1573 //compatibility codes 1663 1574 enum{ CCodes = 20 }; 1664 1575 … … 1671 1582 condition exchange; 1672 1583 1673 int girl(int phoneNo, int cfa) { 1674 // no compatible boy ? 1675 if(empty(boys[cfa])) { 1676 wait(girls[cfa]); // wait for boy 1677 girlPhoneNo = phoneNo; // make phone number available 1678 signal(exchange); // wake boy from chair 1679 } else { 1680 girlPhoneNo = phoneNo; // make phone number available 1681 signal(boys[cfa]); // wake boy 1682 wait(exchange); // sit in chair 1584 int girl(int phoneNo, int ccode) 1585 { 1586 //no compatible boy ? 1587 if(empty(boys[ccode])) 1588 { 1589 //wait for boy 1590 wait(girls[ccode]); 1591 1592 //make phone number available 1593 girlPhoneNo = phoneNo; 1594 1595 //wake boy from chair 1596 signal(exchange); 1597 } 1598 else 1599 { 1600 //make phone number available 1601 girlPhoneNo = phoneNo; 1602 1603 //wake boy 1604 signal(boys[ccode]); 1605 1606 //sit in chair 1607 wait(exchange); 1683 1608 } 1684 1609 return boyPhoneNo; 1685 1610 } 1686 int boy(int phoneNo, int cfa) { 1687 // same as above 1688 // with boy/girl interchanged 1689 } 1690 \end{cfa}&\begin{cfa}[tabsize=3] 1691 monitor DatingService { 1692 1693 enum{ CCodes = 20 }; // compatibility codes 1611 1612 int boy(int phoneNo, int ccode) 1613 { 1614 //same as above 1615 //with boy/girl interchanged 1616 } 1617 \end{cfacode}&\begin{cfacode}[tabsize=3] 1618 monitor DatingService 1619 { 1620 //compatibility codes 1621 enum{ CCodes = 20 }; 1694 1622 1695 1623 int girlPhoneNo; … … 1699 1627 condition girls[CCodes]; 1700 1628 condition boys [CCodes]; 1701 // exchange is not needed 1702 1703 int girl(int phoneNo, int cfa) { 1704 // no compatible boy ? 1705 if(empty(boys[cfa])) { 1706 wait(girls[cfa]); // wait for boy 1707 girlPhoneNo = phoneNo; // make phone number available 1708 signal(exchange); // wake boy from chair 1709 } else { 1710 girlPhoneNo = phoneNo; // make phone number available 1711 signal_block(boys[cfa]); // wake boy 1712 1713 // second handshake unnecessary 1629 //exchange is not needed 1630 1631 int girl(int phoneNo, int ccode) 1632 { 1633 //no compatible boy ? 1634 if(empty(boys[ccode])) 1635 { 1636 //wait for boy 1637 wait(girls[ccode]); 1638 1639 //make phone number available 1640 girlPhoneNo = phoneNo; 1641 1642 //wake boy from chair 1643 signal(exchange); 1644 } 1645 else 1646 { 1647 //make phone number available 1648 girlPhoneNo = phoneNo; 1649 1650 //wake boy 1651 signal_block(boys[ccode]); 1652 1653 //second handshake unnecessary 1714 1654 1715 1655 } … … 1717 1657 } 1718 1658 1719 int boy(int phoneNo, int cfa) { 1720 // same as above 1721 // with boy/girl interchanged 1722 } 1723 \end{cfa} 1659 int boy(int phoneNo, int ccode) 1660 { 1661 //same as above 1662 //with boy/girl interchanged 1663 } 1664 \end{cfacode} 1724 1665 \end{tabular} 1725 \caption{Dating service example using \ protect\lstinline|signal| and \protect\lstinline|signal_block|. }1666 \caption{Dating service example using \code{signal} and \code{signal_block}. } 1726 1667 \label{tbl:datingservice} 1727 1668 \end{table} 1728 1669 An important note is that, until now, signalling a monitor was a delayed operation. 1729 The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the @signal@statement.1730 However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the @signal_block@routine.1670 The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. 1671 However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine. 1731 1672 1732 1673 The example in table \ref{tbl:datingservice} highlights the difference in behaviour. 1733 As mentioned, @signal@only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed.1734 To avoid this explicit synchronization, the @condition@ type offers the @signal_block@routine, which handles the two-way handshake as shown in the example.1674 As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed. 1675 To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. 1735 1676 This feature removes the need for a second condition variables and simplifies programming. 1736 Like every other monitor semantic, @signal_block@ uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to @signal_block@, meaning no other thread can acquire the monitor either before or after the call.1677 Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call. 1737 1678 1738 1679 % ====================================================================== … … 1746 1687 Internal Scheduling & External Scheduling & Go\\ 1747 1688 \hline 1748 \begin{u C++}[tabsize=3]1689 \begin{ucppcode}[tabsize=3] 1749 1690 _Monitor Semaphore { 1750 1691 condition c; … … 1761 1702 } 1762 1703 } 1763 \end{u C++}&\begin{uC++}[tabsize=3]1704 \end{ucppcode}&\begin{ucppcode}[tabsize=3] 1764 1705 _Monitor Semaphore { 1765 1706 … … 1776 1717 } 1777 1718 } 1778 \end{u C++}&\begin{Go}[tabsize=3]1719 \end{ucppcode}&\begin{gocode}[tabsize=3] 1779 1720 type MySem struct { 1780 1721 inUse bool … … 1796 1737 s.inUse = false 1797 1738 1798 // This actually deadlocks1799 // when single thread1739 //This actually deadlocks 1740 //when single thread 1800 1741 s.c <- false 1801 1742 } 1802 \end{ Go}1743 \end{gocode} 1803 1744 \end{tabular} 1804 1745 \caption{Different forms of scheduling.} … … 1807 1748 This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. 1808 1749 Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. 1809 External scheduling can generally be done either in terms of control flow (e.g., Ada with @accept@, \uC with @_Accept@) or in terms of data (e.g., Go with channels).1750 External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). 1810 1751 Of course, both of these paradigms have their own strengths and weaknesses, but for this project, control-flow semantics was chosen to stay consistent with the rest of the languages semantics. 1811 1752 Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. 1812 The previous example shows a simple use @_Accept@ versus @wait@/@signal@and its advantages.1813 Note that while other languages often use @accept@/@select@ as the core external scheduling keyword, \CFA uses @waitfor@to prevent name collisions with existing socket \textbf{api}s.1814 1815 For the @P@ member above using internal scheduling, the call to @wait@ only guarantees that @V@ is the last routine to access the monitor, allowing a third routine, say @isInUse()@, acquire mutual exclusion several times while routine @P@is waiting.1816 On the other hand, external scheduling guarantees that while routine @P@ is waiting, no other routine than @V@can acquire the monitor.1753 The previous example shows a simple use \code{_Accept} versus \code{wait}/\code{signal} and its advantages. 1754 Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \textbf{api}s. 1755 1756 For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, acquire mutual exclusion several times while routine \code{P} is waiting. 1757 On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no other routine than \code{V} can acquire the monitor. 1817 1758 1818 1759 % ====================================================================== … … 1824 1765 Since \CFA is not object oriented, monitors become both more difficult to implement and less clear for a user: 1825 1766 1826 \begin{cfa }1767 \begin{cfacode} 1827 1768 monitor A {}; 1828 1769 1829 1770 void f(A & mutex a); 1830 1771 void g(A & mutex a) { 1831 waitfor(f); // Obvious which f() to wait for1832 } 1833 1834 void f(A & mutex a, int); // New different F added in scope1772 waitfor(f); //Obvious which f() to wait for 1773 } 1774 1775 void f(A & mutex a, int); //New different F added in scope 1835 1776 void h(A & mutex a) { 1836 waitfor(f); // Less obvious which f() to wait for1837 } 1838 \end{cfa }1777 waitfor(f); //Less obvious which f() to wait for 1778 } 1779 \end{cfacode} 1839 1780 1840 1781 Furthermore, external scheduling is an example where implementation constraints become visible from the interface. 1841 Here is the cfa-code for the entering phase of a monitor:1782 Here is the pseudo-code for the entering phase of a monitor: 1842 1783 \begin{center} 1843 1784 \begin{tabular}{l} 1844 \begin{ cfa}1785 \begin{pseudo} 1845 1786 if monitor is free 1846 1787 enter … … 1851 1792 else 1852 1793 block 1853 \end{ cfa}1794 \end{pseudo} 1854 1795 \end{tabular} 1855 1796 \end{center} 1856 1797 For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. 1857 However, a fast check for @monitor accepts me@is much harder to implement depending on the constraints put on the monitors.1798 However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. 1858 1799 Indeed, monitors are often expressed as an entry queue and some acceptor queue as in Figure~\ref{fig:ClassicalMonitor}. 1859 1800 … … 1881 1822 The alternative is to alter the implementation as in Figure~\ref{fig:BulkMonitor}. 1882 1823 Here, the mutex routine called is associated with a thread on the entry queue while a list of acceptable routines is kept separate. 1883 Generating a mask dynamically means that the storage for the mask information can vary between calls to @waitfor@, allowing for more flexibility and extensions.1824 Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. 1884 1825 Storing an array of accepted function pointers replaces the single instruction bitmask comparison with dereferencing a pointer followed by a linear search. 1885 Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches for the @waitfor@statement to check if a routine is already queued.1826 Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches for the \code{waitfor} statement to check if a routine is already queued. 1886 1827 1887 1828 \begin{figure} 1888 \begin{cfa }[caption={Example of nested external scheduling},label={lst:nest-ext}]1829 \begin{cfacode}[caption={Example of nested external scheduling},label={lst:nest-ext}] 1889 1830 monitor M {}; 1890 1831 void foo( M & mutex a ) {} 1891 1832 void bar( M & mutex b ) { 1892 // Nested in the waitfor(bar, c) call1833 //Nested in the waitfor(bar, c) call 1893 1834 waitfor(foo, b); 1894 1835 } … … 1897 1838 } 1898 1839 1899 \end{cfa }1840 \end{cfacode} 1900 1841 \end{figure} 1901 1842 … … 1917 1858 External scheduling, like internal scheduling, becomes significantly more complex when introducing multi-monitor syntax. 1918 1859 Even in the simplest possible case, some new semantics needs to be established: 1919 \begin{cfa }1860 \begin{cfacode} 1920 1861 monitor M {}; 1921 1862 … … 1923 1864 1924 1865 void g(M & mutex b, M & mutex c) { 1925 waitfor(f); // two monitors M => unknown which to pass to f(M & mutex)1926 } 1927 \end{cfa }1866 waitfor(f); //two monitors M => unknown which to pass to f(M & mutex) 1867 } 1868 \end{cfacode} 1928 1869 The obvious solution is to specify the correct monitor as follows: 1929 1870 1930 \begin{cfa }1871 \begin{cfacode} 1931 1872 monitor M {}; 1932 1873 … … 1934 1875 1935 1876 void g(M & mutex a, M & mutex b) { 1936 // wait for call to f with argument b1877 //wait for call to f with argument b 1937 1878 waitfor(f, b); 1938 1879 } 1939 \end{cfa }1880 \end{cfacode} 1940 1881 This syntax is unambiguous. 1941 Both locks are acquired and kept by @g@.1942 When routine @f@ is called, the lock for monitor @b@ is temporarily transferred from @g@ to @f@ (while @g@ still holds lock @a@).1943 This behaviour can be extended to the multi-monitor @waitfor@statement as follows.1944 1945 \begin{cfa }1882 Both locks are acquired and kept by \code{g}. 1883 When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). 1884 This behaviour can be extended to the multi-monitor \code{waitfor} statement as follows. 1885 1886 \begin{cfacode} 1946 1887 monitor M {}; 1947 1888 … … 1949 1890 1950 1891 void g(M & mutex a, M & mutex b) { 1951 // wait for call to f with arguments a and b1892 //wait for call to f with arguments a and b 1952 1893 waitfor(f, a, b); 1953 1894 } 1954 \end{cfa }1955 1956 Note that the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired in the routine. @waitfor@used in any other context is undefined behaviour.1895 \end{cfacode} 1896 1897 Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is undefined behaviour. 1957 1898 1958 1899 An important behaviour to note is when a set of monitors only match partially: 1959 1900 1960 \begin{cfa }1901 \begin{cfacode} 1961 1902 mutex struct A {}; 1962 1903 … … 1971 1912 1972 1913 void foo() { 1973 g(a1, b); // block on accept1914 g(a1, b); //block on accept 1974 1915 } 1975 1916 1976 1917 void bar() { 1977 f(a2, b); // fulfill cooperation1978 } 1979 \end{cfa }1918 f(a2, b); //fulfill cooperation 1919 } 1920 \end{cfacode} 1980 1921 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. 1981 1922 In both cases, partially matching monitor sets does not wakeup the waiting thread. 1982 It is also important to note that in the case of external scheduling the order of parameters is irrelevant; @waitfor(f,a,b)@ and @waitfor(f,b,a)@are indistinguishable waiting condition.1983 1984 % ====================================================================== 1985 % ====================================================================== 1986 \subsection{\ protect\lstinline|waitfor|Semantics}1987 % ====================================================================== 1988 % ====================================================================== 1989 1990 Syntactically, the @waitfor@statement takes a function identifier and a set of monitors.1991 While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the @waitfor@statement.1923 It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting condition. 1924 1925 % ====================================================================== 1926 % ====================================================================== 1927 \subsection{\code{waitfor} Semantics} 1928 % ====================================================================== 1929 % ====================================================================== 1930 1931 Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. 1932 While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. 1992 1933 It checks that the set of monitors passed in matches the requirements for a function call. 1993 1934 Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. 1994 The choice of the function type is made ignoring any non- @mutex@parameter.1935 The choice of the function type is made ignoring any non-\code{mutex} parameter. 1995 1936 One limitation of the current implementation is that it does not handle overloading, but overloading is possible. 1996 1937 \begin{figure} 1997 \begin{cfa }[caption={Various correct and incorrect uses of the waitfor statement},label={lst:waitfor}]1938 \begin{cfacode}[caption={Various correct and incorrect uses of the waitfor statement},label={lst:waitfor}] 1998 1939 monitor A{}; 1999 1940 monitor B{}; … … 2009 1950 void (*fp)( A & mutex ) = f1; 2010 1951 2011 waitfor(f1, a1); // Correct : 1 monitor case2012 waitfor(f2, a1, b1); // Correct : 2 monitor case2013 waitfor(f3, a1); // Correct : non-mutex arguments are ignored2014 waitfor(f1, *ap); // Correct : expression as argument2015 2016 waitfor(f1, a1, b1); // Incorrect : Too many mutex arguments2017 waitfor(f2, a1); // Incorrect : Too few mutex arguments2018 waitfor(f2, a1, a2); // Incorrect : Mutex arguments don't match2019 waitfor(f1, 1); // Incorrect : 1 not a mutex argument2020 waitfor(f9, a1); // Incorrect : f9 function does not exist2021 waitfor(*fp, a1 ); // Incorrect : fp not an identifier2022 waitfor(f4, a1); // Incorrect : f4 ambiguous2023 2024 waitfor(f2, a1, b2); // Undefined behaviour : b2 not mutex2025 } 2026 \end{cfa }1952 waitfor(f1, a1); //Correct : 1 monitor case 1953 waitfor(f2, a1, b1); //Correct : 2 monitor case 1954 waitfor(f3, a1); //Correct : non-mutex arguments are ignored 1955 waitfor(f1, *ap); //Correct : expression as argument 1956 1957 waitfor(f1, a1, b1); //Incorrect : Too many mutex arguments 1958 waitfor(f2, a1); //Incorrect : Too few mutex arguments 1959 waitfor(f2, a1, a2); //Incorrect : Mutex arguments don't match 1960 waitfor(f1, 1); //Incorrect : 1 not a mutex argument 1961 waitfor(f9, a1); //Incorrect : f9 function does not exist 1962 waitfor(*fp, a1 ); //Incorrect : fp not an identifier 1963 waitfor(f4, a1); //Incorrect : f4 ambiguous 1964 1965 waitfor(f2, a1, b2); //Undefined behaviour : b2 not mutex 1966 } 1967 \end{cfacode} 2027 1968 \end{figure} 2028 1969 2029 Finally, for added flexibility, \CFA supports constructing a complex @waitfor@ statement using the @or@, @timeout@ and @else@.2030 Indeed, multiple @waitfor@ clauses can be chained together using @or@; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor set passed in.2031 To enable users to tell which accepted function executed, @waitfor@s are followed by a statement (including the null statement @;@) or a compound statement, which is executed after the clause is triggered.2032 A @waitfor@ chain can also be followed by a @timeout@, to signify an upper bound on the wait, or an @else@, to signify that the call should be non-blocking, which checks for a matching function call already arrived and otherwise continues.2033 Any and all of these clauses can be preceded by a @when@condition to dynamically toggle the accept clauses on or off based on some current state.1970 Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else}. 1971 Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor set passed in. 1972 To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. 1973 A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks for a matching function call already arrived and otherwise continues. 1974 Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. 2034 1975 Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones. 2035 1976 2036 1977 \begin{figure} 2037 \lstset{language=CFA,deletedelim=**[is][]{`}{`}} 2038 \begin{cfa} 1978 \begin{cfacode}[caption={Various correct and incorrect uses of the or, else, and timeout clause around a waitfor statement},label={lst:waitfor2}] 2039 1979 monitor A{}; 2040 1980 … … 2043 1983 2044 1984 void foo( A & mutex a, bool b, int t ) { 2045 waitfor(f1, a); $\C{// Correct : blocking case}$ 2046 2047 waitfor(f1, a) { $\C{// Correct : block with statement}$ 1985 //Correct : blocking case 1986 waitfor(f1, a); 1987 1988 //Correct : block with statement 1989 waitfor(f1, a) { 2048 1990 sout | "f1" | endl; 2049 1991 } 2050 waitfor(f1, a) { $\C{// Correct : block waiting for f1 or f2}$ 1992 1993 //Correct : block waiting for f1 or f2 1994 waitfor(f1, a) { 2051 1995 sout | "f1" | endl; 2052 1996 } or waitfor(f2, a) { 2053 1997 sout | "f2" | endl; 2054 1998 } 2055 waitfor(f1, a); or else; $\C{// Correct : non-blocking case}$ 2056 2057 waitfor(f1, a) { $\C{// Correct : non-blocking case}$ 1999 2000 //Correct : non-blocking case 2001 waitfor(f1, a); or else; 2002 2003 //Correct : non-blocking case 2004 waitfor(f1, a) { 2058 2005 sout | "blocked" | endl; 2059 2006 } or else { 2060 2007 sout | "didn't block" | endl; 2061 2008 } 2062 waitfor(f1, a) { $\C{// Correct : block at most 10 seconds}$ 2009 2010 //Correct : block at most 10 seconds 2011 waitfor(f1, a) { 2063 2012 sout | "blocked" | endl; 2064 2013 } or timeout( 10`s) { 2065 2014 sout | "didn't block" | endl; 2066 2015 } 2067 // Correct : block only if b == true if b == false, don't even make the call 2016 2017 //Correct : block only if b == true 2018 //if b == false, don't even make the call 2068 2019 when(b) waitfor(f1, a); 2069 2020 2070 // Correct : block only if b == true if b == false, make non-blocking call 2021 //Correct : block only if b == true 2022 //if b == false, make non-blocking call 2071 2023 waitfor(f1, a); or when(!b) else; 2072 2024 2073 // Correct : block only of t > 12025 //Correct : block only of t > 1 2074 2026 waitfor(f1, a); or when(t > 1) timeout(t); or else; 2075 2027 2076 // Incorrect : timeout clause is dead code2028 //Incorrect : timeout clause is dead code 2077 2029 waitfor(f1, a); or timeout(t); or else; 2078 2030 2079 // Incorrect : order must be waitfor [or waitfor... [or timeout] [or else]] 2031 //Incorrect : order must be 2032 //waitfor [or waitfor... [or timeout] [or else]] 2080 2033 timeout(t); or waitfor(f1, a); or else; 2081 2034 } 2082 \end{cfa} 2083 \caption{Correct and incorrect uses of the or, else, and timeout clause around a waitfor statement} 2084 \label{lst:waitfor2} 2035 \end{cfacode} 2085 2036 \end{figure} 2086 2037 … … 2090 2041 % ====================================================================== 2091 2042 % ====================================================================== 2092 An interesting use for the @waitfor@statement is destructor semantics.2093 Indeed, the @waitfor@ statement can accept any @mutex@routine, which includes the destructor (see section \ref{data}).2043 An interesting use for the \code{waitfor} statement is destructor semantics. 2044 Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). 2094 2045 However, with the semantics discussed until now, waiting for the destructor does not make any sense, since using an object after its destructor is called is undefined behaviour. 2095 The simplest approach is to disallow @waitfor@on a destructor.2096 However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current @mutex@routine, similarly to how a condition is signalled.2046 The simplest approach is to disallow \code{waitfor} on a destructor. 2047 However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled. 2097 2048 \begin{figure} 2098 \begin{cfa }[caption={Example of an executor which executes action in series until the destructor is called.},label={lst:dtor-order}]2049 \begin{cfacode}[caption={Example of an executor which executes action in series until the destructor is called.},label={lst:dtor-order}] 2099 2050 monitor Executer {}; 2100 2051 struct Action; … … 2110 2061 } 2111 2062 } 2112 \end{cfa }2063 \end{cfacode} 2113 2064 \end{figure} 2114 2065 For example, listing \ref{lst:dtor-order} shows an example of an executor with an infinite loop, which waits for the destructor to break out of this loop. … … 2128 2079 In this decade, it is no longer reasonable to create a high-performance application without caring about parallelism. 2129 2080 Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. 2130 The lowest-level approach of parallelism is to use \textbf{kthread} in combination with semantics like @fork@, @join@, etc.2081 The lowest-level approach of parallelism is to use \textbf{kthread} in combination with semantics like \code{fork}, \code{join}, etc. 2131 2082 However, since these have significant costs and limitations, \textbf{kthread} are now mostly used as an implementation tool rather than a user oriented one. 2132 2083 There are several alternatives to solve these issues that all have strengths and weaknesses. … … 2207 2158 Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. 2208 2159 Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. 2209 The threads and the condition both have a fixed amount of memory, while @mutex@routines and blocking calls allow for an unbound amount, within the stack size.2160 The threads and the condition both have a fixed amount of memory, while \code{mutex} routines and blocking calls allow for an unbound amount, within the stack size. 2210 2161 2211 2162 Note that since the major contributions of this paper are extending monitor semantics to \textbf{bulk-acq} and loose object definitions, any challenges that are not resulting of these characteristics of \CFA are considered as solved problems and therefore not discussed. … … 2217 2168 % ====================================================================== 2218 2169 2219 The first step towards the monitor implementation is simple @mutex@routines.2170 The first step towards the monitor implementation is simple \code{mutex} routines. 2220 2171 In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. 2221 2172 The entry/exit procedures do not have to be extended to support multiple monitors. … … 2228 2179 \begin{multicols}{2} 2229 2180 Entry 2230 \begin{ cfa}2181 \begin{pseudo} 2231 2182 if monitor is free 2232 2183 enter … … 2236 2187 block 2237 2188 increment recursions 2238 \end{ cfa}2189 \end{pseudo} 2239 2190 \columnbreak 2240 2191 Exit 2241 \begin{ cfa}2192 \begin{pseudo} 2242 2193 decrement recursion 2243 2194 if recursion == 0 2244 2195 if entry queue not empty 2245 2196 wake-up thread 2246 \end{ cfa}2197 \end{pseudo} 2247 2198 \end{multicols} 2248 \begin{ cfa}[caption={Initial entry and exit routine for monitors},label={lst:entry1}]2249 \end{ cfa}2199 \begin{pseudo}[caption={Initial entry and exit routine for monitors},label={lst:entry1}] 2200 \end{pseudo} 2250 2201 \end{figure} 2251 2202 … … 2254 2205 However, it is shown that entry-point locking solves most of the issues. 2255 2206 2256 First of all, interaction between @otype@polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying.2257 Therefore, the main question is how to support @dtype@polymorphism.2207 First of all, interaction between \code{otype} polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying. 2208 Therefore, the main question is how to support \code{dtype} polymorphism. 2258 2209 It is important to present the difference between the two acquiring options: \textbf{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. 2259 2210 For example: 2260 \begin{table} 2211 \begin{table}[H] 2261 2212 \begin{center} 2262 2213 \begin{tabular}{|c|c|c|} 2263 2214 Mutex & \textbf{callsite-locking} & \textbf{entry-point-locking} \\ 2264 call & cfa-code & cfa-code \\2215 call & pseudo-code & pseudo-code \\ 2265 2216 \hline 2266 \begin{cfa }[tabsize=3]2217 \begin{cfacode}[tabsize=3] 2267 2218 void foo(monitor& mutex a){ 2268 2219 2269 // Do Work2220 //Do Work 2270 2221 //... 2271 2222 … … 2278 2229 2279 2230 } 2280 \end{cfa } & \begin{cfa}[tabsize=3]2231 \end{cfacode} & \begin{pseudo}[tabsize=3] 2281 2232 foo(& a) { 2282 2233 2283 // Do Work2234 //Do Work 2284 2235 //... 2285 2236 … … 2292 2243 release(a); 2293 2244 } 2294 \end{ cfa} & \begin{cfa}[tabsize=3]2245 \end{pseudo} & \begin{pseudo}[tabsize=3] 2295 2246 foo(& a) { 2296 2247 acquire(a); 2297 // Do Work2248 //Do Work 2298 2249 //... 2299 2250 release(a); … … 2306 2257 2307 2258 } 2308 \end{ cfa}2259 \end{pseudo} 2309 2260 \end{tabular} 2310 2261 \end{center} … … 2313 2264 \end{table} 2314 2265 2315 Note the @mutex@keyword relies on the type system, which means that in cases where a generic monitor-routine is desired, writing the mutex routine is possible with the proper trait, e.g.:2316 \begin{cfa }2317 // Incorrect: T may not be monitor2266 Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor-routine is desired, writing the mutex routine is possible with the proper trait, e.g.: 2267 \begin{cfacode} 2268 //Incorrect: T may not be monitor 2318 2269 forall(dtype T) 2319 2270 void foo(T * mutex t); 2320 2271 2321 // Correct: this function only works on monitors (any monitor)2272 //Correct: this function only works on monitors (any monitor) 2322 2273 forall(dtype T | is_monitor(T)) 2323 2274 void bar(T * mutex t)); 2324 \end{cfa }2275 \end{cfacode} 2325 2276 2326 2277 Both entry point and \textbf{callsite-locking} are feasible implementations. … … 2348 2299 2349 2300 \subsection{Processors} 2350 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically @pthread@s in the current implementation of \CFA.2301 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically \texttt{pthread}s in the current implementation of \CFA. 2351 2302 Indeed, any parallelism must go through operating-system libraries. 2352 2303 However, \textbf{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. … … 2357 2308 \subsection{Stack Management} 2358 2309 One of the challenges of this system is to reduce the footprint as much as possible. 2359 Specifically, all @pthread@s created also have a stack created with them, which should be used as much as possible.2310 Specifically, all \texttt{pthread}s created also have a stack created with them, which should be used as much as possible. 2360 2311 Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \textbf{kthread} stack, effectively stealing the processor stack. 2361 2312 The exception to this rule is the Main Processor, i.e., the initial \textbf{kthread} that is given to any program. … … 2372 2323 Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. 2373 2324 The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. 2374 However, the performance of the 2-step context-switch is still superior to a @pthread_yield@(see section \ref{results}).2375 Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft @SwitchToFiber@~\cite{switchToWindows} routine).2325 However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). 2326 Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). 2376 2327 This option is not currently present in \CFA, but the changes required to add it are strictly additive. 2377 2328 … … 2405 2356 This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. 2406 2357 However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. 2407 For this reason, the alarm thread is in a tight loop around a system call to @sigwaitinfo@, requiring very little CPU time for preemption.2358 For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. 2408 2359 One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). 2409 This unblocking is also done using {\tt SIGALRM}, but sent through the @pthread_sigqueue@.2410 Indeed, @sigwait@ can differentiate signals sent from @pthread_sigqueue@from signals sent from alarms or the kernel.2360 This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. 2361 Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel. 2411 2362 2412 2363 \subsection{Scheduler} … … 2422 2373 The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:ClassicalMonitor} for convenience): 2423 2374 2424 \begin{figure} 2375 \begin{figure}[H] 2425 2376 \begin{center} 2426 2377 {\resizebox{0.4\textwidth}{!}{\input{monitor}}} … … 2437 2388 Secondly, the thread waiting on the condition has to be separated across multiple monitors, seen in figure \ref{fig:monitor_cfa}. 2438 2389 2439 \begin{figure} 2390 \begin{figure}[H] 2440 2391 \begin{center} 2441 2392 {\resizebox{0.8\textwidth}{!}{\input{int_monitor}}} … … 2451 2402 For a specific signalling operation every monitor needs a piece of thread on its AS-stack. 2452 2403 2453 \begin{figure} 2404 \begin{figure}[b] 2454 2405 \begin{multicols}{2} 2455 2406 Entry 2456 \begin{ cfa}2407 \begin{pseudo} 2457 2408 if monitor is free 2458 2409 enter … … 2463 2414 increment recursion 2464 2415 2465 \end{ cfa}2416 \end{pseudo} 2466 2417 \columnbreak 2467 2418 Exit 2468 \begin{ cfa}2419 \begin{pseudo} 2469 2420 decrement recursion 2470 2421 if recursion == 0 … … 2476 2427 if entry queue not empty 2477 2428 wake-up thread 2478 \end{ cfa}2429 \end{pseudo} 2479 2430 \end{multicols} 2480 \begin{ cfa}[caption={Entry and exit routine for monitors with internal scheduling},label={lst:entry2}]2481 \end{ cfa}2431 \begin{pseudo}[caption={Entry and exit routine for monitors with internal scheduling},label={lst:entry2}] 2432 \end{pseudo} 2482 2433 \end{figure} 2483 2434 … … 2485 2436 Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. 2486 2437 This solution is deadlock safe as well as preventing any potential barging. 2487 The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the @wait@ and @signal_block@routines.2488 2489 \begin{figure} 2438 The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines. 2439 2440 \begin{figure}[H] 2490 2441 \begin{center} 2491 2442 {\resizebox{0.8\textwidth}{!}{\input{monitor_structs.pstex_t}}} … … 2497 2448 Figure \ref{fig:structs} shows a high-level representation of these data structures. 2498 2449 The main idea behind them is that, a thread cannot contain an arbitrary number of intrusive ``next'' pointers for linking onto monitors. 2499 The @condition node@ is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each @condition criterion@is moved to the AS-stack.2450 The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. 2500 2451 Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}. 2501 2452 … … 2507 2458 Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. 2508 2459 For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (i.e., no signal statement uses multiple conditions). 2509 However, in the case of external scheduling, there is no equivalent object which is associated with @waitfor@statements.2460 However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. 2510 2461 This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. 2511 These monitors being the only objects that have sufficient lifetime and are available on both sides of the @waitfor@statement.2462 These monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. 2512 2463 This requires an algorithm to choose which monitor holds the relevant queue. 2513 2464 It is also important that said algorithm be independent of the order in which users list parameters. … … 2526 2477 \begin{itemize} 2527 2478 \item The threads waiting on the entry queue need to keep track of which routine they are trying to enter, and using which set of monitors. 2528 The @mutex@routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information.2479 The \code{mutex} routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information. 2529 2480 \item The monitors need to keep a mask of acceptable routines. 2530 2481 This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. 2531 2482 It also needs storage to keep track of which routine was accepted. 2532 2483 Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. 2533 Note that if a thread has acquired two monitors but executes a @waitfor@with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless.2534 This becomes relevant when @when@ clauses affect the number of monitors passed to a @waitfor@statement.2484 Note that if a thread has acquired two monitors but executes a \code{waitfor} with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless. 2485 This becomes relevant when \code{when} clauses affect the number of monitors passed to a \code{waitfor} statement. 2535 2486 \item The entry/exit routines need to be updated as shown in listing \ref{lst:entry3}. 2536 2487 \end{itemize} … … 2540 2491 This routine is needed because of the storage requirements of the call order inversion. 2541 2492 Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. 2542 For regular @waitfor@statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later.2543 The @waitfor@semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}2493 For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. 2494 The \code{waitfor} semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor} 2544 2495 2545 2496 \begin{figure} 2546 2497 \begin{multicols}{2} 2547 2498 Entry 2548 \begin{ cfa}2499 \begin{pseudo} 2549 2500 if monitor is free 2550 2501 enter … … 2557 2508 block 2558 2509 increment recursion 2559 \end{ cfa}2510 \end{pseudo} 2560 2511 \columnbreak 2561 2512 Exit 2562 \begin{ cfa}2513 \begin{pseudo} 2563 2514 decrement recursion 2564 2515 if recursion == 0 … … 2573 2524 wake-up thread 2574 2525 endif 2575 \end{ cfa}2526 \end{pseudo} 2576 2527 \end{multicols} 2577 \begin{ cfa}[caption={Entry and exit routine for monitors with internal scheduling and external scheduling},label={lst:entry3}]2578 \end{ cfa}2528 \begin{pseudo}[caption={Entry and exit routine for monitors with internal scheduling and external scheduling},label={lst:entry3}] 2529 \end{pseudo} 2579 2530 \end{figure} 2580 2531 … … 2582 2533 \begin{multicols}{2} 2583 2534 Destructor Entry 2584 \begin{ cfa}2535 \begin{pseudo} 2585 2536 if monitor is free 2586 2537 enter … … 2596 2547 wait 2597 2548 increment recursion 2598 \end{ cfa}2549 \end{pseudo} 2599 2550 \columnbreak 2600 2551 Waitfor 2601 \begin{ cfa}2552 \begin{pseudo} 2602 2553 if matching thread is already there 2603 2554 if found destructor … … 2619 2570 block 2620 2571 return 2621 \end{ cfa}2572 \end{pseudo} 2622 2573 \end{multicols} 2623 \begin{ cfa}[caption={Pseudo code for the \protect\lstinline|waitfor| routine and the \protect\lstinline|mutex|entry routine for destructors},label={lst:entry-dtor}]2624 \end{ cfa}2574 \begin{pseudo}[caption={Pseudo code for the \code{waitfor} routine and the \code{mutex} entry routine for destructors},label={lst:entry-dtor}] 2575 \end{pseudo} 2625 2576 \end{figure} 2626 2577 … … 2634 2585 2635 2586 \section{Threads As Monitors} 2636 As it was subtly alluded in section \ref{threads}, @thread@s in \CFA are in fact monitors, which means that all monitor features are available when using threads.2587 As it was subtly alluded in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. 2637 2588 For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine: 2638 \begin{figure} 2639 \begin{cfa }[caption={Toy simulator using \protect\lstinline|thread|s and \protect\lstinline|monitor|s.},label={lst:engine-v1}]2589 \begin{figure}[H] 2590 \begin{cfacode}[caption={Toy simulator using \code{thread}s and \code{monitor}s.},label={lst:engine-v1}] 2640 2591 // Visualization declaration 2641 2592 thread Renderer {} renderer; … … 2664 2615 } 2665 2616 } 2666 \end{cfa }2617 \end{cfacode} 2667 2618 \end{figure} 2668 2619 One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. 2669 2620 Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner: 2670 \begin{figure} 2671 \begin{cfa }[caption={Same toy simulator with proper termination condition.},label={lst:engine-v2}]2621 \begin{figure}[H] 2622 \begin{cfacode}[caption={Same toy simulator with proper termination condition.},label={lst:engine-v2}] 2672 2623 // Visualization declaration 2673 2624 thread Renderer {} renderer; … … 2707 2658 // Call destructor for simulator once simulator finishes 2708 2659 // Call destructor for renderer to signify shutdown 2709 \end{cfa }2660 \end{cfacode} 2710 2661 \end{figure} 2711 2662 … … 2713 2664 As mentioned in section \ref{preemption}, \CFA uses preemptive threads by default but can use fibers on demand. 2714 2665 Currently, using fibers is done by adding the following line of code to the program~: 2715 \begin{cfa }2666 \begin{cfacode} 2716 2667 unsigned int default_preemption() { 2717 2668 return 0; 2718 2669 } 2719 \end{cfa }2670 \end{cfacode} 2720 2671 This function is called by the kernel to fetch the default preemption rate, where 0 signifies an infinite time-slice, i.e., no preemption. 2721 2672 However, once clusters are fully implemented, it will be possible to create fibers and \textbf{uthread} in the same system, as in listing \ref{lst:fiber-uthread} 2722 2673 \begin{figure} 2723 \lstset{language=CFA,deletedelim=**[is][]{`}{`}} 2724 \begin{cfa}[caption={Using fibers and \textbf{uthread} side-by-side in \CFA},label={lst:fiber-uthread}] 2725 // Cluster forward declaration 2674 \begin{cfacode}[caption={Using fibers and \textbf{uthread} side-by-side in \CFA},label={lst:fiber-uthread}] 2675 //Cluster forward declaration 2726 2676 struct cluster; 2727 2677 2728 // Processor forward declaration2678 //Processor forward declaration 2729 2679 struct processor; 2730 2680 2731 // Construct clusters with a preemption rate2681 //Construct clusters with a preemption rate 2732 2682 void ?{}(cluster& this, unsigned int rate); 2733 // Construct processor and add it to cluster2683 //Construct processor and add it to cluster 2734 2684 void ?{}(processor& this, cluster& cluster); 2735 // Construct thread and schedule it on cluster2685 //Construct thread and schedule it on cluster 2736 2686 void ?{}(thread& this, cluster& cluster); 2737 2687 2738 // Declare two clusters2739 cluster thread_cluster = { 10`ms }; // Preempt every 10 ms2740 cluster fibers_cluster = { 0 }; // Never preempt2741 2742 // Construct 4 processors2688 //Declare two clusters 2689 cluster thread_cluster = { 10`ms }; //Preempt every 10 ms 2690 cluster fibers_cluster = { 0 }; //Never preempt 2691 2692 //Construct 4 processors 2743 2693 processor processors[4] = { 2744 2694 //2 for the thread cluster … … 2750 2700 }; 2751 2701 2752 // Declares thread2702 //Declares thread 2753 2703 thread UThread {}; 2754 2704 void ?{}(UThread& this) { 2755 // Construct underlying thread to automatically2756 // be scheduled on the thread cluster2705 //Construct underlying thread to automatically 2706 //be scheduled on the thread cluster 2757 2707 (this){ thread_cluster } 2758 2708 } … … 2760 2710 void main(UThread & this); 2761 2711 2762 // Declares fibers2712 //Declares fibers 2763 2713 thread Fiber {}; 2764 2714 void ?{}(Fiber& this) { 2765 // Construct underlying thread to automatically2766 // be scheduled on the fiber cluster2715 //Construct underlying thread to automatically 2716 //be scheduled on the fiber cluster 2767 2717 (this.__thread){ fibers_cluster } 2768 2718 } 2769 2719 2770 2720 void main(Fiber & this); 2771 \end{cfa }2721 \end{cfacode} 2772 2722 \end{figure} 2773 2723 … … 2781 2731 Table \ref{tab:machine} shows the characteristics of the machine used to run the benchmarks. 2782 2732 All tests were made on this machine. 2783 \begin{table} 2733 \begin{table}[H] 2784 2734 \begin{center} 2785 2735 \begin{tabular}{| l | r | l | r |} … … 2813 2763 2814 2764 \section{Micro Benchmarks} 2815 All benchmarks are run using the same harness to produce the results, seen as the @BENCH()@macro in the following examples.2765 All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. 2816 2766 This macro uses the following logic to benchmark the code: 2817 \begin{ cfa}2767 \begin{pseudo} 2818 2768 #define BENCH(run, result) \ 2819 2769 before = gettime(); \ … … 2821 2771 after = gettime(); \ 2822 2772 result = (after - before) / N; 2823 \end{ cfa}2824 The method used to get time is @clock_gettime(CLOCK_THREAD_CPUTIME_ID);@.2773 \end{pseudo} 2774 The method used to get time is \code{clock_gettime(CLOCK_THREAD_CPUTIME_ID);}. 2825 2775 Each benchmark is using many iterations of a simple call to measure the cost of the call. 2826 2776 The specific number of iterations depends on the specific benchmark. … … 2837 2787 \begin{multicols}{2} 2838 2788 \CFA Coroutines 2839 \begin{cfa }2789 \begin{cfacode} 2840 2790 coroutine GreatSuspender {}; 2841 2791 void main(GreatSuspender& this) { … … 2853 2803 printf("%llu\n", result); 2854 2804 } 2855 \end{cfa }2805 \end{cfacode} 2856 2806 \columnbreak 2857 2807 \CFA Threads 2858 \begin{cfa }2808 \begin{cfacode} 2859 2809 2860 2810 … … 2872 2822 printf("%llu\n", result); 2873 2823 } 2874 \end{cfa }2824 \end{cfacode} 2875 2825 \end{multicols} 2876 \begin{cfa }[caption={\CFA benchmark code used to measure context-switches for coroutines and threads.},label={lst:ctx-switch}]2877 \end{cfa }2826 \begin{cfacode}[caption={\CFA benchmark code used to measure context-switches for coroutines and threads.},label={lst:ctx-switch}] 2827 \end{cfacode} 2878 2828 \end{figure} 2879 2829 … … 2903 2853 For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. 2904 2854 Listing \ref{lst:mutex} shows the code for \CFA. 2905 To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a @pthread_mutex@lock is also measured.2855 To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a \code{pthread_mutex} lock is also measured. 2906 2856 The results can be shown in table \ref{tab:mutex}. 2907 2857 2908 2858 \begin{figure} 2909 \begin{cfa }[caption={\CFA benchmark code used to measure mutex routines.},label={lst:mutex}]2859 \begin{cfacode}[caption={\CFA benchmark code used to measure mutex routines.},label={lst:mutex}] 2910 2860 monitor M {}; 2911 2861 void __attribute__((noinline)) call( M & mutex m /*, m2, m3, m4*/ ) {} … … 2921 2871 printf("%llu\n", result); 2922 2872 } 2923 \end{cfa }2873 \end{cfacode} 2924 2874 \end{figure} 2925 2875 … … 2933 2883 FetchAdd + FetchSub & 26 & 26 & 0 \\ 2934 2884 Pthreads Mutex Lock & 31 & 31.86 & 0.99 \\ 2935 \uC @monitor@member routine & 30 & 30 & 0 \\2936 \CFA @mutex@routine, 1 argument & 41 & 41.57 & 0.9 \\2937 \CFA @mutex@routine, 2 argument & 76 & 76.96 & 1.57 \\2938 \CFA @mutex@routine, 4 argument & 145 & 146.68 & 3.85 \\2885 \uC \code{monitor} member routine & 30 & 30 & 0 \\ 2886 \CFA \code{mutex} routine, 1 argument & 41 & 41.57 & 0.9 \\ 2887 \CFA \code{mutex} routine, 2 argument & 76 & 76.96 & 1.57 \\ 2888 \CFA \code{mutex} routine, 4 argument & 145 & 146.68 & 3.85 \\ 2939 2889 Java synchronized routine & 27 & 28.57 & 2.6 \\ 2940 2890 \hline … … 2952 2902 2953 2903 \begin{figure} 2954 \begin{cfa }[caption={Benchmark code for internal scheduling},label={lst:int-sched}]2904 \begin{cfacode}[caption={Benchmark code for internal scheduling},label={lst:int-sched}] 2955 2905 volatile int go = 0; 2956 2906 condition c; … … 2982 2932 return do_wait(m1); 2983 2933 } 2984 \end{cfa }2934 \end{cfacode} 2985 2935 \end{figure} 2986 2936 … … 2992 2942 \hline 2993 2943 Pthreads Condition Variable & 5902.5 & 6093.29 & 714.78 \\ 2994 \uC @signal@& 322 & 323 & 3.36 \\2995 \CFA @signal@, 1 @monitor@& 352.5 & 353.11 & 3.66 \\2996 \CFA @signal@, 2 @monitor@& 430 & 430.29 & 8.97 \\2997 \CFA @signal@, 4 @monitor@& 594.5 & 606.57 & 18.33 \\2998 Java @notify@& 13831.5 & 15698.21 & 4782.3 \\2944 \uC \code{signal} & 322 & 323 & 3.36 \\ 2945 \CFA \code{signal}, 1 \code{monitor} & 352.5 & 353.11 & 3.66 \\ 2946 \CFA \code{signal}, 2 \code{monitor} & 430 & 430.29 & 8.97 \\ 2947 \CFA \code{signal}, 4 \code{monitor} & 594.5 & 606.57 & 18.33 \\ 2948 Java \code{notify} & 13831.5 & 15698.21 & 4782.3 \\ 2999 2949 \hline 3000 2950 \end{tabular} … … 3006 2956 3007 2957 \subsection{External Scheduling} 3008 The Internal scheduling benchmark measures the cost of the @waitfor@ statement (@_Accept@in \uC).2958 The Internal scheduling benchmark measures the cost of the \code{waitfor} statement (\code{_Accept} in \uC). 3009 2959 Listing \ref{lst:ext-sched} shows the code for \CFA, with results in table \ref{tab:ext-sched}. 3010 2960 As with all other benchmarks, all omitted tests are functionally identical to one of these tests. 3011 2961 3012 2962 \begin{figure} 3013 \begin{cfa }[caption={Benchmark code for external scheduling},label={lst:ext-sched}]2963 \begin{cfacode}[caption={Benchmark code for external scheduling},label={lst:ext-sched}] 3014 2964 volatile int go = 0; 3015 2965 monitor M {}; … … 3040 2990 return do_wait(m1); 3041 2991 } 3042 \end{cfa }2992 \end{cfacode} 3043 2993 \end{figure} 3044 2994 … … 3049 2999 \multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\ 3050 3000 \hline 3051 \uC @Accept@& 350 & 350.61 & 3.11 \\3052 \CFA @waitfor@, 1 @monitor@& 358.5 & 358.36 & 3.82 \\3053 \CFA @waitfor@, 2 @monitor@& 422 & 426.79 & 7.95 \\3054 \CFA @waitfor@, 4 @monitor@& 579.5 & 585.46 & 11.25 \\3001 \uC \code{Accept} & 350 & 350.61 & 3.11 \\ 3002 \CFA \code{waitfor}, 1 \code{monitor} & 358.5 & 358.36 & 3.82 \\ 3003 \CFA \code{waitfor}, 2 \code{monitor} & 422 & 426.79 & 7.95 \\ 3004 \CFA \code{waitfor}, 4 \code{monitor} & 579.5 & 585.46 & 11.25 \\ 3055 3005 \hline 3056 3006 \end{tabular} … … 3063 3013 \subsection{Object Creation} 3064 3014 Finally, the last benchmark measures the cost of creation for concurrent objects. 3065 Listing \ref{lst:creation} shows the code for @pthread@s and \CFA threads, with results shown in table \ref{tab:creation}.3015 Listing \ref{lst:creation} shows the code for \texttt{pthread}s and \CFA threads, with results shown in table \ref{tab:creation}. 3066 3016 As with all other benchmarks, all omitted tests are functionally identical to one of these tests. 3067 3017 The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low. … … 3069 3019 \begin{figure} 3070 3020 \begin{center} 3071 @pthread@ 3072 \begin{c fa}3021 \texttt{pthread} 3022 \begin{ccode} 3073 3023 int main() { 3074 3024 BENCH( … … 3089 3039 printf("%llu\n", result); 3090 3040 } 3091 \end{c fa}3041 \end{ccode} 3092 3042 3093 3043 3094 3044 3095 3045 \CFA Threads 3096 \begin{cfa }3046 \begin{cfacode} 3097 3047 int main() { 3098 3048 BENCH( … … 3104 3054 printf("%llu\n", result); 3105 3055 } 3106 \end{cfa }3056 \end{cfacode} 3107 3057 \end{center} 3108 \ caption{Benchmark code for \protect\lstinline|pthread|s and \CFA to measure object creation}3109 \ label{lst:creation}3058 \begin{cfacode}[caption={Benchmark code for \texttt{pthread}s and \CFA to measure object creation},label={lst:creation}] 3059 \end{cfacode} 3110 3060 \end{figure} 3111 3061 … … 3191 3141 \begin{tabular}[t]{|c|c|c|} 3192 3142 Sequential & Library Parallel & Language Parallel \\ 3193 \begin{cfa }[tabsize=3]3143 \begin{cfacode}[tabsize=3] 3194 3144 void big_sum( 3195 3145 int* a, int* b, … … 3215 3165 //... fill in a & b 3216 3166 big_sum(a,b,c,10000); 3217 \end{cfa } &\begin{cfa}[tabsize=3]3167 \end{cfacode} &\begin{cfacode}[tabsize=3] 3218 3168 void big_sum( 3219 3169 int* a, int* b, … … 3239 3189 //... fill in a & b 3240 3190 big_sum(a,b,c,10000); 3241 \end{cfa }&\begin{cfa}[tabsize=3]3191 \end{cfacode}&\begin{cfacode}[tabsize=3] 3242 3192 void big_sum( 3243 3193 int* a, int* b, … … 3263 3213 //... fill in a & b 3264 3214 big_sum(a,b,c,10000); 3265 \end{cfa }3215 \end{cfacode} 3266 3216 \end{tabular} 3267 3217 \end{center} -
doc/papers/concurrency/annex/local.bib
rb5563e1 ra9b1b0c 21 21 @string{pldi="Programming Language Design and Implementation"} 22 22 23 @inproceedings{Hochstein05, 24 keywords = {Application software; Computer aided software engineering; Concurrent computing; Educational 25 institutions; High performance computing; Humans; Instruments; Productivity; Programming profession; 26 Software engineering}, 27 author = {Lorin Hochstein and Jeff Carver and Forrest Shull and Sima Asgari and Victor Basili and Jeffrey K. Hollingsworth and Marvin V. Zelkowitz}, 28 title = {Parallel Programmer Productivity: A Case Study of Novice Parallel Programmers}, 29 booktitle = {Supercomputing, 2005. Proceedings of the ACM/IEEE SC 2005 Conference}, 30 publisher = {IEEE}, 31 year = {2005}, 32 pages = {35-35}, 33 month = nov, 23 24 @article{HPP:Study, 25 keywords = {Parallel, Productivity}, 26 author = {Lorin Hochstein and Jeff Carver and Forrest Shull and Sima Asgari and Victor Basili and Jeffrey K. Hollingsworth and Marvin V. Zelkowitz }, 27 title = {Parallel Programmer Productivity: A Case Study of Novice Parallel Programmers}, 34 28 } 35 29 … … 41 35 } 42 36 43 @misc{TBB, 44 keywords = {Intel, TBB}, 45 key = {TBB}, 46 title = {Thread Building Blocks}, 47 howpublished= {Intel, \url{https://www.threadingbuildingblocks.org}}, 48 note = {Accessed: 2018-3}, 37 @article{TBB, 38 key = {TBB}, 39 keywords = {Intel, TBB}, 40 title = {Intel Thread Building Blocks}, 41 note = "\url{https://www.threadingbuildingblocks.org/}" 49 42 } 50 43 … … 55 48 title = {C$\forall$ Programmming Language}, 56 49 note = {\url{https://plg.uwaterloo.ca/~cforall}}, 50 } 51 52 @mastersthesis{rob-thesis, 53 keywords = {Constructors, Destructors, Tuples}, 54 author = {Rob Schluntz}, 55 title = {Resource Management and Tuples in Cforall}, 56 year = 2017, 57 school = {University of Waterloo}, 58 note = {\url{https://uwspace.uwaterloo.ca/handle/10012/11830}}, 57 59 } 58 60 -
doc/papers/concurrency/style/cfa-format.tex
rb5563e1 ra9b1b0c 11 11 % from https://gist.github.com/nikolajquorning/92bbbeef32e1dd80105c9bf2daceb89a 12 12 \lstdefinelanguage{sml} { 13 morekeywords= { 14 EQUAL, GREATER, LESS, NONE, SOME, abstraction, abstype, and, andalso, array, as, before, 15 bool, case, char, datatype, do, else, end, eqtype, exception, exn, false, fn, fun, functor, 16 handle, if, in, include, infix, infixr, int, let, list, local, nil, nonfix, not, o, of, op, 17 open, option, orelse, overload, print, raise, real, rec, ref, sharing, sig, signature, 18 string, struct, structure, substring, then, true, type, unit, val, vector, where, while, 19 with, withtype, word 20 }, 21 morestring=[b]", 22 morecomment=[s]{(*}{*)}, 13 morekeywords= { 14 EQUAL, GREATER, LESS, NONE, SOME, abstraction, abstype, and, andalso, array, as, before, bool, case, char, datatype, do, else, end, eqtype, exception, exn, false, fn, fun, functor, handle, if, in, include, infix, infixr, int, let, list, local, nil, nonfix, not, o, of, op, open, option, orelse, overload, print, raise, real, rec, ref, sharing, sig, signature, string, struct, structure, substring, then, true, type, unit, val, vector, where, while, with, withtype, word 15 }, 16 morestring=[b]", 17 morecomment=[s]{(*}{*)}, 23 18 } 24 19 25 20 \lstdefinelanguage{D}{ 26 % Keywords 27 morekeywords=[1]{ 28 abstract, alias, align, auto, body, break, cast, catch, class, const, continue, debug, 29 delegate, delete, deprecated, do, else, enum, export, false, final, finally, for, foreach, 30 foreach_reverse, function, goto, if, immutable, import, in, inout, interface, invariant, is, 31 lazy, macro, mixin, module, new, nothrow, null, out, override, package, pragma, private, 32 protected, public, pure, ref, return, shared, static, struct, super, switch, synchronized, 33 template, this, throw, true, try, typedef, typeid, typeof, union, unittest, volatile, while, 34 with 35 }, 36 % Special identifiers, common functions 37 morekeywords=[2]{enforce}, 38 % Ugly identifiers 39 morekeywords=[3]{ 40 __DATE__, __EOF__, __FILE__, __LINE__, __TIMESTAMP__, __TIME__, __VENDOR__, 41 __VERSION__, __ctfe, __gshared, __monitor, __thread, __vptr, _argptr, 42 _arguments, _ctor, _dtor 43 }, 44 % Basic types 45 morekeywords=[4]{ 46 byte, ubyte, short, ushort, int, uint, long, ulong, cent, ucent, void, bool, bit, float, 47 double, real, ushort, int, uint, long, ulong, float, char, wchar, dchar, string, wstring, 48 dstring, ireal, ifloat, idouble, creal, cfloat, cdouble, size_t, ptrdiff_t, sizediff_t, 49 equals_t, hash_t 50 }, 51 % Strings 52 morestring=[b]{"}, 53 morestring=[b]{'}, 54 morestring=[b]{`}, 55 % Comments 56 comment=[l]{//}, 57 morecomment=[s]{/*}{*/}, 58 morecomment=[s][\color{blue}]{/**}{*/}, 59 morecomment=[n]{/+}{+/}, 60 morecomment=[n][\color{blue}]{/++}{+/}, 61 % Options 62 sensitive=true 21 % Keywords 22 morekeywords=[1]{ 23 abstract, alias, align, auto, body, break, cast, catch, class, const, 24 continue, debug, delegate, delete, deprecated, do, else, enum, export, 25 false, final, finally, for, foreach, foreach_reverse, function, goto, if, 26 immutable, import, in, inout, interface, invariant, is, lazy, macro, mixin, 27 module, new, nothrow, null, out, override, package, pragma, private, 28 protected, public, pure, ref, return, shared, static, struct, super, 29 switch, synchronized, template, this, throw, true, try, typedef, typeid, 30 typeof, union, unittest, volatile, while, with 31 }, 32 % Special identifiers, common functions 33 morekeywords=[2]{enforce}, 34 % Ugly identifiers 35 morekeywords=[3]{ 36 __DATE__, __EOF__, __FILE__, __LINE__, __TIMESTAMP__, __TIME__, __VENDOR__, 37 __VERSION__, __ctfe, __gshared, __monitor, __thread, __vptr, _argptr, 38 _arguments, _ctor, _dtor 39 }, 40 % Basic types 41 morekeywords=[4]{ 42 byte, ubyte, short, ushort, int, uint, long, ulong, cent, ucent, void, 43 bool, bit, float, double, real, ushort, int, uint, long, ulong, float, 44 char, wchar, dchar, string, wstring, dstring, ireal, ifloat, idouble, 45 creal, cfloat, cdouble, size_t, ptrdiff_t, sizediff_t, equals_t, hash_t 46 }, 47 % Strings 48 morestring=[b]{"}, 49 morestring=[b]{'}, 50 morestring=[b]{`}, 51 % Comments 52 comment=[l]{//}, 53 morecomment=[s]{/*}{*/}, 54 morecomment=[s][\color{blue}]{/**}{*/}, 55 morecomment=[n]{/+}{+/}, 56 morecomment=[n][\color{blue}]{/++}{+/}, 57 % Options 58 sensitive=true 63 59 } 64 60 65 61 \lstdefinelanguage{rust}{ 66 % Keywords 67 morekeywords=[1]{ 68 abstract, alignof, as, become, box, break, const, continue, crate, do, else, enum, extern, 69 false, final, fn, for, if, impl, in, let, loop, macro, match, mod, move, mut, offsetof, 70 override, priv, proc, pub, pure, ref, return, Self, self, sizeof, static, struct, super, 71 trait, true, type, typeof, unsafe, unsized, use, virtual, where, while, yield 72 }, 73 % Strings 74 morestring=[b]{"}, 75 % Comments 76 comment=[l]{//}, 77 morecomment=[s]{/*}{*/}, 78 % Options 79 sensitive=true 62 % Keywords 63 morekeywords=[1]{ 64 abstract, alignof, as, become, box, 65 break, const, continue, crate, do, 66 else, enum, extern, false, final, 67 fn, for, if, impl, in, 68 let, loop, macro, match, mod, 69 move, mut, offsetof, override, priv, 70 proc, pub, pure, ref, return, 71 Self, self, sizeof, static, struct, 72 super, trait, true, type, typeof, 73 unsafe, unsized, use, virtual, where, 74 while, yield 75 }, 76 % Strings 77 morestring=[b]{"}, 78 % Comments 79 comment=[l]{//}, 80 morecomment=[s]{/*}{*/}, 81 % Options 82 sensitive=true 80 83 } 81 84 82 85 \lstdefinelanguage{pseudo}{ 83 morekeywords={string,uint,int,bool,float}, 84 sensitive=true, 85 morecomment=[l]{//}, 86 morecomment=[s]{/*}{*/}, 87 morestring=[b]', 88 morestring=[b]", 89 morestring=[s]{`}{`}, 90 } 86 morekeywords={string,uint,int,bool,float},% 87 sensitive=true,% 88 morecomment=[l]{//},% 89 morecomment=[s]{/*}{*/},% 90 morestring=[b]',% 91 morestring=[b]",% 92 morestring=[s]{`}{`},% 93 }% 91 94 92 95 \newcommand{\KWC}{K-W C\xspace} 93 96 94 97 \lstdefinestyle{pseudoStyle}{ 95 escapeinside={@@},96 basicstyle=\linespread{0.9}\sf\footnotesize, % reduce line spacing and use typewriter font97 keywordstyle=\bfseries\color{blue},98 keywordstyle=[2]\bfseries\color{Plum},99 commentstyle=\itshape\color{OliveGreen}, % green and italic comments100 identifierstyle=\color{identifierCol},101 stringstyle=\sf\color{Mahogany},% use sanserif font102 mathescape=true,103 columns=fixed,104 aboveskip=4pt,% spacing above/below code block105 belowskip=3pt,106 keepspaces=true,107 tabsize=4,108 % frame=lines,109 literate=,110 showlines=true,% show blank lines at end of code111 showspaces=false,112 showstringspaces=false,113 escapechar=\$,114 xleftmargin=\parindentlnth,% indent code to paragraph indentation115 moredelim=[is][\color{red}\bfseries]{**R**}{**R**}, % red highlighting116 % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting117 % moredelim=** allows cumulative application98 escapeinside={@@}, 99 basicstyle=\linespread{0.9}\sf\footnotesize, % reduce line spacing and use typewriter font 100 keywordstyle=\bfseries\color{blue}, 101 keywordstyle=[2]\bfseries\color{Plum}, 102 commentstyle=\itshape\color{OliveGreen}, % green and italic comments 103 identifierstyle=\color{identifierCol}, 104 stringstyle=\sf\color{Mahogany}, % use sanserif font 105 mathescape=true, 106 columns=fixed, 107 aboveskip=4pt, % spacing above/below code block 108 belowskip=3pt, 109 keepspaces=true, 110 tabsize=4, 111 % frame=lines, 112 literate=, 113 showlines=true, % show blank lines at end of code 114 showspaces=false, 115 showstringspaces=false, 116 escapechar=\$, 117 xleftmargin=\parindentlnth, % indent code to paragraph indentation 118 moredelim=[is][\color{red}\bfseries]{**R**}{**R**}, % red highlighting 119 % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting 120 % moredelim=** allows cumulative application 118 121 } 119 122 120 123 \lstdefinestyle{defaultStyle}{ 121 escapeinside={@@},122 basicstyle=\linespread{0.9}\tt\footnotesize, % reduce line spacing and use typewriter font123 keywordstyle=\bfseries\color{blue},124 keywordstyle=[2]\bfseries\color{Plum},125 commentstyle=\itshape\color{OliveGreen}, % green and italic comments126 identifierstyle=\color{identifierCol},127 stringstyle=\sf\color{Mahogany},% use sanserif font128 mathescape=true,129 columns=fixed,130 aboveskip=4pt,% spacing above/below code block131 belowskip=3pt,132 keepspaces=true,133 tabsize=4,134 % frame=lines,135 literate=,136 showlines=true,% show blank lines at end of code137 showspaces=false,138 showstringspaces=false,139 escapechar=\$,140 xleftmargin=\parindentlnth,% indent code to paragraph indentation141 moredelim=[is][\color{red}\bfseries]{**R**}{**R**}, % red highlighting142 % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting143 % moredelim=** allows cumulative application124 escapeinside={@@}, 125 basicstyle=\linespread{0.9}\tt\footnotesize, % reduce line spacing and use typewriter font 126 keywordstyle=\bfseries\color{blue}, 127 keywordstyle=[2]\bfseries\color{Plum}, 128 commentstyle=\itshape\color{OliveGreen}, % green and italic comments 129 identifierstyle=\color{identifierCol}, 130 stringstyle=\sf\color{Mahogany}, % use sanserif font 131 mathescape=true, 132 columns=fixed, 133 aboveskip=4pt, % spacing above/below code block 134 belowskip=3pt, 135 keepspaces=true, 136 tabsize=4, 137 % frame=lines, 138 literate=, 139 showlines=true, % show blank lines at end of code 140 showspaces=false, 141 showstringspaces=false, 142 escapechar=\$, 143 xleftmargin=\parindentlnth, % indent code to paragraph indentation 144 moredelim=[is][\color{red}\bfseries]{**R**}{**R**}, % red highlighting 145 % moredelim=* detects keywords, comments, strings, and other delimiters and applies their formatting 146 % moredelim=** allows cumulative application 144 147 } 145 148 146 149 \lstdefinestyle{cfaStyle}{ 147 escapeinside={@@},148 basicstyle=\linespread{0.9}\sf, % reduce line spacing and use typewriter font150 escapeinside={@@}, 151 basicstyle=\linespread{0.9}\sf, % reduce line spacing and use typewriter font 149 152 % keywordstyle=\bfseries\color{blue}, 150 keywordstyle=[2]\bfseries\color{red},153 keywordstyle=[2]\bfseries\color{red}, 151 154 % commentstyle=\sf\itshape\color{OliveGreen}, % green and italic comments 152 identifierstyle=\color{identifierCol},153 % stringstyle=\sf\color{Mahogany}, % use sanserif font154 stringstyle=\tt, % use typewriter font155 mathescape=true,156 columns=fixed,157 aboveskip=4pt,% spacing above/below code block158 belowskip=3pt,159 keepspaces=true,160 tabsize=4,161 % frame=lines,162 literate=,163 showlines=true,% show blank lines at end of code164 showspaces=false,165 showstringspaces=false,166 escapechar=\$,167 xleftmargin=\parindentlnth,% indent code to paragraph indentation168 moredelim=[is][\color{red}\bfseries]{**R**}{**R**}, % red highlighting169 morekeywords=[2]{accept, signal, signal_block, wait, waitfor},155 identifierstyle=\color{identifierCol}, 156 % stringstyle=\sf\color{Mahogany}, % use sanserif font 157 stringstyle=\tt, % use typewriter font 158 mathescape=true, 159 columns=fixed, 160 aboveskip=4pt, % spacing above/below code block 161 belowskip=3pt, 162 keepspaces=true, 163 tabsize=4, 164 % frame=lines, 165 literate=, 166 showlines=true, % show blank lines at end of code 167 showspaces=false, 168 showstringspaces=false, 169 escapechar=\$, 170 xleftmargin=\parindentlnth, % indent code to paragraph indentation 171 moredelim=[is][\color{red}\bfseries]{**R**}{**R**}, % red highlighting 172 morekeywords=[2]{accept, signal, signal_block, wait, waitfor}, 170 173 } 171 174 … … 173 176 174 177 \lstnewenvironment{ccode}[1][]{ 175 \lstset{176 language = C,177 style=defaultStyle,178 captionpos=b,179 #1180 }178 \lstset{ 179 language = C, 180 style=defaultStyle, 181 captionpos=b, 182 #1 183 } 181 184 }{} 182 185 183 186 \lstnewenvironment{cfacode}[1][]{ 184 \lstset{185 language = CFA,186 style=cfaStyle,187 captionpos=b,188 #1189 }187 \lstset{ 188 language = CFA, 189 style=cfaStyle, 190 captionpos=b, 191 #1 192 } 190 193 }{} 191 194 192 195 \lstnewenvironment{pseudo}[1][]{ 193 \lstset{194 language = pseudo,195 style=pseudoStyle,196 captionpos=b,197 #1198 }196 \lstset{ 197 language = pseudo, 198 style=pseudoStyle, 199 captionpos=b, 200 #1 201 } 199 202 }{} 200 203 201 204 \lstnewenvironment{cppcode}[1][]{ 202 \lstset{203 language = c++,204 style=defaultStyle,205 captionpos=b,206 #1207 }205 \lstset{ 206 language = c++, 207 style=defaultStyle, 208 captionpos=b, 209 #1 210 } 208 211 }{} 209 212 210 213 \lstnewenvironment{ucppcode}[1][]{ 211 \lstset{212 language = c++,213 style=defaultStyle,214 captionpos=b,215 #1216 }214 \lstset{ 215 language = c++, 216 style=defaultStyle, 217 captionpos=b, 218 #1 219 } 217 220 }{} 218 221 219 222 \lstnewenvironment{javacode}[1][]{ 220 \lstset{221 language = java,222 style=defaultStyle,223 captionpos=b,224 #1225 }223 \lstset{ 224 language = java, 225 style=defaultStyle, 226 captionpos=b, 227 #1 228 } 226 229 }{} 227 230 228 231 \lstnewenvironment{scalacode}[1][]{ 229 \lstset{230 language = scala,231 style=defaultStyle,232 captionpos=b,233 #1234 }232 \lstset{ 233 language = scala, 234 style=defaultStyle, 235 captionpos=b, 236 #1 237 } 235 238 }{} 236 239 237 240 \lstnewenvironment{smlcode}[1][]{ 238 \lstset{239 language = sml,240 style=defaultStyle,241 captionpos=b,242 #1243 }241 \lstset{ 242 language = sml, 243 style=defaultStyle, 244 captionpos=b, 245 #1 246 } 244 247 }{} 245 248 246 249 \lstnewenvironment{dcode}[1][]{ 247 \lstset{248 language = D,249 style=defaultStyle,250 captionpos=b,251 #1252 }250 \lstset{ 251 language = D, 252 style=defaultStyle, 253 captionpos=b, 254 #1 255 } 253 256 }{} 254 257 255 258 \lstnewenvironment{rustcode}[1][]{ 256 \lstset{257 language = rust,258 style=defaultStyle,259 captionpos=b,260 #1261 }259 \lstset{ 260 language = rust, 261 style=defaultStyle, 262 captionpos=b, 263 #1 264 } 262 265 }{} 263 266 264 267 \lstnewenvironment{gocode}[1][]{ 265 \lstset{266 language = Golang,267 style=defaultStyle,268 captionpos=b,269 #1270 }268 \lstset{ 269 language = Golang, 270 style=defaultStyle, 271 captionpos=b, 272 #1 273 } 271 274 }{} 272 275 … … 276 279 \newcommand{\code}[1]{\lstinline[language=CFA,style=cfaStyle]{#1}} 277 280 \newcommand{\pscode}[1]{\lstinline[language=pseudo,style=pseudoStyle]{#1}} 278 279 % Local Variables: %280 % tab-width: 4 %281 % fill-column: 100 %282 % End: % -
doc/papers/general/Paper.tex
rb5563e1 ra9b1b0c 671 671 \begin{cfa} 672 672 int f( int, int ); 673 [int]g( [int, int] );674 [int]h( int, [int, int] );673 int g( [int, int] ); 674 int h( int, [int, int] ); 675 675 [int, int] x; 676 676 int y; … … 706 706 This example shows mass, multiple, and cascading assignment used in one expression: 707 707 \begin{cfa} 708 [void]f( [int, int] );708 void f( [int, int] ); 709 709 f( [x, y] = z = 1.5 ); $\C{// assignments in parameter list}$ 710 710 \end{cfa} … … 814 814 Flattening and restructuring conversions are also applied to tuple types in polymorphic type assertions. 815 815 \begin{cfa} 816 [int]f( [int, double], double );816 int f( [int, double], double ); 817 817 forall( otype T, otype U | { T f( T, U, U ); } ) void g( T, U ); 818 818 g( 5, 10.21 ); … … 1152 1152 case 4: 1153 1153 ... `fallthrough common;` 1154 `common`: // below fallthrough at same level as case clauses1154 common: // below fallthrough at same level as case clauses 1155 1155 ... // common code for cases 3 and 4 1156 1156 // implicit break … … 1857 1857 \begin{cfa} 1858 1858 struct S { double x, y; }; 1859 int x, y;1859 int i, j; 1860 1860 void f( int & i, int & j, S & s, int v[] ); 1861 f( 3, x + y, (S){ 1.0, 7.0 }, (int [3]){ 1, 2, 3 } );$\C{// pass rvalue to lvalue \(\Rightarrow\) implicit temporary}$1861 f( 3, i + j, (S){ 1.0, 7.0 }, (int [3]){ 1, 2, 3 } ); $\C{// pass rvalue to lvalue \(\Rightarrow\) implicit temporary}$ 1862 1862 \end{cfa} 1863 1863 This allows complex values to be succinctly and efficiently passed to functions, without the syntactic overhead of explicit definition of a temporary variable or the runtime cost of pass-by-value. … … 1946 1946 1947 1947 One of the strengths (and weaknesses) of C is memory-management control, allowing resource release to be precisely specified versus unknown release with garbage-collected memory-management. 1948 However, this manual approach is verbose, and it is useful to manage resources other than memory (\eg file handles) using the same mechanism as memory.1948 However, this manual approach is often verbose, and it is useful to manage resources other than memory (\eg file handles) using the same mechanism as memory. 1949 1949 \CC addresses these issues using Resource Aquisition Is Initialization (RAII), implemented by means of \newterm{constructor} and \newterm{destructor} functions; 1950 1950 \CFA adopts constructors and destructors (and @finally@) to facilitate RAII. … … 1998 1998 { 1999 1999 VLA x, y = { 20, 0x01 }, z = y; $\C{// z points to y}$ 2000 // ?{}( x ); ?{}( y, 20, 0x01 );?{}( z, y );2000 // ?{}( x ); ?{}( y, 20, 0x01 ); ?{}( z, y ); 2001 2001 ^x{}; $\C{// deallocate x}$ 2002 2002 x{}; $\C{// reallocate x}$ … … 2099 2099 2100 2100 For readability, it is useful to associate units to scale literals, \eg weight (stone, pound, kilogram) or time (seconds, minutes, hours). 2101 The left of Figure~\ref{f:UserLiteral} shows the \CFA alternative call-syntax ( postfix:literal argument before function name), using the backquote, to convert basic literals into user literals.2101 The left of Figure~\ref{f:UserLiteral} shows the \CFA alternative call-syntax (literal argument before function name), using the backquote, to convert basic literals into user literals. 2102 2102 The backquote is a small character, making the unit (function name) predominate. 2103 2103 For examples, the multi-precision integer-type in Section~\ref{s:MultiPrecisionIntegers} has user literals: … … 2107 2107 y = "12345678901234567890123456789"|`mp| + "12345678901234567890123456789"|`mp|; 2108 2108 \end{cfa} 2109 Because \CFA uses a standard function, all types and literals are applicable, as well as overloading and conversions , where @?`@ denotes a postfix-function name and @`@ denotes a postfix-function call.2109 Because \CFA uses a standard function, all types and literals are applicable, as well as overloading and conversions. 2110 2110 }% 2111 \begin{cquote}2112 \lstset{language=CFA,moredelim=**[is][\color{red}]{|}{|},deletedelim=**[is][]{`}{`}}2113 \lstDeleteShortInline@%2114 \begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{\hspace{2\parindentlnth}}l@{\hspace{2\parindentlnth}}l@{}}2115 \multicolumn{1}{c@{\hspace{2\parindentlnth}}}{\textbf{postfix function}} & \multicolumn{1}{c@{\hspace{2\parindentlnth}}}{\textbf{constant}} & \multicolumn{1}{c@{\hspace{2\parindentlnth}}}{\textbf{variable/expression}} & \multicolumn{1}{c}{\textbf{postfix pointer}} \\2116 \begin{cfa}2117 int ?`h( int s );2118 int ?`h( double s );2119 int ?`m( char c );2120 int ?`m( const char * s );2121 int ?`t( int a, int b, int c );2122 \end{cfa}2123 &2124 \begin{cfa}2125 0 `h;2126 3.5`h;2127 '1'`m;2128 "123" "456"`m;2129 [1,2,3]`t;2130 \end{cfa}2131 &2132 \begin{cfa}2133 int i = 7;2134 i`h;2135 (i + 3)`h;2136 (i + 3.5)`h;2137 2138 \end{cfa}2139 &2140 \begin{cfa}2141 int (* ?`p)( int i );2142 ?`p = ?`h;2143 3`p;2144 i`p;2145 (i + 3)`p;2146 \end{cfa}2147 \end{tabular}2148 \lstMakeShortInline@%2149 \end{cquote}2150 2111 2151 2112 The right of Figure~\ref{f:UserLiteral} shows the equivalent \CC version using the underscore for the call-syntax. -
src/Parser/ExpressionNode.cc
rb5563e1 ra9b1b0c 10 10 // Created On : Sat May 16 13:17:07 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Mar 22 11:57:39201813 // Update Count : 80112 // Last Modified On : Sat Mar 3 18:22:33 2018 13 // Update Count : 796 14 14 // 15 15 … … 94 94 } // checkLNInt 95 95 96 static void sepNumeric( string & str, string & units ) { 97 string::size_type posn = str.find_first_of( "`" ); 98 if ( posn != string::npos ) { 99 units = "?" + str.substr( posn ); // extract units 100 str.erase( posn ); // remove units 101 } // if 102 } // sepNumeric 103 96 104 Expression * build_constantInteger( string & str ) { 97 105 static const BasicType::Kind kind[2][6] = { … … 100 108 { BasicType::ShortUnsignedInt, BasicType::UnsignedChar, BasicType::UnsignedInt, BasicType::LongUnsignedInt, BasicType::LongLongUnsignedInt, BasicType::UnsignedInt128, }, 101 109 }; 110 111 string units; 112 sepNumeric( str, units ); // separate constant from units 102 113 103 114 bool dec = true, Unsigned = false; // decimal, unsigned constant … … 221 232 } // if 222 233 CLEANUP: 234 if ( units.length() != 0 ) { 235 ret = new UntypedExpr( new NameExpr( units ), { ret } ); 236 } // if 223 237 224 238 delete &str; // created by lex … … 254 268 }; 255 269 270 string units; 271 sepNumeric( str, units ); // separate constant from units 272 256 273 bool complx = false; // real, complex 257 274 int size = 1; // 0 => float, 1 => double, 2 => long double … … 286 303 if ( lnth != -1 ) { // explicit length ? 287 304 ret = new CastExpr( ret, new BasicType( Type::Qualifiers(), kind[complx][size] ) ); 305 } // if 306 if ( units.length() != 0 ) { 307 ret = new UntypedExpr( new NameExpr( units ), { ret } ); 288 308 } // if 289 309 -
src/Parser/lex.ll
rb5563e1 ra9b1b0c 10 10 * Created On : Sat Sep 22 08:58:10 2001 11 11 * Last Modified By : Peter A. Buhr 12 * Last Modified On : Thu Mar 22 16:47:06 201813 * Update Count : 6 6812 * Last Modified On : Sat Mar 3 18:38:16 2018 13 * Update Count : 640 14 14 */ 15 15 … … 54 54 55 55 void rm_underscore() { 56 // SKULLDUGGERY: remove underscores (ok to shorten?)56 // Remove underscores in numeric constant by copying the non-underscore characters to the front of the string. 57 57 yyleng = 0; 58 for ( int i = 0; yytext[i] != '\0'; i += 1 ) { // copying non-underscore characters to front of string 58 for ( int i = 0; yytext[i] != '\0'; i += 1 ) { 59 if ( yytext[i] == '`' ) { 60 // copy user suffix 61 for ( ; yytext[i] != '\0'; i += 1 ) { 62 yytext[yyleng] = yytext[i]; 63 yyleng += 1; 64 } // for 65 break; 66 } // if 59 67 if ( yytext[i] != '_' ) { 60 68 yytext[yyleng] = yytext[i]; … … 63 71 } // for 64 72 yytext[yyleng] = '\0'; 65 } // rm_underscore73 } 66 74 67 75 // Stop warning due to incorrectly generated flex code. … … 82 90 attr_identifier "@"{identifier} 83 91 92 user_suffix_opt ("`"{identifier})? 93 84 94 // numeric constants, CFA: '_' in constant 85 95 hex_quad {hex}("_"?{hex}){3} 86 96 size_opt (8|16|32|64|128)? 87 97 length ("ll"|"LL"|[lL]{size_opt})|("hh"|"HH"|[hH]) 88 integer_suffix_opt ("_"?(([uU]({length}?[iI]?)|([iI]{length}))|([iI]({length}?[uU]?)|([uU]{length}))|({length}([iI]?[uU]?)|([uU][iI]))|[zZ]))? 98 integer_suffix_opt ("_"?(([uU]({length}?[iI]?)|([iI]{length}))|([iI]({length}?[uU]?)|([uU]{length}))|({length}([iI]?[uU]?)|([uU][iI]))|[zZ]))?{user_suffix_opt} 89 99 90 100 octal_digits ({octal})|({octal}({octal}|"_")*{octal}) … … 108 118 floating_length ([fFdDlL]|[lL]{floating_size}) 109 119 floating_suffix ({floating_length}?[iI]?)|([iI]{floating_length}) 110 floating_suffix_opt ("_"?({floating_suffix}|"DL"))? 120 floating_suffix_opt ("_"?({floating_suffix}|"DL"))?{user_suffix_opt} 111 121 decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal}) 112 122 floating_decimal {decimal_digits}"."{exponent}?{floating_suffix_opt} … … 115 125 116 126 binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits} 117 hex_floating_suffix_opt ("_"?({floating_suffix}))? 127 hex_floating_suffix_opt ("_"?({floating_suffix}))?{user_suffix_opt} 118 128 hex_floating_fraction ({hex_digits}?"."{hex_digits})|({hex_digits}".") 119 129 hex_floating_constant {hex_prefix}(({hex_floating_fraction}{binary_exponent})|({hex_digits}{binary_exponent})){hex_floating_suffix_opt} … … 304 314 /* identifier */ 305 315 {identifier} { IDENTIFIER_RETURN(); } 306 "`"{identifier}"`" { // CFA307 yytext[yyleng - 1] = '\0'; yytext += 1; // SKULLDUGGERY: remove backquotes (ok to shorten?)308 IDENTIFIER_RETURN();309 }310 316 {attr_identifier} { ATTRIBUTE_RETURN(); } 317 "`" { BEGIN BKQUOTE; } 318 <BKQUOTE>{identifier} { IDENTIFIER_RETURN(); } 319 <BKQUOTE>"`" { BEGIN 0; } 311 320 312 321 /* numeric constants */ … … 323 332 ({cwide_prefix}[_]?)?['] { BEGIN QUOTE; rm_underscore(); strtext = new string( yytext, yyleng ); } 324 333 <QUOTE>[^'\\\n]* { strtext->append( yytext, yyleng ); } 325 <QUOTE>['\n] { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(CHARACTERconstant); }334 <QUOTE>['\n]{user_suffix_opt} { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(CHARACTERconstant); } 326 335 /* ' stop editor highlighting */ 327 336 … … 329 338 ({swide_prefix}[_]?)?["] { BEGIN STRING; rm_underscore(); strtext = new string( yytext, yyleng ); } 330 339 <STRING>[^"\\\n]* { strtext->append( yytext, yyleng ); } 331 <STRING>["\n] { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(STRINGliteral); }340 <STRING>["\n]{user_suffix_opt} { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(STRINGliteral); } 332 341 /* " stop editor highlighting */ 333 342 … … 339 348 /* punctuation */ 340 349 "@" { ASCIIOP_RETURN(); } 341 "`" { ASCIIOP_RETURN(); }342 350 "[" { ASCIIOP_RETURN(); } 343 351 "]" { ASCIIOP_RETURN(); } … … 404 412 "?"({op_unary_pre_post}|"()"|"[?]"|"{}") { IDENTIFIER_RETURN(); } 405 413 "^?{}" { IDENTIFIER_RETURN(); } 406 "?`"{identifier} { IDENTIFIER_RETURN(); } // postfixoperator414 "?`"{identifier} { IDENTIFIER_RETURN(); } // unit operator 407 415 "?"{op_binary_over}"?" { IDENTIFIER_RETURN(); } // binary 408 416 /* -
src/Parser/parser.yy
rb5563e1 ra9b1b0c 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Mar 22 16:56:21201813 // Update Count : 3 12512 // Last Modified On : Fri Mar 16 17:24:44 2018 13 // Update Count : 3081 14 14 // 15 15 … … 126 126 } // if 127 127 } // rebindForall 128 129 NameExpr * build_postfix_name( const string * name ) {130 NameExpr * new_name = build_varref( new string( "?`" + *name ) );131 delete name;132 return new_name;133 } // build_postfix_name134 128 135 129 bool forall = false; // aggregate have one or more forall qualifiers ? … … 486 480 | '(' compound_statement ')' // GCC, lambda expression 487 481 { $$ = new ExpressionNode( new StmtExpr( dynamic_cast< CompoundStmt * >(maybeMoveBuild< Statement >($2) ) ) ); } 488 | constant '`' IDENTIFIER // CFA, postfix call489 { $$ = new ExpressionNode( build_func( new ExpressionNode( build_postfix_name( $3 ) ), $1 ) ); }490 | string_literal '`' IDENTIFIER // CFA, postfix call491 { $$ = new ExpressionNode( build_func( new ExpressionNode( build_postfix_name( $3 ) ), new ExpressionNode( $1 ) ) ); }492 | IDENTIFIER '`' IDENTIFIER // CFA, postfix call493 { $$ = new ExpressionNode( build_func( new ExpressionNode( build_postfix_name( $3 ) ), new ExpressionNode( build_varref( $1 ) ) ) ); }494 | tuple '`' IDENTIFIER // CFA, postfix call495 { $$ = new ExpressionNode( build_func( new ExpressionNode( build_postfix_name( $3 ) ), $1 ) ); }496 | '(' comma_expression ')' '`' IDENTIFIER // CFA, postfix call497 { $$ = new ExpressionNode( build_func( new ExpressionNode( build_postfix_name( $5 ) ), $2 ) ); }498 482 | type_name '.' no_attr_identifier // CFA, nested type 499 483 { SemanticError( yylloc, "Qualified names are currently unimplemented." ); $$ = nullptr; } // FIX ME -
src/libcfa/Makefile.am
rb5563e1 ra9b1b0c 11 11 ## Created On : Sun May 31 08:54:01 2015 12 12 ## Last Modified By : Peter A. Buhr 13 ## Last Modified On : Thu Mar 22 17:14:15201814 ## Update Count : 22 413 ## Last Modified On : Fri Feb 9 15:51:24 2018 14 ## Update Count : 223 15 15 ############################################################################### 16 16 … … 99 99 ${stdhdr} \ 100 100 math \ 101 time \102 101 gmp \ 103 102 bits/align.h \ -
src/libcfa/Makefile.in
rb5563e1 ra9b1b0c 263 263 containers/result containers/vector concurrency/coroutine \ 264 264 concurrency/thread concurrency/kernel concurrency/monitor \ 265 ${shell find stdhdr -type f -printf "%p "} math timegmp \265 ${shell find stdhdr -type f -printf "%p "} math gmp \ 266 266 bits/align.h bits/cfatime.h bits/containers.h bits/defs.h \ 267 267 bits/debug.h bits/locks.h concurrency/invoke.h … … 435 435 ${stdhdr} \ 436 436 math \ 437 time \438 437 gmp \ 439 438 bits/align.h \ -
src/tests/coroutine/fibonacci.c
rb5563e1 ra9b1b0c 10 10 // Created On : Thu Jun 8 07:29:37 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : T hu Mar 22 22:45:44 201813 // Update Count : 1 512 // Last Modified On : Tue Dec 5 22:27:54 2017 13 // Update Count : 14 14 14 // 15 15 … … 21 21 void ?{}( Fibonacci & fib ) with( fib ) { fn = 0; } 22 22 23 // main automatically called on first resume24 23 void main( Fibonacci & fib ) with( fib ) { 25 24 int fn1, fn2; // retained between resumes 26 fn = 0; fn1 = fn; // 1st case 25 26 fn = 0; fn1 = fn; // 1st case 27 27 suspend(); // restart last resume 28 fn = 1; fn2 = fn1; fn1 = fn; // 2nd case 28 29 fn = 1; fn2 = fn1; fn1 = fn; // 2nd case 29 30 suspend(); // restart last resume 31 30 32 for ( ;; ) { 31 33 fn = fn1 + fn2; fn2 = fn1; fn1 = fn; // general case
Note:
See TracChangeset
for help on using the changeset viewer.