Changes in / [aba20d2:8b34df0]
- Files:
-
- 3 added
- 4 deleted
- 17 edited
-
doc/papers/concurrency/FullCoroutinePhases.fig (added)
-
doc/papers/concurrency/FullProdConsStack.fig (added)
-
doc/papers/concurrency/Paper.tex (modified) (76 diffs)
-
doc/papers/concurrency/corlayout.fig (added)
-
doc/papers/concurrency/examples/PingPong.c (modified) (1 diff)
-
doc/papers/concurrency/examples/Pingpong.cfa (modified) (1 diff)
-
doc/papers/concurrency/figures/FullCoroutinePhases.fig (deleted)
-
doc/papers/concurrency/figures/FullProdConsStack.fig (deleted)
-
doc/papers/concurrency/figures/corlayout.fig (deleted)
-
doc/papers/concurrency/figures/ext_monitor.fig (modified) (1 diff)
-
doc/papers/concurrency/figures/monitor.fig (modified) (1 diff)
-
libcfa/src/iostream.cfa (modified) (3 diffs)
-
libcfa/src/iostream.hfa (modified) (4 diffs)
-
src/AST/Convert.cpp (modified) (3 diffs)
-
src/AST/Expr.cpp (modified) (5 diffs)
-
src/AST/Expr.hpp (modified) (5 diffs)
-
src/Parser/ExpressionNode.cc (modified) (1 diff)
-
src/ResolvExpr/Candidate.hpp (modified) (2 diffs)
-
src/SynTree/Constant.cc (modified) (4 diffs)
-
src/SynTree/Constant.h (modified) (4 diffs)
-
src/include/optional (deleted)
-
tests/.in/manipulatorsInput.txt (modified) (1 diff)
-
tests/io2.cfa (modified) (2 diffs)
-
tests/manipulatorsInput.cfa (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
doc/papers/concurrency/Paper.tex
raba20d2 r8b34df0 158 158 _Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__, 159 159 virtual, __volatile, __volatile__, waitfor, when, with, zero_t}, 160 moredirectives={defined,include_next}, 161 % replace/adjust listing characters that look bad in sanserif 162 literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1 163 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1 164 {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1 165 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex{\textrm{\textgreater}}}2, 160 moredirectives={defined,include_next}% 166 161 } 167 162 … … 180 175 aboveskip=4pt, % spacing above/below code block 181 176 belowskip=3pt, 177 % replace/adjust listing characters that look bad in sanserif 178 literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1 179 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1 180 {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1 181 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex{\textrm{\textgreater}}}2, 182 182 moredelim=**[is][\color{red}]{`}{`}, 183 183 }% lstset … … 205 205 } 206 206 207 % Go programming language: https://github.com/julienc91/listings-golang/blob/master/listings-golang.sty208 \lstdefinelanguage{Golang}{209 morekeywords=[1]{package,import,func,type,struct,return,defer,panic,recover,select,var,const,iota,},210 morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,int32,int64,211 bool,float32,float64,complex64,complex128,byte,rune,uintptr, error,interface},212 morekeywords=[3]{map,slice,make,new,nil,len,cap,copy,close,true,false,delete,append,real,imag,complex,chan,},213 
morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,else,default,},214 morekeywords=[5]{Println,Printf,Error,},215 sensitive=true,216 morecomment=[l]{//},217 morecomment=[s]{/*}{*/},218 morestring=[b]',219 morestring=[b]",220 morestring=[s]{`}{`},221 % replace/adjust listing characters that look bad in sanserif222 literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1223 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1224 {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1225 {<-}{\makebox[2ex][c]{\textrm{\textless}\raisebox{0.5ex}{\rule{0.8ex}{0.075ex}}}}2,226 }227 228 207 \lstnewenvironment{cfa}[1][] 229 208 {\lstset{#1}} … … 236 215 {} 237 216 \lstnewenvironment{Go}[1][] 238 {\lstset{language= Golang,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}217 {\lstset{language=go,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}} 239 218 {} 240 219 \lstnewenvironment{python}[1][] … … 274 253 These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library features like C pthreads. 275 254 \CFA introduces modern language-level control-flow mechanisms, like generators, coroutines, user-level threading, and monitors for mutual exclusion and synchronization. 276 %Library extension for executors, futures, and actors are built on these basic mechanisms.255 Library extension for executors, futures, and actors are built on these basic mechanisms. 277 256 The runtime provides significant programmer simplification and safety by eliminating spurious wakeup and optional monitor barging. 278 257 The runtime also ensures multiple monitors can be safely acquired \emph{simultaneously} (deadlock free), and this feature is fully integrated with all monitor synchronization mechanisms. 
… … 297 276 However, functions \emph{cannot} be nested in structures, so there is no lexical binding between a structure and set of functions (member/method) implemented by an implicit \lstinline@this@ (receiver) parameter.}, 298 277 backwards-compatible extension of the C programming language. 299 In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control flow capabilities on top of a highly popular programming language allowing immediate dissemination.300 278 Within the \CFA framework, new control-flow features are created from scratch. 301 279 ISO \Celeven defines only a subset of the \CFA extensions, where the overlapping features are concurrency~\cite[\S~7.26]{C11}. … … 310 288 Kernel threading was chosen, largely because of its simplicity and fit with the simpler operating systems and hardware architectures at the time, which gave it a performance advantage~\cite{Drepper03}. 311 289 Libraries like pthreads were developed for C, and the Solaris operating-system switched from user (JDK 1.1~\cite{JDK1.1}) to kernel threads. 312 As a result, languages like Java, Scala , Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms.290 As a result, languages like Java, Scala~\cite{Scala}, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms. 313 291 From 2000 onwards, languages like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book} have championed the M:N user-threading model, and many user-threading libraries have appeared~\cite{Qthreads,MPC,BoostThreads}, including putting green threads back into Java~\cite{Quasar}. 
314 292 The main argument for user-level threading is that they are lighter weight than kernel threads (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing medium work-units to facilitate load balancing by the runtime~\cite{Verch12}. … … 364 342 \item 365 343 providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features. 366 %\item367 %library extensions for executors, futures, and actors built on the basic mechanisms.344 \item 345 library extensions for executors, futures, and actors built on the basic mechanisms. 368 346 \item 369 347 a runtime system with no spurious wakeup. 370 348 \item 371 349 a dynamic partitioning mechanism to segregate the execution environment for specialized requirements. 372 %\item373 %a non-blocking I/O library350 \item 351 a non-blocking I/O library 374 352 \item 375 353 experimental results showing comparable performance of the new features with similar mechanisms in other programming languages. … … 392 370 Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general. 393 371 Note, creation cost is amortized across usage, so activation cost is usually the dominant factor. 
372 394 373 395 374 \begin{figure} … … 476 455 \hspace{3pt} 477 456 \subfloat[C generator implementation]{\label{f:CFibonacciSim}\usebox\myboxC} 478 \caption{Fibonacci (output) asymmetric generator}457 \caption{Fibonacci (output) Asymmetric Generator} 479 458 \label{f:FibonacciAsymmetricGenerator} 480 459 … … 551 530 \subfloat[C generator simulation]{\label{f:CFormatSim}\usebox\myboxB} 552 531 \hspace{3pt} 553 \caption{Formatter (input) asymmetric generator}532 \caption{Formatter (input) Asymmetric Generator} 554 533 \label{f:FormatterAsymmetricGenerator} 555 534 \end{figure} 556 535 557 For generators, coroutines, and threads, many designs are based on function objects or pointers~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}.558 For example, Python presents generators as a function object:559 \begin{python}560 def Gen():561 ... `yield val` ...562 gen = Gen()563 for i in range( 10 ):564 print( next( gen ) )565 \end{python}566 Boost presents coroutines in terms of four functor object-types:567 \begin{cfa}568 asymmetric_coroutine<>::pull_type569 asymmetric_coroutine<>::push_type570 symmetric_coroutine<>::call_type571 symmetric_coroutine<>::yield_type572 \end{cfa}573 and many languages present threading using function pointers, @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}, \eg pthreads:574 \begin{cfa}575 void * rtn( void * arg ) { ... 
}576 int i = 3, rc;577 pthread_t t; $\C{// thread id}$578 `rc = pthread_create( &t, rtn, (void *)i );` $\C{// create and initialized task, type-unsafe input parameter}$579 \end{cfa}580 % void mycor( pthread_t cid, void * arg ) {581 % int * value = (int *)arg; $\C{// type unsafe, pointer-size only}$582 % // thread body583 % }584 % int main() {585 % int input = 0, output;586 % coroutine_t cid = coroutine_create( &mycor, (void *)&input ); $\C{// type unsafe, pointer-size only}$587 % coroutine_resume( cid, (void *)input, (void **)&output ); $\C{// type unsafe, pointer-size only}$588 % }589 \CFA's preferred presentation model for generators/coroutines/threads is a hybrid of objects and functions, with an object-oriented flavour.590 Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type.591 The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed.592 593 536 594 537 \subsection{Generator} … … 596 539 Stackless generators have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution. 597 540 The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity. 598 A series of different kinds of generators and their implementation demonstrate how this goal is accomplished. 541 How this goal is accomplished is done through a series of different kinds of generators and their implementation. 599 542 600 543 Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written in C and \CFA, with a simple C implementation for the \CFA version. … … 605 548 Figure~\ref{f:CFibonacci} shows the C approach of manually creating the closure in structure @Fib@, and multiple instances of this closure provide multiple Fibonacci generators. 
606 549 The C version only has the middle execution state because the top execution state becomes declaration initialization. 607 Figure~\ref{f:CFAFibonacciGen} shows the \CFA approach, which also has a manual closure, but replaces the structure with a custom \CFA @generator@ type. 608 This generator type is then connected to a function that \emph{must be named \lstinline|main|},\footnote{ 609 The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution. 610 Hence, overloading this name for other starting points (generator/coroutine/thread) is a logical extension.} 611 which takes as its only parameter a reference to the generator type, called a \emph{generator main}. 550 Figure~\ref{f:CFAFibonacciGen} shows the \CFA approach, which also has a manual closure, but replaces the structure with a special \CFA @generator@ type. 551 This generator type is then connected to a function named @main@ that takes as its only parameter a reference to the generator type, called a \emph{generator main}. 612 552 The generator main contains @suspend@ statements that suspend execution without ending the generator versus @return@. 613 553 For the Fibonacci generator-main,\footnote{ … … 634 574 For contrast, Figure~\ref{f:PythonFibonacci} shows the equivalent Python Fibonacci generator, which does not use a generator type, and hence only has a single interface, but an implicit closure. 635 575 636 Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden .576 Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden and possible point of error. 637 577 (This restriction is removed by the coroutine, see Section~\ref{s:Coroutine}.) 
638 This requirement follows from the generality of variable-size local-state, \eg local state with a variable-length array requires dynamic allocation because the array size is unknown at compile time.639 However, dynamic allocation significantly increases the cost of generator creation/destruction and is a show-stopper for embedded real-timeprogramming.578 Our concern is that static analysis to discriminate local state from temporary variables is complex and beyond the current scope of the \CFA project. 579 As well, supporting variable-size local-state, like variable-length arrays, requires dynamic allocation of the local state, which significantly increases the cost of generator creation/destruction, and is a show-stopper for embedded programming. 640 580 But more importantly, the size of the generator type is tied to the local state in the generator main, which precludes separate compilation of the generator main, \ie a generator must be inlined or local state must be dynamically allocated. 641 With respect to safety, we believe static analysis can discriminate local state from temporary variables in a generator, \ie variable usage spanning @suspend@, and generate a compile-time error. 642 Finally, our current experience is that most generator problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small. 581 Our current experience is that most generator problems have simple data state, including local state, but complex execution state. 643 582 As well, C programmers are not afraid with this kind of semantic programming requirement, if it results in very small, fast generators. 
644 583 … … 690 629 \begin{python}[aboveskip=0pt,belowskip=0pt] 691 630 def Fib(): 692 fn1, fn = 0, 1693 while True:694 `yield fn1`695 fn1, fn = fn, fn1 + fn631 fn1, fn = 0, 1 632 while True: 633 `yield fn1` 634 fn1, fn = fn, fn1 + fn 696 635 f1 = Fib() 697 636 f2 = Fib() … … 732 671 \hspace{3pt} 733 672 \subfloat[Formatter]{\label{f:PythonFormatter}\usebox\myboxB} 734 \caption{Python generator}673 \caption{Python Generator} 735 674 \label{f:PythonGenerator} 736 675 … … 820 759 `generator PingPong` { 821 760 const char * name; 822 int N;823 int i; // local state824 761 PingPong & partner; // rebindable reference 762 int N, i; 825 763 }; 826 764 void ?{}(PingPong & pp, char * nm, int N) with(pp) { 765 name = nm; partner = 0p; pp.N = N; i = 0; } 827 766 void `main( PingPong & pp )` with(pp) { 828 767 for ( ; i < N; i += 1 ) { … … 833 772 int main() { 834 773 enum { N = 5 }; 835 PingPong ping = {"ping", N,0}, pong = {"pong",N,0};774 PingPong ping = {"ping", N}, pong = {"pong", N}; 836 775 &ping.partner = &pong; &pong.partner = &ping; 837 776 `resume( ping );` … … 844 783 typedef struct PingPong { 845 784 const char * name; 785 struct PingPong * partner; 846 786 int N, i; 847 struct PingPong * partner;848 787 void * next; 849 788 } PingPong; 850 #define PPCtor(name, N) { name,N,0,NULL,NULL}789 #define PPCtor(name, N) { name, NULL, N, 0, NULL } 851 790 void comain( PingPong * pp ) { 852 791 if ( pp->next ) goto *pp->next; … … 870 809 \subfloat[C generator simulation]{\label{f:CPingPongSim}\usebox\myboxB} 871 810 \hspace{3pt} 872 \caption{Ping-Pong symmetric generator}811 \caption{Ping-Pong Symmetric Generator} 873 812 \label{f:PingPongSymmetricGenerator} 874 813 \end{figure} … … 892 831 A coroutine is specified by replacing @generator@ with @coroutine@ for the type. 
893 832 This generality results in higher cost for creation, due to dynamic stack allocation, execution, due to context switching among stacks, and terminating, due to possible stack unwinding and dynamic stack deallocation. 894 A series of different kinds of coroutines and their implementation demonstrate how coroutines extend generators.833 How coroutines differ from generators is done through a series of examples. 895 834 896 835 First, the previous generator examples are converted to their coroutine counterparts, allowing local-state variables to be moved from the generator type into the coroutine main. … … 923 862 \end{cfa} 924 863 \end{description} 925 It is also possible to refactor code containing local-state and @suspend@ statements into a helper function, like the computation of the CRC for the device driver.864 It is also possible to refactor code containing local-state and @suspend@ statements into a helper routine, like the computation of the CRC for the device driver. 926 865 \begin{cfa} 927 866 unsigned int Crc() { … … 938 877 939 878 \begin{comment} 940 Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface functions, \eg @next@.879 Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface routines, \eg @next@. 941 880 Like the structure in Figure~\ref{f:ExternalState}, the coroutine type allows multiple instances, where instances of this type are passed to the (overloaded) coroutine main. 
942 881 The coroutine main's stack holds the state for the next generation, @f1@ and @f2@, and the code represents the three states in the Fibonacci formula via the three suspend points, to context switch back to the caller's @resume@. 943 The interface function@next@, takes a Fibonacci instance and context switches to it using @resume@;882 The interface routine @next@, takes a Fibonacci instance and context switches to it using @resume@; 944 883 on restart, the Fibonacci field, @fn@, contains the next value in the sequence, which is returned. 945 884 The first @resume@ is special because it allocates the coroutine stack and cocalls its coroutine main on that stack; … … 1027 966 def Fib(): 1028 967 1029 fn1, fn = 0, 11030 while True:1031 `yield fn1`1032 fn1, fn = fn, fn1 + fn968 fn1, fn = 0, 1 969 while True: 970 `yield fn1` 971 fn1, fn = fn, fn1 + fn 1033 972 1034 973 … … 1055 994 \hspace{3pt} 1056 995 \subfloat[Python]{\label{f:ExternalState}\usebox\myboxC} 1057 \caption{Fibonacci generator}996 \caption{Fibonacci Generator} 1058 997 \label{f:C-fibonacci} 1059 998 \end{figure} … … 1179 1118 \caption{Producer / consumer: resume-resume cycle, bi-directional communication} 1180 1119 \label{f:ProdCons} 1120 1121 \medskip 1122 1123 \begin{center} 1124 \input{FullProdConsStack.pstex_t} 1125 \end{center} 1126 \vspace*{-10pt} 1127 \caption{Producer / consumer runtime stacks} 1128 \label{f:ProdConsRuntimeStacks} 1129 1130 \medskip 1131 1132 \begin{center} 1133 \input{FullCoroutinePhases.pstex_t} 1134 \end{center} 1135 \vspace*{-10pt} 1136 \caption{Ping / Pong coroutine steps} 1137 \label{f:PingPongFullCoroutineSteps} 1181 1138 \end{figure} 1182 1139 … … 1199 1156 The loop then repeats calling @payment@, where each call resumes the producer coroutine. 1200 1157 Figure~\ref{f:ProdConsRuntimeStacks} shows the runtime stacks of the program main, and the coroutine mains for @prod@ and @cons@ during the cycling. 
1201 1202 \begin{figure}1203 \begin{center}1204 \input{FullProdConsStack.pstex_t}1205 \end{center}1206 \vspace*{-10pt}1207 \caption{Producer / consumer runtime stacks}1208 \label{f:ProdConsRuntimeStacks}1209 1210 \medskip1211 1212 \begin{center}1213 \input{FullCoroutinePhases.pstex_t}1214 \end{center}1215 \vspace*{-10pt}1216 \caption{Ping / Pong coroutine steps}1217 \label{f:PingPongFullCoroutineSteps}1218 \end{figure}1219 1158 1220 1159 Terminating a coroutine cycle is more complex than a generator cycle, because it requires context switching to the program main's \emph{stack} to shutdown the program, whereas generators started by the program main run on its stack. … … 1258 1197 1259 1198 1260 \subsection{ (Generator)Coroutine Implementation}1261 1262 A significant implementation challenge for generators/coroutines (and threads, see Section~\ref{s:threads}) is adding extra fields to the custom types and related functions, \eg inserting code after/before the coroutine constructor/destructor and @main@ to create/initialize/de-initialize/destroy any extra fields, \egstack.1263 There are several solutions to th ese problem, which follow from the object-oriented-flavoured choice to adopt custom types.1264 1265 For object-oriented languages, inheritance is used to provide extra fields and code via explicitinheritance:1199 \subsection{Coroutine Implementation} 1200 1201 A significant implementation challenge for coroutines (and threads, see Section~\ref{threads}) is adding extra fields and executing code after/before the coroutine constructor/destructor and coroutine main to create/initialize/de-initialize/destroy extra fields and the stack. 1202 There are several solutions to this problem and the chosen option directed the \CFA coroutine design. 
1203 1204 For object-oriented languages, inheritance can be used to provide extra fields and code, but it requires explicitly writing the inheritance: 1266 1205 \begin{cfa}[morekeywords={class,inherits}] 1267 class my Coroutine inherits baseCoroutine { ... }1268 \end{cfa} 1269 The problem is that the programming language and its tool chain, \eg debugger, @valgrind@, need to understand @baseCoroutine@ because it infers special property, so type @baseCoroutine@ becomes a de facto keyword and all types inheriting from it are implicitly custom types.1270 As well, some special properties are not handled by existing language semantics, \eg the execution of constructors/destructors is in the wrong order to implicitly start threads because the thread must start \emph{after} all constructors as it relies on a completely initialized object, but the inherited constructor runs \emph{before} the derived.1271 Alternatives, such as explicitly starting threads as in Java, are repetitive and forgetting to call start is a common source of errors.1206 class mycoroutine inherits baseCoroutine { ... } 1207 \end{cfa} 1208 In addition, the programming language and possibly its tool set, \eg debugger, @valgrind@ need to understand @baseCoroutine@ because of special stack property of coroutines. 1209 Furthermore, the execution of constructors/destructors is in the wrong order for certain operations. 1210 For example, for threads if the thread is implicitly started, it must start \emph{after} all constructors, because the thread relies on a completely initialized object, but the inherited constructor runs \emph{before} the derived. 1272 1211 1273 1212 An alternative is composition: 1274 1213 \begin{cfa} 1275 struct my Coroutine {1276 ... // declaration /communication variables1214 struct mycoroutine { 1215 ... 
// declarations 1277 1216 baseCoroutine dummy; // composition, last declaration 1278 1217 } 1279 1218 \end{cfa} 1280 1219 which also requires an explicit declaration that must be last to ensure correct initialization order. 1281 However, there is nothing preventing wrong placement or multiple declarations. 1282 1283 \CFA custom types make any special properties explicit to the language and its tool chain, \eg the language code-generator knows where to inject code and when it is unsafe to perform certain optimizations, and IDEs using simple parsing can find and manipulate types with special properties. 1284 The downside of this approach is that it makes custom types a special case in the language. 1285 Users wanting to extend custom types or build their own can only do so in ways offered by the language. 1286 Furthermore, implementing custom types without language support may display the power of a programming language. 1287 \CFA blends the two approaches, providing custom types for idiomatic \CFA code, extending and building new custom types is still possible, similar to the Java approach with builtin and library concurrency. 1288 1289 Part of the mechanism to generalize custom types is the \CFA trait, \eg the definition for custom-type @coroutine@ is anything satisfying the trait @is_coroutine@, and this trait both enforces and restricts the coroutine-interface functions. 1290 \begin{cfa} 1291 trait is_coroutine( `dtype` T ) { 1292 void main( T & ); 1293 coroutine_desc * get_coroutine( T & ); 1294 }; 1295 forall( `dtype` T | is_coroutine(T) ) void $suspend$( T & ); 1296 forall( `dtype` T | is_coroutine(T) ) void resume( T & ); 1297 \end{cfa} 1298 Note, copying generators/coroutines/threads is not meaningful. 1299 For example, a coroutine retains its last resumer and suspends back to it; 1300 having a copy also suspend back to the same resumer is undefined semantics. 1301 Furthermore, two coroutines cannot logically execute on the same stack. 
1302 A deep coroutine copy, which copies the stack, is also meaningless in an unmanaged language (no garbage collection), like C, because the stack may contain pointers to objects within it that require updating for the copy. 1303 Now, the @dtype@ property provides no \emph{implicit} copying operations and the @is_coroutine@ trait provides no \emph{explicit} copying operations, so all coroutines must be passed by reference (pointer). 1304 The function definitions ensure there is a statically-typed @main@ function that is the starting point (first stack frame) of a coroutine, and a mechanism to get (read) the currently executing coroutine handle. 1305 The @main@ function has no return value or additional parameters because the coroutine type allows an arbitrary number of interface functions with corresponding arbitrary typed input/output values versus fixed ones. 1306 The advantage of this approach is that users can easily create different types of coroutines, \eg changing the memory layout of a coroutine is trivial when implementing the @get_coroutine@ function, and possibly redefining @suspend@ and @resume@. 
1307 1308 The \CFA custom-type @coroutine@ implicitly implements the getter and forward declarations required for implementing the coroutine main: 1309 \begin{cquote} 1310 \begin{tabular}{@{}ccc@{}} 1311 \begin{cfa} 1312 coroutine MyCor { 1313 int value; 1314 1315 }; 1316 \end{cfa} 1317 & 1318 {\Large $\Rightarrow$} 1319 & 1320 \begin{tabular}{@{}ccc@{}} 1321 \begin{cfa} 1322 struct MyCor { 1323 int value; 1324 coroutine_desc cor; 1325 }; 1326 \end{cfa} 1327 & 1328 \begin{cfa} 1329 static inline coroutine_desc * 1330 get_coroutine( MyCor & this ) { 1331 return &this.cor; 1332 } 1333 \end{cfa} 1334 & 1335 \begin{cfa} 1336 void main( MyCor * this ); 1337 1338 1339 1340 \end{cfa} 1341 \end{tabular} 1342 \end{tabular} 1343 \end{cquote} 1344 The combination of custom types and fundamental @trait@ description of these types allows an concise specification for programmers and tools, while more advanced programmers can have tighter control over memory layout and initialization. 1220 However, there is nothing preventing wrong placement or multiple declarations of @baseCoroutine@. 1221 1222 For coroutines, as for threads, many implementations are based on function pointers or function objects~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}. 1223 For example, Boost implements coroutines in terms of four functor object-types: 1224 \begin{cfa} 1225 asymmetric_coroutine<>::pull_type 1226 asymmetric_coroutine<>::push_type 1227 symmetric_coroutine<>::call_type 1228 symmetric_coroutine<>::yield_type 1229 \end{cfa} 1345 1230 1346 1231 Figure~\ref{f:CoroutineMemoryLayout} shows different memory-layout options for a coroutine (where a task is similar). 1347 The coroutine handle is the @coroutine@ instance containing programmer specified global/communication variables across interface functions. 1348 The coroutine descriptor contains all implicit declarations needed by the runtime, \eg @suspend@/@resume@, and can be part of the coroutine handle or separate. 
1349 The coroutine stack can appear in a number of locations and fixed or variable sized. 1350 Hence, the coroutine's stack could be a VLS\footnote{ 1232 The coroutine handle is the @coroutine@ instance containing programmer specified global/communication variables. 1233 The coroutine descriptor contains all implicit declarations needed by the runtime, \eg @suspend@/@resume@, and can be part of the coroutine handle or separate.\footnote{ 1351 1234 We are examining variable-sized structures (VLS), where fields can be variable-sized structures or arrays. 1352 1235 Once allocated, a VLS is fixed sized.} 1353 on the allocating stack, provided the allocating stack is large enough.1354 For a VLS stack allocation, allocation/deallocation is an inexpensive adjustment of the stack point, modulo any stack constructor costs (\eg initial frame setup).1355 For heap stack allocation, allocation/deallocation is an expensive heap allocation (where the heap can be a shared resource), modulo any stackconstructor costs.1356 With heap stack allocation, it is also possible to use a split (segmented) stack calling-convention, available with gcc and clang, so the stack is variable sized.1357 Currently, \CFA supports stack/heap allocated descriptors but only fixed-sized heap allocated stacks; 1358 In \CFA debug-mode, the fixed-sized stack is terminated with a write-only page, which catches most stack overflows. 1359 Experience teaching concurrency with \uC~\cite{CS343} shows fixed-sized stacks are rarely an issue for students.1360 Split-stack allocation is under development but requires recompilation of existing code, which may be impossible.1236 The coroutine stack can appear in a number of locations and forms, fixed or variable sized. 1237 Hence, the coroutine's stack could be a VLS on the allocating stack, provided the allocating stack is large enough. 
1238 For stack allocation, allocation/deallocation only requires a few arithmetic operation to compute the size and adjust the stack point, modulo any constructor costs. 1239 For heap allocation, allocation/deallocation is an expensive heap operation (where the heap can be a shared resource), modulo any constructor costs. 1240 For heap stack allocation, it is also possible to use a split (segmented) stack calling-convention, available with gcc and clang, so the stack is variable sized. 1241 Currently, \CFA supports stack/heap allocated descriptors but only heap allocated stacks; 1242 split-stack allocation is under development. 1243 In \CFA debug-mode, a fixed-sized stack is terminated with a write-only page, which catches most stack overflows. 1361 1244 1362 1245 \begin{figure} … … 1367 1250 \end{figure} 1368 1251 1252 Similarly, the canonical threading paradigm is often based on function pointers, \eg @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}. 1253 However, the generic thread-handle (identifier) is limited (few operations), unless it is wrapped in a custom type, as in the pthreads approach. 1254 \begin{cfa} 1255 void mycor( coroutine_t cid, void * arg ) { 1256 int * value = (int *)arg; $\C{// type unsafe, pointer-size only}$ 1257 // Coroutine body 1258 } 1259 int main() { 1260 int input = 0, output; 1261 coroutine_t cid = coroutine_create( &mycor, (void *)&input ); $\C{// type unsafe, pointer-size only}$ 1262 coroutine_resume( cid, (void *)input, (void **)&output ); $\C{// type unsafe, pointer-size only}$ 1263 } 1264 \end{cfa} 1265 Since the custom type is simple to write in \CFA and solves several issues, added support for function/lambda-based coroutines adds very little. 1266 1267 Note, the type @coroutine_t@ must be an abstract handle to the coroutine, because the coroutine descriptor and its stack are non-copyable. 
1268 Copying the coroutine descriptor results in copies being out of date with the current state of the stack. 1269 Correspondingly, copying the stack results in copies being out of date with the coroutine descriptor, and pointers in the stack being out of date to data on the stack. 1270 (There is no mechanism in C to find all stack-specific pointers and update them as part of a copy.) 1271 1272 The selected approach is to use language support by introducing a new kind of aggregate (structure): 1273 \begin{cfa} 1274 coroutine Fibonacci { 1275 int fn; // communication variables 1276 }; 1277 \end{cfa} 1278 The @coroutine@ keyword means the compiler (and tool set) can find and inject code where needed. 1279 The downside of this approach is that it makes coroutine a special case in the language. 1280 Users wanting to extend coroutines or build their own for various reasons can only do so in ways offered by the language. 1281 Furthermore, implementing coroutines without language support also displays the power of a programming language. 1282 While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can still be constructed without language support. 1283 The reserved keyword simply eases use for the common case. 1284 1285 Part of the mechanism to generalize coroutines is using a \CFA trait, which defines a coroutine as anything satisfying the trait @is_coroutine@, and this trait restricts the available set of coroutine-manipulation routines: 1286 \begin{cfa} 1287 trait is_coroutine( `dtype` T ) { 1288 void main( T & ); 1289 coroutine_desc * get_coroutine( T & ); 1290 }; 1291 forall( `dtype` T | is_coroutine(T) ) void suspend( T & ); 1292 forall( `dtype` T | is_coroutine(T) ) void resume( T & ); 1293 \end{cfa} 1294 The @dtype@ property provides no implicit copying operations and the @is_coroutine@ trait provides no explicit copying operations, so all coroutines must be passed by reference (pointer). 
1295 The routine definitions ensures there is a statically-typed @main@ routine that is the starting point (first stack frame) of a coroutine, and a mechanism to get (read) the currently executing coroutine handle. 1296 The @main@ routine has no return value or additional parameters because the coroutine type allows an arbitrary number of interface routines with corresponding arbitrary typed input/output values versus fixed ones. 1297 The advantage of this approach is that users can easily create different types of coroutines, \eg changing the memory layout of a coroutine is trivial when implementing the @get_coroutine@ routine, and possibly redefining @suspend@ and @resume@. 1298 The \CFA keyword @coroutine@ implicitly implements the getter and forward declarations required for implementing the coroutine main: 1299 \begin{cquote} 1300 \begin{tabular}{@{}ccc@{}} 1301 \begin{cfa} 1302 coroutine MyCor { 1303 int value; 1304 1305 }; 1306 \end{cfa} 1307 & 1308 {\Large $\Rightarrow$} 1309 & 1310 \begin{tabular}{@{}ccc@{}} 1311 \begin{cfa} 1312 struct MyCor { 1313 int value; 1314 coroutine_desc cor; 1315 }; 1316 \end{cfa} 1317 & 1318 \begin{cfa} 1319 static inline coroutine_desc * 1320 get_coroutine( MyCor & this ) { 1321 return &this.cor; 1322 } 1323 \end{cfa} 1324 & 1325 \begin{cfa} 1326 void main( MyCor * this ); 1327 1328 1329 1330 \end{cfa} 1331 \end{tabular} 1332 \end{tabular} 1333 \end{cquote} 1334 The combination of these two approaches allows an easy and concise specification to coroutining (and concurrency) for normal users, while more advanced users have tighter control on memory layout and initialization. 1335 1369 1336 1370 1337 \section{Concurrency} 1371 1338 \label{s:Concurrency} 1372 1339 1373 Concurrency is nondeterministic scheduling of independent sequential execution-paths (threads), where each thread has its own stack. 
1374 A single thread with multiple call stacks, \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}. 1375 In coroutining, coroutines self-schedule the thread across stacks so execution is deterministic. 1376 (It is \emph{impossible} to generate a concurrency error when coroutining.) 1377 However, coroutines are a stepping stone towards concurrency. 1378 1379 The transition to concurrency, even for a single thread with multiple stacks, occurs when coroutines context switch to a \newterm{scheduling coroutine}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}. 1380 Therefore, a minimal concurrency system requires coroutines \emph{in conjunction with a nondeterministic scheduler}. 1340 At its core, concurrency is based on multiple call-stacks and scheduling threads executing on these stacks. 1341 Multiple call stacks (or contexts) and a single thread of execution, called \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}. 1342 In coroutining, the single thread is self-scheduling across the stacks, so execution is deterministic, \ie the execution path from input to output is fixed and predictable. 1343 A \newterm{stackless} coroutine executes on the caller's stack~\cite{Python} but this approach is restrictive, \eg preventing modularization and supporting only iterator/generator-style programming; 1344 a \newterm{stackful} coroutine executes on its own stack, allowing full generality. 1345 Only stackful coroutines are a stepping stone to concurrency. 1346 1347 The transition to concurrency, even for execution with a single thread and multiple stacks, occurs when coroutines also context switch to a \newterm{scheduling oracle}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}. 
1348 Therefore, a minimal concurrency system is possible using coroutines (see Section \ref{coroutine}) in conjunction with a scheduler to decide where to context switch next. 1381 1349 The resulting execution system now follows a cooperative threading-model, called \newterm{non-preemptive scheduling}. 1382 Adding \newterm{preemption} introduces non-cooperative scheduling, where context switching occurs randomly between any two instructions often based on a timer interrupt, called \newterm{preemptive scheduling}. 1383 While a scheduler introduces uncertain execution among explicit context switches, preemption introduces uncertainty by introducing implicit context switches. 1384 Uncertainty gives the illusion of parallelism on a single processor and provides a mechanism to access and increase performance on multiple processors. 1385 The reason is that the scheduler/runtime have complete knowledge about resources and how to best utilized them. 1386 However, the introduction of unrestricted nondeterminism results in the need for \newterm{mutual exclusion} and \newterm{synchronization}, which restrict nondeterminism for correctness; 1387 otherwise, it is impossible to write meaningful concurrent programs. 1388 Optimal concurrent performance is often obtained by having as much nondeterminism as mutual exclusion and synchronization correctness allow. 1389 1390 A scheduler can either be a stackless or stackful. 1350 1351 Because the scheduler is special, it can either be a stackless or stackful coroutine. 1391 1352 For stackless, the scheduler performs scheduling on the stack of the current coroutine and switches directly to the next coroutine, so there is one context switch. 1392 1353 For stackful, the current coroutine switches to the scheduler, which performs scheduling, and it then switches to the next coroutine, so there are two context switches. 1393 1354 A stackful scheduler is often used for simplicity and security. 
1394 1355 1395 1396 \subsection{Thread} 1397 \label{s:threads} 1398 1399 Threading needs the ability to start a thread and wait for its completion. 1400 A common API for this ability is @fork@ and @join@. 1356 Regardless of the approach used, a subset of concurrency related challenges start to appear. 1357 For the complete set of concurrency challenges to occur, the missing feature is \newterm{preemption}, where context switching occurs randomly between any two instructions, often based on a timer interrupt, called \newterm{preemptive scheduling}. 1358 While a scheduler introduces uncertainty in the order of execution, preemption introduces uncertainty about where context switches occur. 1359 Interestingly, uncertainty is necessary for the runtime (operating) system to give the illusion of parallelism on a single processor and increase performance on multiple processors. 1360 The reason is that only the runtime has complete knowledge about resources and how to best utilized them. 1361 However, the introduction of unrestricted non-determinism results in the need for \newterm{mutual exclusion} and \newterm{synchronization} to restrict non-determinism for correctness; 1362 otherwise, it is impossible to write meaningful programs. 1363 Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows. 1364 1365 An important missing feature in C is threading\footnote{While the C11 standard defines a \protect\lstinline@threads.h@ header, it is minimal and defined as optional. 1366 As such, library support for threading is far from widespread. 1367 At the time of writing the paper, neither \protect\lstinline@gcc@ nor \protect\lstinline@clang@ support \protect\lstinline@threads.h@ in their standard libraries.}. 
1368 In modern programming languages, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore existing and new programming languages must have tools for writing efficient concurrent programs to take advantage of parallelism. 1369 As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. 1370 Furthermore, because C is a system-level language, programmers expect to choose precisely which features they need and which cost they are willing to pay. 1371 Hence, concurrent programs should be written using high-level mechanisms, and only step down to lower-level mechanisms when performance bottlenecks are encountered. 1372 1373 1374 \subsection{Thread Interface} 1375 \label{threads} 1376 1377 Both user and kernel threads are supported, where user threads provide concurrency and kernel threads provide parallelism. 1378 Like coroutines and for the same design reasons, the selected approach for user threads is to use language support by introducing a new kind of aggregate (structure) and a \CFA trait: 1401 1379 \begin{cquote} 1402 \begin{tabular}{@{}lll@{}} 1403 \multicolumn{1}{c}{\textbf{Java}} & \multicolumn{1}{c}{\textbf{\Celeven}} & \multicolumn{1}{c}{\textbf{pthreads}} \\ 1404 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 1405 class MyTask extends Thread {...} 1406 mytask t = new MyTask(...); 1407 `t.start();` // start 1408 // concurrency 1409 `t.join();` // wait 1380 \begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}} 1381 \begin{cfa} 1382 thread myThread { 1383 // communication variables 1384 }; 1385 1386 1410 1387 \end{cfa} 1411 1388 & 1412 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 1413 class MyTask { ... } // functor 1414 MyTask mytask; 1415 `thread t( mytask, ... 
);` // start 1416 // concurrency 1417 `t.join();` // wait 1418 \end{cfa} 1419 & 1420 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 1421 void * rtn( void * arg ) {...} 1422 pthread_t t; int i = 3; 1423 `pthread_create( &t, rtn, (void *)i );` // start 1424 // concurrency 1425 `pthread_join( t, NULL );` // wait 1389 \begin{cfa} 1390 trait is_thread( `dtype` T ) { 1391 void main( T & ); 1392 thread_desc * get_thread( T & ); 1393 void ^?{}( T & `mutex` ); 1394 }; 1426 1395 \end{cfa} 1427 1396 \end{tabular} 1428 1397 \end{cquote} 1429 \CFA has a simpler approach using a custom @thread@ type and leveraging declaration semantics (allocation/deallocation), where threads implicitly @fork@ after construction and @join@ before destruction. 1430 \begin{cfa} 1431 thread MyTask {}; 1432 void main( MyTask & this ) { ... } 1398 (The qualifier @mutex@ for the destructor parameter is discussed in Section~\ref{s:Monitor}.) 1399 Like a coroutine, the statically-typed @main@ routine is the starting point (first stack frame) of a user thread. 1400 The difference is that a coroutine borrows a thread from its caller, so the first thread resuming a coroutine creates an instance of @main@; 1401 whereas, a user thread receives its own thread from the runtime system, which starts in @main@ as some point after the thread constructor is run.\footnote{ 1402 The \lstinline@main@ routine is already a special routine in C, \ie where the program's initial thread begins, so it is a natural extension of this semantics to use overloading to declare \lstinline@main@s for user coroutines and threads.} 1403 No return value or additional parameters are necessary for this routine because the task type allows an arbitrary number of interface routines with corresponding arbitrary typed input/output values. 1404 1405 \begin{comment} % put in appendix with coroutine version ??? 
1406 As such the @main@ routine of a thread can be defined as 1407 \begin{cfa} 1408 thread foo {}; 1409 1410 void main(foo & this) { 1411 sout | "Hello World!"; 1412 } 1413 \end{cfa} 1414 1415 In this example, threads of type @foo@ start execution in the @void main(foo &)@ routine, which prints @"Hello World!".@ While this paper encourages this approach to enforce strongly typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. 1416 With the static semantics it is trivial to write a thread type that takes a routine pointer as a parameter and executes it on its stack asynchronously. 1417 \begin{cfa} 1418 typedef void (*voidRtn)(int); 1419 1420 thread RtnRunner { 1421 voidRtn func; 1422 int arg; 1423 }; 1424 1425 void ?{}(RtnRunner & this, voidRtn inRtn, int arg) { 1426 this.func = inRtn; 1427 this.arg = arg; 1428 } 1429 1430 void main(RtnRunner & this) { 1431 // thread starts here and runs the routine 1432 this.func( this.arg ); 1433 } 1434 1435 void hello(/*unused*/ int) { 1436 sout | "Hello World!"; 1437 } 1438 1433 1439 int main() { 1434 MyTask team`[10]`; $\C[2.5in]{// allocate stack-based threads, implicit start after construction}$ 1435 // concurrency 1436 } $\C{// deallocate stack-based threads, implicit joins before destruction}$ 1437 \end{cfa} 1438 This semantic ensures a thread is started and stopped exactly once, eliminating some programming error, and scales to multiple threads for basic (termination) synchronization. 1439 For block allocation to arbitrary depth, including recursion, threads are created/destroyed in a lattice structure (tree with top and bottom). 1440 Arbitrary topologies are possible using dynamic allocation, allowing threads to outlive their declaration scope, identical to normal dynamically allocating. 1441 \begin{cfa} 1442 MyTask * factory( int N ) { ... 
return `anew( N )`; } $\C{// allocate heap-based threads, implicit start after construction}$ 1440 RtnRunner f = {hello, 42}; 1441 return 0? 1442 } 1443 \end{cfa} 1444 A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \textbf{API}. 1445 \end{comment} 1446 1447 For user threads to be useful, it must be possible to start and stop the underlying thread, and wait for it to complete execution. 1448 While using an API such as @fork@ and @join@ is relatively common, such an interface is awkward and unnecessary. 1449 A simple approach is to use allocation/deallocation principles, and have threads implicitly @fork@ after construction and @join@ before destruction. 1450 \begin{cfa} 1451 thread World {}; 1452 void main( World & this ) { 1453 sout | "World!"; 1454 } 1443 1455 int main() { 1444 MyTask * team = factory( 10 ); 1445 // concurrency 1446 `delete( team );` $\C{// deallocate heap-based threads, implicit joins before destruction}\CRT$ 1447 } 1448 \end{cfa} 1456 World w`[10]`; $\C{// implicit forks after creation}$ 1457 sout | "Hello "; $\C{// "Hello " and 10 "World!" printed concurrently}$ 1458 } $\C{// implicit joins before destruction}$ 1459 \end{cfa} 1460 This semantics ensures a thread is started and stopped exactly once, eliminating some programming error, and scales to multiple threads for basic (termination) synchronization. 1461 This tree-structure (lattice) create/delete from C block-structure is generalized by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created. 1462 \begin{cfa} 1463 int main() { 1464 MyThread * heapLive; 1465 { 1466 MyThread blockLive; $\C{// fork block-based thread}$ 1467 heapLive = `new`( MyThread ); $\C{// fork heap-based thread}$ 1468 ... 1469 } $\C{// join block-based thread}$ 1470 ... 
1471 `delete`( heapLive ); $\C{// join heap-based thread}$ 1472 } 1473 \end{cfa} 1474 The heap-based approach allows arbitrary thread-creation topologies, with respect to fork/join-style concurrency. 1449 1475 1450 1476 Figure~\ref{s:ConcurrentMatrixSummation} shows concurrently adding the rows of a matrix and then totalling the subtotals sequentially, after all the row threads have terminated. … … 1453 1479 The allocation/deallocation pattern appears unusual because allocated objects are immediately deallocated without any intervening code. 1454 1480 However, for threads, the deletion provides implicit synchronization, which is the intervening code. 1455 %While the subtotals are added in linear order rather than completion order, which slightly inhibits concurrency, the computation is restricted by the critical-path thread (\ie the thread that takes the longest), and so any inhibited concurrency is very small as totalling the subtotals is trivial.1481 While the subtotals are added in linear order rather than completion order, which slightly inhibits concurrency, the computation is restricted by the critical-path thread (\ie the thread that takes the longest), and so any inhibited concurrency is very small as totalling the subtotals is trivial. 
1456 1482 1457 1483 \begin{figure} … … 1459 1485 `thread` Adder { int * row, cols, & subtotal; } $\C{// communication variables}$ 1460 1486 void ?{}( Adder & adder, int row[], int cols, int & subtotal ) { 1461 adder.[ row, cols, &subtotal ] = [ row, cols, &subtotal ];1487 adder.[ row, cols, &subtotal ] = [ row, cols, &subtotal ]; 1462 1488 } 1463 1489 void main( Adder & adder ) with( adder ) { 1464 subtotal = 0;1465 for ( c; cols) { subtotal += row[c]; }1490 subtotal = 0; 1491 for ( int c = 0; c < cols; c += 1 ) { subtotal += row[c]; } 1466 1492 } 1467 1493 int main() { 1468 const int rows = 10, cols = 1000;1469 int matrix[rows][cols], subtotals[rows], total = 0;1470 // read matrix1471 Adder * adders[rows];1472 for ( r; rows; ) {$\C{// start threads to sum rows}$1494 const int rows = 10, cols = 1000; 1495 int matrix[rows][cols], subtotals[rows], total = 0; 1496 // read matrix 1497 Adder * adders[rows]; 1498 for ( int r = 0; r < rows; r += 1 ) { $\C{// start threads to sum rows}$ 1473 1499 adders[r] = `new( matrix[r], cols, &subtotals[r] );` 1474 }1475 for ( r; rows ) {$\C{// wait for threads to finish}$1476 `delete( adders[r] );` $\C{// termination join}$1477 total += subtotals[r]; $\C{// total subtotal}$1478 }1479 sout | total;1480 } 1481 \end{cfa} 1482 \caption{Concurrent matrix summation}1500 } 1501 for ( int r = 0; r < rows; r += 1 ) { $\C{// wait for threads to finish}$ 1502 `delete( adders[r] );` $\C{// termination join}$ 1503 total += subtotals[r]; $\C{// total subtotal}$ 1504 } 1505 sout | total; 1506 } 1507 \end{cfa} 1508 \caption{Concurrent Matrix Summation} 1483 1509 \label{s:ConcurrentMatrixSummation} 1484 1510 \end{figure} 1485 1511 1486 1512 1487 \subsection{Thread Implementation}1488 1489 Threads in \CFA are user level run by runtime kernel threads (see Section~\ref{s:Parallelism}), where user threads provide concurrency and kernel threads provide parallelism.1490 Like coroutines, and for the same design reasons, \CFA provides a custom @thread@ type 
and a @trait@ to enforce and restrict the task-interface functions.1491 \begin{cquote}1492 \begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}}1493 \begin{cfa}1494 thread myThread {1495 ... // declaration/communication variables1496 };1497 1498 1499 \end{cfa}1500 &1501 \begin{cfa}1502 trait is_thread( `dtype` T ) {1503 void main( T & );1504 thread_desc * get_thread( T & );1505 void ^?{}( T & `mutex` );1506 };1507 \end{cfa}1508 \end{tabular}1509 \end{cquote}1510 Like coroutines, the @dtype@ property prevents \emph{implicit} copy operations and the @is_coroutine@ trait provides no \emph{explicit} copy operations, so threads must be passed by reference (pointer).1511 Similarly, the function definitions ensures there is a statically-typed @main@ function that is the thread starting point (first stack frame), a mechanism to get (read) the currently executing thread handle, and a special destructor to prevent deallocation while the thread is executing.1512 (The qualifier @mutex@ for the destructor parameter is discussed in Section~\ref{s:Monitor}.)1513 The difference between the coroutine and thread is that a coroutine borrows a thread from its caller, so the first thread resuming a coroutine creates the coroutine's stack starts running the coroutine main on the stack;1514 whereas, a thread is scheduling for execution in @main@ immediately after its constructor is run.1515 No return value or additional parameters are necessary for this function because the @thread@ type allows an arbitrary number of interface functions with corresponding arbitrary typed input/output values.1516 1517 \begin{comment} % put in appendix with coroutine version ???1518 As such the @main@ function of a thread can be defined as1519 \begin{cfa}1520 thread foo {};1521 1522 void main(foo & this) {1523 sout | "Hello World!";1524 }1525 \end{cfa}1526 1527 In this example, threads of type @foo@ start execution in the @void main(foo &)@ function, which prints @"Hello World!".@ While this paper 
encourages this approach to enforce strongly typed programming, users may prefer to use the function-based thread semantics for the sake of simplicity.1528 With the static semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously.1529 \begin{cfa}1530 typedef void (*voidRtn)(int);1531 1532 thread RtnRunner {1533 voidRtn func;1534 int arg;1535 };1536 1537 void ?{}(RtnRunner & this, voidRtn inRtn, int arg) {1538 this.func = inRtn;1539 this.arg = arg;1540 }1541 1542 void main(RtnRunner & this) {1543 // thread starts here and runs the function1544 this.func( this.arg );1545 }1546 1547 void hello(/*unused*/ int) {1548 sout | "Hello World!";1549 }1550 1551 int main() {1552 RtnRunner f = {hello, 42};1553 return 0?1554 }1555 \end{cfa}1556 A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \textbf{API}.1557 \end{comment}1558 1559 1560 1513 \section{Mutual Exclusion / Synchronization} 1561 1514 1562 Uncontrolled nondeterministic execution produces meaningless results. 1563 To produce meaningful execution requires clawing back some determinism using mutual exclusion and synchronization, where mutual exclusion provides access-control for threads using shared data, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}. 1564 Some concurrent systems eliminate mutable shared-state by switching to stateless communication like message passing~\cite{Thoth,Harmony,V-Kernel,MPI} (Erlang, MPI), channels~\cite{CSP} (CSP,Go), actors~\cite{Akka} (Akka, Scala), or functional techniques (Haskell). 1565 However, in call/return-based languages, these approaches force a clear distinction, \ie introduce a new programming paradigm between regular and concurrent computation, \eg function call versus message passing. 
1566 Hence, a programmer must learn and manipulate two sets of design/programming patterns. 1515 Uncontrolled non-deterministic execution is meaningless. 1516 To reestablish meaningful execution requires mechanisms to reintroduce determinism, \ie restrict non-determinism, called mutual exclusion and synchronization, where mutual exclusion is an access-control mechanism on data shared by threads, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}. 1517 Since many deterministic challenges appear with the use of mutable shared state, some languages/libraries disallow it, \eg Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka~\cite{Akka} (Scala). 1518 In these paradigms, interaction among concurrent objects is performed by stateless message-passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts, \eg channels~\cite{CSP,Go}. 1519 However, in call/return-based languages, these approaches force a clear distinction, \ie introduce a new programming paradigm between regular and concurrent computation, \eg routine call versus message passing. 1520 Hence, a programmer must learn and manipulate two sets of design patterns. 1567 1521 While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account. 1568 1522 In contrast, approaches based on stateful models more closely resemble the standard call/return programming-model, resulting in a single programming paradigm. … … 1583 1537 A group of instructions manipulating a specific instance of shared data that must be performed atomically is called an (individual) \newterm{critical-section}~\cite{Dijkstra65}. 1584 1538 The generalization is called a \newterm{group critical-section}~\cite{Joung00}, where multiple tasks with the same session may use the resource simultaneously, but different sessions may not use the resource simultaneously. 
1585 The readers/writer problem~\cite{Courtois71} is an instance of a group critical-section, where readers share a session but writers have a unique session.1586 \newterm{Mutual exclusion} enforces the correct kind and number of threads use a critical section.1539 The readers/writer problem~\cite{Courtois71} is an instance of a group critical-section, where readers have the same session and all writers have a unique session. 1540 \newterm{Mutual exclusion} enforces that the correct kind and number of threads are using a critical section. 1587 1541 1588 1542 However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. … 1598 1552 Synchronization enforces relative ordering of execution, and synchronization tools provide numerous mechanisms to establish these timing relationships. 1599 1553 Low-level synchronization primitives offer good performance and flexibility at the cost of ease of use; 1600 higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, \eg receive-specific versus receive-any thread in message passing or offering specialized solutions, \eg barrier lock.1601 Often synchronization is used to order access to a critical section, \eg ensuring a waiting writer thread enters the critical section before a calling reader thread.1602 If the calling reader is scheduled before the waiting writer, the reader has \newterm{barged}.1554 higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, \eg message passing, or offering a simpler solution to otherwise involved challenges, \eg barrier lock. 1555 Often synchronization is used to order access to a critical section, \eg ensuring a reader thread is the next kind of thread to enter a critical section. 1556 If a writer thread is scheduled for next access, but another reader thread acquires the critical section first, that reader \newterm{barged}.
1603 1557 Barging can result in staleness/freshness problems, where a reader barges ahead of a writer and reads temporally stale data, or a writer barges ahead of another writer overwriting data with a fresh value preventing the previous value from ever being read (lost computation). 1604 Preventing or detecting barging is an involved challenge with low-level locks, which is made easier through higher-level constructs. 1605 This challenge is often split into two different approaches: barging avoidance and prevention. 1606 Algorithms that unconditionally releasing a lock for competing threads to acquire use barging avoidance during synchronization to force a barging thread to wait. 1607 algorithms that conditionally hold locks during synchronization, \eg baton-passing~\cite{Andrews89}, prevent barging completely. 1558 Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. 1559 This challenge is often split into two different approaches: barging avoidance and barging prevention. 1560 Algorithms that allow a barger, but divert it until later using current synchronization state (flags), are avoiding the barger; 1561 algorithms that preclude a barger from entering during synchronization in the critical section prevent barging completely. 1562 Techniques like baton-passing locks~\cite{Andrews89} between threads instead of unconditionally releasing locks is an example of barging prevention. 1608 1563 1609 1564 … … 1611 1566 \label{s:Monitor} 1612 1567 1613 A \textbf{monitor} is a set of functions that ensure mutual exclusion when accessing shared state. 1614 More precisely, a monitor is a programming technique that binds mutual exclusion to function scope, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation). 
1615 The strong association with the call/return paradigm eases programming, comprehension, and maintenance, at a slight cost in flexibility and efficiency. 1616 \CFA uses a custom @monitor@ type and leverages declaration semantics (deallocation) to protect active or waiting threads in a monitor. 1617 1618 The following is a \CFA monitor implementation of an atomic counter. 1619 \newpage 1620 \begin{cfa}[morekeywords=nomutex] 1621 `monitor` Aint { int cnt; }; $\C[4.25in]{// atomic integer counter}$ 1622 int ++?( Aint & `mutex`$\(_{opt}\)$ this ) with( this ) { return ++cnt; } $\C{// increment}$ 1623 int ?=?( Aint & `mutex`$\(_{opt}\)$ lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions}\CRT$ 1624 int ?=?( int & lhs, Aint & `mutex`$\(_{opt}\)$ rhs ) with( rhs ) { lhs = cnt; } 1625 \end{cfa} 1626 % The @Aint@ constructor, @?{}@, uses the \lstinline[morekeywords=nomutex]@nomutex@ qualifier indicating mutual exclusion is unnecessary during construction because an object is inaccessible (private) until after it is initialized. 1627 % (While a constructor may publish its address into a global variable, doing so generates a race-condition.) 1628 The prefix increment operation, @++?@, is normally @mutex@, indicating mutual exclusion is necessary during function execution, to protect the incrementing from race conditions, unless there is an atomic increment instruction for the implementation type. 1629 The assignment operators provide bi-directional conversion between an atomic and normal integer without accessing field @cnt@; 1630 these operations only need @mutex@, if reading/writing the implementation type is not atomic. 1631 The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics, which is similar to the \CC template @std::atomic@. 
1632 \begin{cfa} 1633 int i = 0, j = 0, k = 5; 1634 Aint x = { 0 }, y = { 0 }, z = { 5 }; $\C{// no mutex required}$ 1635 ++x; ++y; ++z; $\C{// safe increment by multiple threads}$ 1636 x = 2; y = i; z = k; $\C{// conversions}$ 1637 i = x; j = y; k = z; 1638 \end{cfa} 1639 1640 \CFA monitors have \newterm{multi-acquire} semantics so the thread in the monitor may acquire it multiple times without deadlock, allowing recursion and calling other interface functions. 1641 \begin{cfa} 1642 monitor M { ... } m; 1643 void foo( M & mutex m ) { ... } $\C{// acquire mutual exclusion}$ 1644 void bar( M & mutex m ) { $\C{// acquire mutual exclusion}$ 1645 ... `bar( m );` ... `foo( m );` ... $\C{// reacquire mutual exclusion}$ 1646 } 1647 \end{cfa} 1648 \CFA monitors also ensure the monitor lock is always released regardless of how an acquiring function ends (normal or exceptional), and returning a shared variable is safe via copying before the lock is released. 1649 Similar safety is offered by explicit mechanisms like \CC RAII; 1650 monitor implicit safety ensures no programmer error. 1651 Furthermore, RAII mechanisms cannot handle complex synchronization within a monitor, where the monitor lock may not be released on function exit but passed to an unblocking thread. 1652 RAII is purely a mutual-exclusion mechanism (see Section~\ref{s:Scheduling}). 1653 1654 1655 \subsection{Monitor Implementation} 1656 1657 For the same design reasons, \CFA provides a custom @monitor@ type and a @trait@ to enforce and restrict the monitor-interface functions. 1658 \begin{cquote} 1659 \begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}} 1660 \begin{cfa} 1661 monitor M { 1662 ... // shared data 1663 }; 1664 1665 \end{cfa} 1666 & 1568 A \textbf{monitor} is a set of routines that ensure mutual exclusion when accessing shared state.
1569 More precisely, a monitor is a programming technique that binds mutual exclusion to routine scope, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation). 1570 The strong association with the call/return paradigm eases programmability, readability and maintainability, at a slight cost in flexibility and efficiency. 1571 1572 Note, like coroutines/threads, both locks and monitors require an abstract handle to reference them, because at their core, both mechanisms are manipulating non-copyable shared-state. 1573 Copying a lock is insecure because it is possible to copy an open lock and then use the open copy when the original lock is closed to simultaneously access the shared data. 1574 Copying a monitor is secure because both the lock and shared data are copies, but copying the shared data is meaningless because it no longer represents a unique entity. 1575 As for coroutines/tasks, the @dtype@ property provides no implicit copying operations and the @is_monitor@ trait provides no explicit copying operations, so all locks/monitors must be passed by reference (pointer). 
1667 1576 \begin{cfa} 1668 1577 trait is_monitor( `dtype` T ) { … 1671 1580 }; 1672 1581 \end{cfa} 1673 \end{tabular}1674 \end{cquote}1675 Like before, the @dtype@ property prevents \emph{implicit} copy operations and the @is_monitor@ trait provides no \emph{explicit} copy operations, so monitors must be passed by reference (pointer).1676 % Copying a lock is insecure because it is possible to copy an open lock and then use the open copy when the original lock is closed to simultaneously access the shared data.1677 % Copying a monitor is secure because both the lock and shared data are copies, but copying the shared data is meaningless because it no longer represents a unique entity.1678 Similarly, the function definitions ensure there is a mechanism to get (read) the currently executing monitor handle, and a special destructor to prevent deallocation if a thread is using the shared data.1679 The custom monitor type also inserts any locking vehicles needed to implement the monitor locking semantics.1680 1681 1583 1682 1584 \subsection{\protect\lstinline@mutex@ Acquisition} 1683 1585 \label{s:MutexAcquisition} 1684 1586 1685 While the monitor lock provides mutual exclusion for shared data, there are implementation options for when and where the locking/unlocking occurs.1587 While correctness implies a monitor's mutual exclusion is acquired and released, there are implementation options about when and where the locking/unlocking occurs. 1686 1588 (Much of this discussion also applies to basic locks.) 1687 For example, a monitor may be passed through multiple helper functions before it is necessary to acquire the monitor's mutual exclusion. 1589 For example, a monitor may need to be passed through multiple helper routines before it becomes necessary to acquire the monitor mutual-exclusion.
1590 \begin{cfa}[morekeywords=nomutex] 1591 monitor Aint { int cnt; }; $\C{// atomic integer counter}$ 1592 void ?{}( Aint & `nomutex` this ) with( this ) { cnt = 0; } $\C{// constructor}$ 1593 int ?=?( Aint & `mutex`$\(_{opt}\)$ lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions}$ 1594 void ?{}( int & this, Aint & `mutex`$\(_{opt}\)$ v ) { this = v.cnt; } 1595 int ?=?( int & lhs, Aint & `mutex`$\(_{opt}\)$ rhs ) with( rhs ) { lhs = cnt; } 1596 int ++?( Aint & `mutex`$\(_{opt}\)$ this ) with( this ) { return ++cnt; } $\C{// increment}$ 1597 \end{cfa} 1598 The @Aint@ constructor, @?{}@, uses the \lstinline[morekeywords=nomutex]@nomutex@ qualifier indicating mutual exclusion is unnecessary during construction because an object is inaccessible (private) until after it is initialized. 1599 (While a constructor may publish its address into a global variable, doing so generates a race-condition.) 1600 The conversion operators for initializing and assigning with a normal integer only need @mutex@, if reading/writing the implementation type is not atomic. 1601 Finally, the prefix increment operator, @++?@, is normally @mutex@ to protect the incrementing from race conditions, unless there is an atomic increment instruction for the implementation type. 1602 1603 The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics, which is similar to the \CC template @std::atomic@. 1604 \begin{cfa} 1605 Aint x, y, z; 1606 ++x; ++y; ++z; $\C{// safe increment by multiple threads}$ 1607 x = 2; y = 2; z = 2; $\C{// conversions}$ 1608 int i = x, j = y, k = z; 1609 i = x; j = y; k = z; 1610 \end{cfa} 1611 1612 For maximum usability, monitors have \newterm{multi-acquire} semantics allowing a thread to acquire it multiple times without deadlock. 1613 \begin{cfa} 1614 monitor M { ... } m; 1615 void foo( M & mutex m ) { ... } $\C{// acquire mutual exclusion}$ 1616 void bar( M & mutex m ) { $\C{// acquire mutual exclusion}$ 1617 ...
`foo( m );` ... $\C{// reacquire mutual exclusion}$ 1618 } 1619 `bar( m );` $\C{// nested monitor call}$ 1620 \end{cfa} 1688 1621 1689 1622 The benefit of mandatory monitor qualifiers is self-documentation, but requiring both @mutex@ and \lstinline[morekeywords=nomutex]@nomutex@ for all monitor parameters is redundant. … … 1696 1629 1697 1630 The next semantic decision is establishing which parameter \emph{types} may be qualified with @mutex@. 1698 The following has monitor parameter types that are composed of multiple objects. 1631 Given: 1699 1632 \begin{cfa} 1700 1633 monitor M { ... } 1701 int f1( M & mutex m ); $\C{// single parameter object}$ 1702 int f2( M * mutex m ); $\C{// single or multiple parameter object}$ 1703 int f3( M * mutex m[$\,$] ); $\C{// multiple parameter object}$ 1704 int f4( stack( M * ) & mutex m ); $\C{// multiple parameters object}$ 1705 \end{cfa} 1706 Function @f1@ has a single parameter object, while @f2@'s indirection could be a single or multi-element array, where static array size is often unknown in C. 1707 Function @f3@ has a multiple object matrix, and @f4@ a multiple object data structure. 1708 While shown shortly, multiple object acquisition is possible, but the number of objects must be statically known. 1709 Therefore, \CFA only acquires one monitor per parameter with at most one level of indirection, excluding pointers as it is impossible to statically determine the size. 1710 1711 For object-oriented monitors, \eg Java, calling a mutex member \emph{implicitly} acquires mutual exclusion of the receiver object, @`rec`.foo(...)@. 1712 \CFA has no receiver, and hence, the explicit @mutex@ qualifier is used to specify which objects acquire mutual exclusion. 
1713 A positive consequence of this design decision is the ability to support multi-monitor functions,\footnote{ 1714 While object-oriented monitors can be extended with a mutex qualifier for multiple-monitor members, no prior example of this feature could be found.} 1715 called \newterm{bulk acquire} (see Section~\ref{s:Implementation} for implementation details). 1716 \CFA guarantees acquisition order is consistent across calls to @mutex@ functions using the same monitors as arguments, so acquiring multiple monitors is safe from deadlock. 1717 Figure~\ref{f:BankTransfer} shows a trivial solution to the bank-account transfer problem using \CFA monitors with implicit locking and \CC with explicit locking. 1718 A \CFA programmer only has to manage when to acquire mutual exclusion; 1719 a \CC programmer must select the correct lock and acquisition mechanism from a panoply of locking options. 1720 Making good choices for common cases in \CFA simplifies the programming experience and enhances safety. 
1721 1722 \begin{figure} 1723 \centering 1724 \begin{lrbox}{\myboxA} 1725 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 1726 monitor BankAccount { 1727 1728 int balance; 1729 } b1 = { 0 }, b2 = { 0 }; 1730 void deposit( BankAccount & `mutex` b, 1731 int deposit ) with(b) { 1732 balance += deposit; 1733 } 1734 void transfer( BankAccount & `mutex` my, 1735 BankAccount & `mutex` your, int me2you ) { 1736 1737 deposit( my, -me2you ); // debit 1738 deposit( your, me2you ); // credit 1739 } 1740 `thread` Person { BankAccount & b1, & b2; }; 1741 void main( Person & person ) with(person) { 1742 for ( 10_000_000 ) { 1743 if ( random() % 3 ) deposit( b1, 3 ); 1744 if ( random() % 3 ) transfer( b1, b2, 7 ); 1745 } 1746 } 1747 int main() { 1748 `Person p1 = { b1, b2 }, p2 = { b2, b1 };` 1749 1750 } // wait for threads to complete 1751 \end{cfa} 1752 \end{lrbox} 1753 1754 \begin{lrbox}{\myboxB} 1755 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 1756 struct BankAccount { 1757 `recursive_mutex m;` 1758 int balance = 0; 1759 } b1, b2; 1760 void deposit( BankAccount & b, int deposit ) { 1761 `scoped_lock lock( b.m );` 1762 b.balance += deposit; 1763 } 1764 void transfer( BankAccount & my, 1765 BankAccount & your, int me2you ) { 1766 `scoped_lock lock( my.m, your.m );` 1767 deposit( my, -me2you ); // debit 1768 deposit( your, me2you ); // credit 1769 } 1770 1771 void person( BankAccount & b1, BankAccount & b2 ) { 1772 for ( int i = 0; i < 10$'$000$'$000; i += 1 ) { 1773 if ( random() % 3 ) deposit( b1, 3 ); 1774 if ( random() % 3 ) transfer( b1, b2, 7 ); 1775 } 1776 } 1777 int main() { 1778 `thread p1(person, ref(b1), ref(b2)), p2(person, ref(b2), ref(b1));` 1779 `p1.join(); p2.join();` 1780 } 1781 \end{cfa} 1782 \end{lrbox} 1783 1784 \subfloat[\CFA]{\label{f:CFABank}\usebox\myboxA} 1785 \hspace{3pt} 1786 \vrule 1787 \hspace{3pt} 1788 \subfloat[\CC]{\label{f:C++Bank}\usebox\myboxB} 1789 \hspace{3pt} 1790 \caption{Bank transfer problem} 1791 \label{f:BankTransfer} 1792 \end{figure} 1793 1794 
Users can force the acquiring order, by using @mutex@/\lstinline[morekeywords=nomutex]@nomutex@. 1795 \begin{cfa} 1796 void foo( M & mutex m1, M & mutex m2 ); $\C{// acquire m1 and m2}$ 1634 int f1( M & mutex m ); 1635 int f2( M * mutex m ); 1636 int f3( M * mutex m[] ); 1637 int f4( stack( M * ) & mutex m ); 1638 \end{cfa} 1639 the issue is that some of these parameter types are composed of multiple objects. 1640 For @f1@, there is only a single parameter object. 1641 Adding indirection in @f2@ still identifies a single object. 1642 However, the matrix in @f3@ introduces multiple objects. 1643 While shown shortly, multiple acquisition is possible; 1644 however array lengths are often unknown in C. 1645 This issue is exacerbated in @f4@, where the data structure must be safely traversed to acquire all of its elements. 1646 1647 To make the issue tractable, \CFA only acquires one monitor per parameter with at most one level of indirection. 1648 However, there is an ambiguity in the C type-system with respects to arrays. 1649 Is the argument for @f2@ a single object or an array of objects? 1650 If it is an array, only the first element of the array is acquired, which seems unsafe; 1651 hence, @mutex@ is disallowed for array parameters. 1652 \begin{cfa} 1653 int f1( M & mutex m ); $\C{// allowed: recommended case}$ 1654 int f2( M * mutex m ); $\C{// disallowed: could be an array}$ 1655 int f3( M mutex m[$\,$] ); $\C{// disallowed: array length unknown}$ 1656 int f4( M ** mutex m ); $\C{// disallowed: could be an array}$ 1657 int f5( M * mutex m[$\,$] ); $\C{// disallowed: array length unknown}$ 1658 \end{cfa} 1659 % Note, not all array routines have distinct types: @f2@ and @f3@ have the same type, as do @f4@ and @f5@. 1660 % However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic. 
1661 1662 For object-oriented monitors, calling a mutex member \emph{implicitly} acquires mutual exclusion of the receiver object, @`rec`.foo(...)@. 1663 \CFA has no receiver, and hence, must use an explicit mechanism to specify which object acquires mutual exclusion. 1664 A positive consequence of this design decision is the ability to support multi-monitor routines. 1665 \begin{cfa} 1666 int f( M & mutex x, M & mutex y ); $\C{// multiple monitor parameter of any type}$ 1667 M m1, m2; 1668 f( m1, m2 ); 1669 \end{cfa} 1670 (While object-oriented monitors can be extended with a mutex qualifier for multiple-monitor members, no prior example of this feature could be found.) 1671 In practice, writing multi-locking routines that do not deadlock is tricky. 1672 Having language support for such a feature is therefore a significant asset for \CFA. 1673 1674 The capability to acquire multiple locks before entering a critical section is called \newterm{bulk acquire} (see Section~\ref{s:Implementation} for implementation details). 1675 In the previous example, \CFA guarantees the order of acquisition is consistent across calls to different routines using the same monitors as arguments. 1676 This consistent ordering means acquiring multiple monitors is safe from deadlock. 1677 However, users can force the acquiring order. 1678 For example, notice the use of @mutex@/\lstinline[morekeywords=nomutex]@nomutex@ and how this affects the acquiring order: 1679 \begin{cfa} 1680 void foo( M & mutex m1, M & mutex m2 ); $\C{// acquire m1 and m2}$ 1797 1681 void bar( M & mutex m1, M & /* nomutex */ m2 ) { $\C{// acquire m1}$ 1798 ... foo( m1, m2 ); ... $\C{// acquire m2}$1682 ... foo( m1, m2 ); ... $\C{// acquire m2}$ 1799 1683 } 1800 1684 void baz( M & /* nomutex */ m1, M & mutex m2 ) { $\C{// acquire m2}$ 1801 ... foo( m1, m2 ); ... $\C{// acquire m1}$ 1802 } 1803 \end{cfa} 1804 The bulk-acquire semantics allow @bar@ or @baz@ to acquire a monitor lock and reacquire it in @foo@. 
1805 In the calls to @bar@ and @baz@, the monitors are acquired in opposite order, resulting in deadlock. 1806 However, this case is the simplest instance of the \emph{nested-monitor problem}~\cite{Lister77}, where monitors are acquired in sequence versus bulk. 1807 Detecting the nested-monitor problem requires dynamic tracking of monitor calls, and dealing with it requires rollback semantics~\cite{Dice10}. 1808 \CFA does not deal with this fundamental problem. 1685 ... foo( m1, m2 ); ... $\C{// acquire m1}$ 1686 } 1687 \end{cfa} 1688 The multi-acquire semantics allows @bar@ or @baz@ to acquire a monitor lock and reacquire it in @foo@. 1689 In the calls to @bar@ and @baz@, the monitors are acquired in opposite order. 1690 1691 However, such use leads to lock acquiring order problems resulting in deadlock~\cite{Lister77}, where detecting it requires dynamic tracking of monitor calls, and dealing with it requires rollback semantics~\cite{Dice10}. 1692 In \CFA, a safety aid is provided by using bulk acquire of all monitors to shared objects, whereas other monitor systems provide no aid. 1693 While \CFA provides only a partial solution, it handles many useful cases, \eg: 1694 \begin{cfa} 1695 monitor BankAccount { ... }; 1696 void deposit( BankAccount & `mutex` b, int deposit ); 1697 void transfer( BankAccount & `mutex` my, BankAccount & `mutex` your, int me2you ) { 1698 deposit( my, `-`me2you ); $\C{// debit}$ 1699 deposit( your, me2you ); $\C{// credit}$ 1700 } 1701 \end{cfa} 1702 This example shows a trivial solution to the bank-account transfer problem. 1703 Without multi- and bulk acquire, the solution to this problem requires careful engineering. 1809 1704 1810 1705 … … 1812 1707 \label{mutex-stmt} 1813 1708 1814 The monitor call-semantics associate all locking semantics with functions.1709 The monitor call-semantics associate all locking semantics to routines. 1815 1710 Like Java, \CFA offers an alternative @mutex@ statement to reduce refactoring and naming. 
1816 1711 \begin{cquote} 1817 1712 \begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}} 1818 \multicolumn{1}{c}{\textbf{\lstinline@mutex@ call}} & \multicolumn{1}{c}{\lstinline@mutex@ \textbf{statement}} \\1819 1713 \begin{cfa} 1820 1714 monitor M { ... }; … … 1836 1730 1837 1731 \end{cfa} 1732 \\ 1733 \multicolumn{1}{c}{\textbf{routine call}} & \multicolumn{1}{c}{\lstinline@mutex@ \textbf{statement}} 1838 1734 \end{tabular} 1839 1735 \end{cquote} 1840 1736 1841 1737 1842 \s ubsection{Scheduling}1738 \section{Scheduling} 1843 1739 \label{s:Scheduling} 1844 1740 1845 While monitor mutual-exclusion provides safe access to shared data, the monitor data may indicate that a thread accessing it cannot proceed, \eg a bounded buffer may be full/empty so produce/consumer threads must block. 1741 While monitor mutual-exclusion provides safe access to shared data, the monitor data may indicate that a thread accessing it cannot proceed. 1742 For example, Figure~\ref{f:GenericBoundedBuffer} shows a bounded buffer that may be full/empty so produce/consumer threads must block. 1846 1743 Leaving the monitor and trying again (busy waiting) is impractical for high-level programming. 1847 1744 Monitors eliminate busy waiting by providing synchronization to schedule threads needing access to the shared data, where threads block versus spinning. 1848 1745 Synchronization is generally achieved with internal~\cite{Hoare74} or external~\cite[\S~2.9.2]{uC++} scheduling, where \newterm{scheduling} defines which thread acquires the critical section next. 1849 1746 \newterm{Internal scheduling} is characterized by each thread entering the monitor and making an individual decision about proceeding or blocking, while \newterm{external scheduling} is characterized by an entering thread making a decision about proceeding for itself and on behalf of other threads attempting entry. 
1850 Finally, \CFA monitors do not allow calling threads to barge ahead of signalled threads, which simplifies synchronization among threads in the monitor and increases correctness.1851 If barging is allowed, synchronization between a signaller and signallee is difficult, often requiring additional flags and multiple unblock/block cycles.1852 In fact, signals-as-hints is completely opposite from that proposed by Hoare in the seminal paper on monitors:1853 \begin{cquote}1854 However, we decree that a signal operation be followed immediately by resumption of a waiting program, without possibility of an intervening procedure call from yet a third program.1855 It is only in this way that a waiting program has an absolute guarantee that it can acquire the resource just released by the signalling program without any danger that a third program will interpose a monitor entry and seize the resource instead.~\cite[p.~550]{Hoare74}1856 \end{cquote}1857 Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of barging.1858 Hence, a \CFA @wait@ statement is not enclosed in a @while@ loop that can cause thread starvation.1859 1747 1860 1748 Figure~\ref{f:BBInt} shows a \CFA generic bounded-buffer with internal scheduling, where producers/consumers enter the monitor, see the buffer is full/empty, and block on an appropriate condition lock, @full@/@empty@. 1861 The @wait@ function atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the function's parameter list.1749 The @wait@ routine atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the routine's parameter list. 1862 1750 The appropriate condition lock is signalled to unblock an opposite kind of thread after an element is inserted/removed from the buffer. 1863 1751 Signalling is unconditional, because signalling an empty condition lock does nothing. 
… … 1937 1825 \end{lrbox} 1938 1826 1939 \subfloat[Internal scheduling]{\label{f:BBInt}\usebox\myboxA}1827 \subfloat[Internal Scheduling]{\label{f:BBInt}\usebox\myboxA} 1940 1828 %\qquad 1941 \subfloat[External scheduling]{\label{f:BBExt}\usebox\myboxB}1942 \caption{Generic bounded-buffer}1829 \subfloat[External Scheduling]{\label{f:BBExt}\usebox\myboxB} 1830 \caption{Generic Bounded-Buffer} 1943 1831 \label{f:GenericBoundedBuffer} 1944 1832 \end{figure} 1945 1833 1946 Figure~\ref{f:BBExt} shows a \CFA generic bounded-buffer with external scheduling, where producers/consumers detecting a full/ \-empty buffer block and prevent more producers/consumers from entering the monitor until there is a free/empty slot in the buffer.1947 External scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the functioncalls that can next acquire mutual exclusion.1834 Figure~\ref{f:BBExt} shows a \CFA generic bounded-buffer with external scheduling, where producers/consumers detecting a full/empty buffer block and prevent more producers/consumers from entering the monitor until there is a free/empty slot in the buffer. 1835 External scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the routine calls that can next acquire mutual exclusion. 1948 1836 If the buffer is full, only calls to @remove@ can acquire the buffer, and if the buffer is empty, only calls to @insert@ can acquire the buffer. 1949 Threads making calls to functions that are currently excluded, block outside of (external to) the monitor on a calling queue, versus blocking on condition queues inside of (internal to) the monitor.1837 Threads making calls to routines that are currently excluded, block outside of (external to) the monitor on a calling queue, versus blocking on condition queues inside of (internal to) the monitor. 
1950 1838 External scheduling allows users to wait for events from other threads without concern of unrelated events occurring. 1951 The mechanism can be done in terms of control flow, \eg Ada @accept@ or \uC @_Accept@, or in terms of data, \eg Go @select@ on channels.1839 The mechanism can be done in terms of control flow, \eg Ada @accept@ or \uC @_Accept@, or in terms of data, \eg Go channels. 1952 1840 While both mechanisms have strengths and weaknesses, this project uses a control-flow mechanism to stay consistent with other language semantics. 1953 Two challenges specific to \CFA for external scheduling are loose object-definitions (see Section~\ref{s:LooseObjectDefinitions}) and multiple-monitor functions (see Section~\ref{s:Multi-MonitorScheduling}). 1841 Two challenges specific to \CFA for external scheduling are loose object-definitions (see Section~\ref{s:LooseObjectDefinitions}) and multiple-monitor routines (see Section~\ref{s:Multi-MonitorScheduling}). 1954 1842 1955 1843 For internal scheduling, non-blocking signalling (as in the producer/consumer example) is used when the signaller is providing the cooperation for a waiting thread; … 1966 1854 For signal scheduling, the @exchange@ condition is necessary to block the thread finding the match, while the matcher unblocks to take the opposite number, post its phone number, and unblock the partner. 1967 1855 For signal-block scheduling, the implicit urgent-queue replaces the explicit @exchange@-condition and @signal_block@ puts the finding thread on the urgent condition and unblocks the matcher. 1856 1968 1857 The dating service is an example of a monitor that cannot be written using external scheduling because it requires knowledge of calling parameters to make scheduling decisions, and parameters of waiting threads are unavailable; 1969 1858 as well, an arriving thread may not find a partner and must wait, which requires a condition variable, and condition variables imply internal scheduling.
1970 Furthermore, barging corrupts the dating service during an exchange because a barger may also match and change the phone numbers, invalidating the previous exchange phone number.1971 Putting loops around the @wait@s does not correct the problem;1972 the solution must be restructured to account for barging.1973 1859 1974 1860 \begin{figure} … … 2031 1917 \qquad 2032 1918 \subfloat[\lstinline@signal_block@]{\label{f:DatingSignalBlock}\usebox\myboxB} 2033 \caption{Dating service }1919 \caption{Dating service. } 2034 1920 \label{f:DatingService} 2035 1921 \end{figure} … … 2060 1946 To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @wait( e, m1 )@. 2061 1947 Wait statically verifies the released monitors are the acquired mutex-parameters so unconditional release is safe. 2062 While \CC supports bulk locking, @wait@ only accepts a single lock for a condition variable, so bulk locking with condition variables is asymmetric.2063 1948 Finally, a signaller, 2064 1949 \begin{cfa} … … 2071 1956 Similarly, for @waitfor( rtn )@, the default semantics is to atomically block the acceptor and release all acquired mutex types in the parameter list, \ie @waitfor( rtn, m1, m2 )@. 2072 1957 To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @waitfor( rtn, m1 )@. 2073 @waitfor@ statically verifies the released monitors are the same as the acquired mutex-parameters of the given function or functionpointer.2074 To statically verify the released monitors match with the accepted function's mutex parameters, the function(pointer) prototype must be accessible.2075 % When an overloaded function appears in an @waitfor@ statement, calls to any functionwith that name are accepted.1958 @waitfor@ statically verifies the released monitors are the same as the acquired mutex-parameters of the given routine or routine pointer. 
1959 To statically verify the released monitors match with the accepted routine's mutex parameters, the routine (pointer) prototype must be accessible. 1960 % When an overloaded routine appears in an @waitfor@ statement, calls to any routine with that name are accepted. 2076 1961 % The rationale is that members with the same name should perform a similar function, and therefore, all should be eligible to accept a call. 2077 Overloaded functions can be disambiguated using a cast1962 Overloaded routines can be disambiguated using a cast: 2078 1963 \begin{cfa} 2079 1964 void rtn( M & mutex m ); … … 2089 1974 ... signal( `e` ); ... 2090 1975 \end{cfa} 2091 The @wait@ only releases @m1@ so the signalling thread cannot acquire @m1@ and @m2@ to enter @bar@ and @signal@ the condition. 2092 While deadlock can occur with multiple/nesting acquisition, this is a consequence of locks, and by extension monitors, not being perfectly composable. 2093 2094 % Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency. 1976 The @wait@ only releases @m1@ so the signalling thread cannot acquire both @m1@ and @m2@ to enter @bar@ to get to the @signal@. 1977 While deadlock issues can occur with multiple/nesting acquisition, this issue results from the fact that locks, and by extension monitors, are not perfectly composable. 1978 1979 Finally, an important aspect of monitor implementation is barging, \ie can calling threads barge ahead of signalled threads? 1980 If barging is allowed, synchronization between a signaller and signallee is difficult, often requiring multiple unblock/block cycles (looping around a wait rechecking if a condition is met). 
1981 In fact, signals-as-hints is completely opposite from that proposed by Hoare in the seminal paper on monitors: 1982 \begin{quote} 1983 However, we decree that a signal operation be followed immediately by resumption of a waiting program, without possibility of an intervening procedure call from yet a third program. 1984 It is only in this way that a waiting program has an absolute guarantee that it can acquire the resource just released by the signalling program without any danger that a third program will interpose a monitor entry and seize the resource instead.~\cite[p.~550]{Hoare74} 1985 \end{quote} 1986 \CFA scheduling \emph{precludes} barging, which simplifies synchronization among threads in the monitor and increases correctness. 1987 Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implict form of barging. 1988 For example, there are no loops in either bounded buffer solution in Figure~\ref{f:GenericBoundedBuffer}. 1989 Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency. 2095 1990 2096 1991 … … 2154 2049 \begin{cquote} 2155 2050 \subfloat[Signalling Thread]{\label{f:SignallingThread}\usebox\myboxA} 2156 \hspace{ 3\parindentlnth}2051 \hspace{2\parindentlnth} 2157 2052 \subfloat[Waiting Thread (W1)]{\label{f:WaitingThread}\usebox\myboxB} 2158 2053 \hspace{2\parindentlnth} … … 2181 2076 In an object-oriented programming-language, a class includes an exhaustive list of operations. 2182 2077 However, new members can be added via static inheritance or dynamic members, \eg JavaScript~\cite{JavaScript}. 2183 Similarly, monitor functions can be added at any time in \CFA, making it less clear for programmers and more difficult to implement.2078 Similarly, monitor routines can be added at any time in \CFA, making it less clear for programmers and more difficult to implement. 
2184 2079 \begin{cfa} 2185 2080 monitor M { ... }; 2186 2081 void `f`( M & mutex m ); 2187 void g( M & mutex m ) { waitfor( `f` ); } $\C{// clear which f}$2188 void `f`( M & mutex m, int ); $\C{// different f}$2189 void h( M & mutex m ) { waitfor( `f` ); } $\C{// unclear which f}$2190 \end{cfa} 2191 Hence, the \CFAcode for entering a monitor looks like:2192 \begin{cfa} 2193 if ( $\textrm{\textit{monitor is free}}$ ) $\ C{//enter}$2194 else if ( $\textrm{\textit{already own monitor}}$ ) $\ C{//continue}$2195 else if ( $\textrm{\textit{monitor accepts me}}$ ) $\ C{//enter}$2196 else $\ C{//block}$2082 void g( M & mutex m ) { waitfor( `f` ); } $\C{// clear which f}$ 2083 void `f`( M & mutex m, int ); $\C{// different f}$ 2084 void h( M & mutex m ) { waitfor( `f` ); } $\C{// unclear which f}$ 2085 \end{cfa} 2086 Hence, the cfa-code for entering a monitor looks like: 2087 \begin{cfa} 2088 if ( $\textrm{\textit{monitor is free}}$ ) $\LstCommentStyle{// \color{red}enter}$ 2089 else if ( $\textrm{\textit{already own monitor}}$ ) $\LstCommentStyle{// \color{red}continue}$ 2090 else if ( $\textrm{\textit{monitor accepts me}}$ ) $\LstCommentStyle{// \color{red}enter}$ 2091 else $\LstCommentStyle{// \color{red}block}$ 2197 2092 \end{cfa} 2198 2093 For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. 
… … 2211 2106 {\resizebox{0.45\textwidth}{!}{\input{ext_monitor.pstex_t}}} 2212 2107 }% subfloat 2213 \caption{Monitor implementation}2108 \caption{Monitor Implementation} 2214 2109 \label{f:MonitorImplementation} 2215 2110 \end{figure} 2216 2111 2217 For a fixed (small) number of mutex functions (\eg 128), the accept check reduces to a bitmask of allowed callers, which can be checked with a single instruction.2218 This approach requires a unique dense ordering of functions with a small upper-bound and the ordering must be consistent across translation units.2219 For object-oriented languages these constraints are common, but \CFA mutex functions can be added in any scope and are only visible in certain translation unit, precluding program-wide dense-ordering among mutex functions.2112 For a fixed (small) number of mutex routines (\eg 128), the accept check reduces to a bitmask of allowed callers, which can be checked with a single instruction. 2113 This approach requires a unique dense ordering of routines with a small upper-bound and the ordering must be consistent across translation units. 2114 For object-oriented languages these constraints are common, but \CFA mutex routines can be added in any scope and are only visible in certain translation unit, precluding program-wide dense-ordering among mutex routines. 2220 2115 2221 2116 Figure~\ref{fig:BulkMonitor} shows the \CFA monitor implementation. 2222 The mutex function called is associated with each thread on the entry queue, while a list of acceptable functions is kept separately.2223 The accepted list is a variable-sized array of accepted functionpointers, so the single instruction bitmask comparison is replaced by dereferencing a pointer followed by a (usually short) linear search.2117 The mutex routine called is associated with each thread on the entry queue, while a list of acceptable routines is kept separately. 
2118 The accepted list is a variable-sized array of accepted routine pointers, so the single instruction bitmask comparison is replaced by dereferencing a pointer followed by a (usually short) linear search. 2224 2119 2225 2120 … … 2229 2124 External scheduling, like internal scheduling, becomes significantly more complex for multi-monitor semantics. 2230 2125 Even in the simplest case, new semantics needs to be established. 2126 \newpage 2231 2127 \begin{cfa} 2232 2128 monitor M { ... }; 2233 2129 void f( M & mutex m1 ); 2234 void g( M & mutex m1, M & mutex m2 ) { `waitfor( f );` } $\C{// pass m1 or m2 to f?}$ 2130 void g( M & mutex m1, M & mutex m2 ) { 2131 waitfor( f ); $\C{\color{red}// pass m1 or m2 to f?}$ 2132 } 2235 2133 \end{cfa} 2236 2134 The solution is for the programmer to disambiguate: 2237 2135 \begin{cfa} 2238 waitfor( f, `m2` ); $\C{// wait for call to f with argument m2}$2239 \end{cfa} 2240 Both locks are acquired by function @g@, so when function@f@ is called, the lock for monitor @m2@ is passed from @g@ to @f@, while @g@ still holds lock @m1@.2136 waitfor( f, m2 ); $\C{\color{red}// wait for call to f with argument m2}$ 2137 \end{cfa} 2138 Both locks are acquired by routine @g@, so when routine @f@ is called, the lock for monitor @m2@ is passed from @g@ to @f@, while @g@ still holds lock @m1@. 2241 2139 This behaviour can be extended to the multi-monitor @waitfor@ statement. 2242 2140 \begin{cfa} 2243 2141 monitor M { ... }; 2244 2142 void f( M & mutex m1, M & mutex m2 ); 2245 void g( M & mutex m1, M & mutex m2 ) { waitfor( f, `m1, m2` ); $\C{// wait for call to f with arguments m1 and m2}$ 2246 \end{cfa} 2247 Again, the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired by the accepting function. 
2143 void g( M & mutex m1, M & mutex m2 ) { 2144 waitfor( f, m1, m2 ); $\C{\color{red}// wait for call to f with arguments m1 and m2}$ 2145 } 2146 \end{cfa} 2147 Again, the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired by the accepting routine. 2248 2148 Also, the order of the monitors in a @waitfor@ statement is unimportant. 2249 2149 … … 2252 2152 2253 2153 \begin{figure} 2254 \ centering2255 \begin{ lrbox}{\myboxA}2256 \begin{cfa} [aboveskip=0pt,belowskip=0pt]2154 \lstDeleteShortInline@% 2155 \begin{tabular}{@{}l@{\hspace{\parindentlnth}}|@{\hspace{\parindentlnth}}l@{}} 2156 \begin{cfa} 2257 2157 monitor M1 {} m11, m12; 2258 2158 monitor M2 {} m2; … … 2267 2167 f( `m12`, m2 ); // cannot fulfil 2268 2168 \end{cfa} 2269 \end{lrbox} 2270 2271 \begin{lrbox}{\myboxB} 2272 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 2169 & 2170 \begin{cfa} 2273 2171 monitor M1 {} m11, m12; 2274 2172 monitor M2 {} m2; … … 2283 2181 f( `m12`, m2 ); // cannot fulfil 2284 2182 \end{cfa} 2285 \end{lrbox} 2286 \subfloat[Internal scheduling]{\label{f:InternalScheduling}\usebox\myboxA} 2287 \hspace{3pt} 2288 \vrule 2289 \hspace{3pt} 2290 \subfloat[External scheduling]{\label{f:ExternalScheduling}\usebox\myboxB} 2183 \end{tabular} 2184 \lstMakeShortInline@% 2291 2185 \caption{Unmatched \protect\lstinline@mutex@ sets} 2292 2186 \label{f:UnmatchedMutexSets} … … 2296 2190 \subsection{Extended \protect\lstinline@waitfor@} 2297 2191 2298 Figure~\ref{f:ExtendedWaitfor} show the extended form of the @waitfor@ statement to conditionally accept one of a group of mutex functions, with a specific action to be performed \emph{after} the mutex functionfinishes.2192 Figure~\ref{f:ExtendedWaitfor} show the extended form of the @waitfor@ statement to conditionally accept one of a group of mutex routines, with a specific action to be performed \emph{after} the mutex routine finishes. 
2299 2193 For a @waitfor@ clause to be executed, its @when@ must be true and an outstanding call to its corresponding member(s) must exist. 2300 The \emph{conditional-expression} of a @when@ may call a function, but the functionmust not block or context switch.2301 If there are multiple acceptable mutex calls, selection occurs top-to-bottom (prioritized) in the @waitfor@ clauses, whereas some programming languages with similar mechanisms accept non deterministically for this case, \eg Go \lstinline[morekeywords=select]@select@.2194 The \emph{conditional-expression} of a @when@ may call a routine, but the routine must not block or context switch. 2195 If there are multiple acceptable mutex calls, selection occurs top-to-bottom (prioritized) in the @waitfor@ clauses, whereas some programming languages with similar mechanisms accept non-deterministically for this case, \eg Go \lstinline[morekeywords=select]@select@. 2302 2196 If some accept guards are true and there are no outstanding calls to these members, the acceptor is accept-blocked until a call to one of these members is made. 2303 2197 If all the accept guards are false, the statement does nothing, unless there is a terminating @else@ clause with a true guard, which is executed instead. … … 2308 2202 2309 2203 \begin{figure} 2310 \centering2311 2204 \begin{cfa} 2312 2205 `when` ( $\emph{conditional-expression}$ ) $\C{// optional guard}$ … … 2333 2226 else if ( C2 ) waitfor( mem2 ); or when ( C2 ) waitfor( mem2 ); 2334 2227 \end{cfa} 2335 The left example only accepts@mem1@ if @C1@ is true or only @mem2@ if @C2@ is true.2228 The left example accepts only @mem1@ if @C1@ is true or only @mem2@ if @C2@ is true. 2336 2229 The right example accepts either @mem1@ or @mem2@ if @C1@ and @C2@ are true. 
2337 2230 … … 2353 2246 \subsection{\protect\lstinline@mutex@ Threads} 2354 2247 2355 Threads in \CFA can also be monitors to allow \emph{direct communication} among threads, \ie threads can have mutex functions that are called by other threads.2248 Threads in \CFA are monitors to allow direct communication among threads, \ie threads can have mutex routines that are called by other threads. 2356 2249 Hence, all monitor features are available when using threads. 2357 Figure~\ref{f:DirectCommunication} shows a comparison of direct call communication in \CFA with direct channel communication in Go.2358 (Ada provides a similar mechanism to the \CFA direct communication.)2359 The program main in both programs communicates directly with the other thread versus indirect communication where two threads interact through a passive monitor.2360 Both direct and indirection thread communication are valuable tools in structuring concurrent programs.2361 2362 \begin{figure}2363 \centering2364 \begin{lrbox}{\myboxA}2365 \begin{cfa}[aboveskip=0pt,belowskip=0pt]2366 2367 struct Msg { int i, j; };2368 thread Gortn { int i; float f; Msg m; };2369 void mem1( Gortn & mutex gortn, int i ) { gortn.i = i; }2370 void mem2( Gortn & mutex gortn, float f ) { gortn.f = f; }2371 void mem3( Gortn & mutex gortn, Msg m ) { gortn.m = m; }2372 void ^?{}( Gortn & mutex ) {}2373 2374 void main( Gortn & gortn ) with( gortn ) { // thread starts2375 2376 for () {2377 2378 `waitfor( mem1, gortn )` sout | i; // wait for calls2379 or `waitfor( mem2, gortn )` sout | f;2380 or `waitfor( mem3, gortn )` sout | m.i | m.j;2381 or `waitfor( ^?{}, gortn )` break;2382 2383 }2384 2385 }2386 int main() {2387 Gortn gortn; $\C[2.0in]{// start thread}$2388 `mem1( gortn, 0 );` $\C{// different calls}\CRT$2389 `mem2( gortn, 2.5 );`2390 `mem3( gortn, (Msg){1, 2} );`2391 2392 2393 } // wait for completion2394 \end{cfa}2395 \end{lrbox}2396 2397 \begin{lrbox}{\myboxB}2398 \begin{Go}[aboveskip=0pt,belowskip=0pt]2399 func main() 
{2400 type Msg struct{ i, j int }2401 2402 ch1 := make( chan int )2403 ch2 := make( chan float32 )2404 ch3 := make( chan Msg )2405 hand := make( chan string )2406 shake := make( chan string )2407 gortn := func() { $\C[1.5in]{// thread starts}$2408 var i int; var f float32; var m Msg2409 L: for {2410 select { $\C{// wait for messages}$2411 case `i = <- ch1`: fmt.Println( i )2412 case `f = <- ch2`: fmt.Println( f )2413 case `m = <- ch3`: fmt.Println( m )2414 case `<- hand`: break L $\C{// sentinel}$2415 }2416 }2417 `shake <- "SHAKE"` $\C{// completion}$2418 }2419 2420 go gortn() $\C{// start thread}$2421 `ch1 <- 0` $\C{// different messages}$2422 `ch2 <- 2.5`2423 `ch3 <- Msg{1, 2}`2424 `hand <- "HAND"` $\C{// sentinel value}$2425 `<- shake` $\C{// wait for completion}\CRT$2426 }2427 \end{Go}2428 \end{lrbox}2429 2430 \subfloat[\CFA]{\label{f:CFAwaitfor}\usebox\myboxA}2431 \hspace{3pt}2432 \vrule2433 \hspace{3pt}2434 \subfloat[Go]{\label{f:Gochannel}\usebox\myboxB}2435 \caption{Direct communication}2436 \label{f:DirectCommunication}2437 \end{figure}2438 2439 \begin{comment}2440 2250 The following shows an example of two threads directly calling each other and accepting calls from each other in a cycle. 
2441 2251 \begin{cfa} 2252 thread Ping {} pi; 2253 thread Pong {} po; 2254 void ping( Ping & mutex ) {} 2255 void pong( Pong & mutex ) {} 2256 int main() {} 2442 2257 \end{cfa} 2443 2258 \vspace{-0.8\baselineskip} … … 2445 2260 \begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}} 2446 2261 \begin{cfa} 2447 thread Ping {} pi;2448 void ping( Ping & mutex ) {}2449 2262 void main( Ping & pi ) { 2450 for ( 10) {2263 for ( int i = 0; i < 10; i += 1 ) { 2451 2264 `waitfor( ping, pi );` 2452 2265 `pong( po );` 2453 2266 } 2454 2267 } 2455 int main() {}2456 2268 \end{cfa} 2457 2269 & 2458 2270 \begin{cfa} 2459 thread Pong {} po;2460 void pong( Pong & mutex ) {}2461 2271 void main( Pong & po ) { 2462 for ( 10) {2272 for ( int i = 0; i < 10; i += 1 ) { 2463 2273 `ping( pi );` 2464 2274 `waitfor( pong, po );` 2465 2275 } 2466 2276 } 2467 2468 2277 \end{cfa} 2469 2278 \end{tabular} … … 2474 2283 % \end{figure} 2475 2284 Note, the ping/pong threads are globally declared, @pi@/@po@, and hence, start (and possibly complete) before the program main starts. 
2476 \end{comment}2477 2478 2479 \subsection{Execution Properties}2480 2481 Table~\ref{t:ObjectPropertyComposition} shows how the \CFA high-level constructs cover 3 fundamental execution properties: thread, stateful function, and mutual exclusion.2482 Case 1 is a basic object, with none of the new execution properties.2483 Case 2 allows @mutex@ calls to Case 1 to protect shared data.2484 Case 3 allows stateful functions to suspend/resume but restricts operations because the state is stackless.2485 Case 4 allows @mutex@ calls to Case 3 to protect shared data.2486 Cases 5 and 6 are the same as 3 and 4 without restriction because the state is stackful.2487 Cases 7 and 8 are rejected because a thread cannot execute without a stackful state in a preemptive environment when context switching from the signal handler.2488 Cases 9 and 10 have a stackful thread without and with @mutex@ calls.2489 For situations where threads do not require direct communication, case 9 provides faster creation/destruction by eliminating @mutex@ setup.2490 2491 \begin{table}[b]2492 \caption{Object property composition}2493 \centering2494 \label{t:ObjectPropertyComposition}2495 \renewcommand{\arraystretch}{1.25}2496 %\setlength{\tabcolsep}{5pt}2497 \begin{tabular}{c|c|l|l}2498 \multicolumn{2}{c|}{object properties} & \multicolumn{2}{c}{mutual exclusion} \\2499 \hline2500 thread & stateful & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\2501 \hline2502 \hline2503 No & No & \textbf{1}\ \ \ aggregate type & \textbf{2}\ \ \ @monitor@ aggregate type \\2504 \hline2505 No & Yes (stackless) & \textbf{3}\ \ \ @generator@ & \textbf{4}\ \ \ @monitor@ generator \\2506 \hline2507 No & Yes (stackful) & \textbf{5}\ \ \ @coroutine@ & \textbf{6}\ \ \ @monitor@ @coroutine@ \\2508 \hline2509 Yes & No / Yes (stackless) & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\2510 \hline2511 Yes & Yes (stackful) & \textbf{9}\ \ \ @thread@ & \textbf{10}\ \ @monitor@ @thread@ \\2512 
\end{tabular}2513 \end{table}2514 2285 2515 2286 … … 2517 2288 2518 2289 For completeness and efficiency, \CFA provides a standard set of low-level locks: recursive mutex, condition, semaphore, barrier, \etc, and atomic instructions: @fetchAssign@, @fetchAdd@, @testSet@, @compareSet@, \etc. 2519 However, we strongly advocate using high-level concurrency mechanisms whenever possible.2520 2290 2521 2291 2522 2292 \section{Parallelism} 2523 \label{s:Parallelism}2524 2293 2525 2294 Historically, computer performance was about processor speeds. 2526 2295 However, with heat dissipation being a direct consequence of speed increase, parallelism is the new source for increased performance~\cite{Sutter05, Sutter05b}. 2527 Therefore, high-performance applications must care about parallelism, which requires concurrency.2296 Now, high-performance applications must care about parallelism, which requires concurrency. 2528 2297 The lowest-level approach of parallelism is to use \newterm{kernel threads} in combination with semantics like @fork@, @join@, \etc. 2529 2298 However, kernel threads are better as an implementation tool because of complexity and higher cost. … … 2531 2300 2532 2301 2533 \subsection{User Threads }2302 \subsection{User Threads with Preemption} 2534 2303 2535 2304 A direct improvement on kernel threads is user threads, \eg Erlang~\cite{Erlang} and \uC~\cite{uC++book}. 2536 This approach provides an interface that matches the language paradigms, givesmore control over concurrency by the language runtime, and an abstract (and portable) interface to the underlying kernel threads across operating systems.2305 This approach provides an interface that matches the language paradigms, more control over concurrency by the language runtime, and an abstract (and portable) interface to the underlying kernel threads across operating systems. 2537 2306 In many cases, user threads can be used on a much larger scale (100,000 threads). 
2538 Like kernel threads, user threads support preemption, which maximizes nondeterminism, but in creases the potential forconcurrency errors: race, livelock, starvation, and deadlock.2307 Like kernel threads, user threads support preemption, which maximizes nondeterminism, but introduces the same concurrency errors: race, livelock, starvation, and deadlock. 2539 2308 \CFA adopts user-threads as they represent the truest realization of concurrency and can build any other concurrency approach, \eg thread pools and actors~\cite{Actors}. 2540 2309 2541 A variant of user thread is \newterm{fibres}, which removes preemption, \eg Go~\cite{Go} @goroutine@s. 2310 2311 \subsection{User Threads without Preemption (Fiber)} 2312 \label{s:fibers} 2313 2314 A variant of user thread is \newterm{fibers}, which removes preemption, \eg Go~\cite{Go} @goroutine@s. 2542 2315 Like functional programming, which removes mutation and its associated problems, removing preemption from concurrency reduces nondeterminism, making race and deadlock errors more difficult to generate. 2543 However, preemption is necessary for concurrency that relies on spinning, so there are a class of problems that cannot be programmed with fibres.2316 However, preemption is necessary for concurrency that relies on spinning, so there are a class of problems that cannot be programmed without preemption. 2544 2317 2545 2318 2546 2319 \subsection{Thread Pools} 2547 2320 2548 In contrast to direct threading is indirect \newterm{thread pools}, \eg Java @executor@,where small jobs (work units) are inserted into a work pool for execution.2321 In contrast to direct threading is indirect \newterm{thread pools}, where small jobs (work units) are inserted into a work pool for execution. 2549 2322 If the jobs are dependent, \ie interact, there is an implicit/explicit dependency graph that ties them together. 
2550 2323 While removing direct concurrency, and hence the amount of context switching, thread pools significantly limit the interaction that can occur among jobs. … … 2563 2336 \centering 2564 2337 \input{RunTimeStructure} 2565 \caption{\CFA Runtime structure}2338 \caption{\CFA Runtime Structure} 2566 2339 \label{f:RunTimeStructure} 2567 2340 \end{figure} … … 2595 2368 Preemption occurs on virtual processors rather than user threads, via operating-system interrupts. 2596 2369 Thus virtual processors execute user threads, where preemption frequency applies to a virtual processor, so preemption occurs randomly across the executed user threads. 2597 Turning off preemption transforms user threads into fib res.2370 Turning off preemption transforms user threads into fibers. 2598 2371 2599 2372 … … 2607 2380 \section{Implementation} 2608 2381 \label{s:Implementation} 2382 2383 Currently, \CFA has fixed-sized stacks, where the stack size can be set at coroutine/thread creation but with no subsequent growth. 2384 Schemes exist for dynamic stack-growth, such as stack copying and chained stacks. 2385 However, stack copying requires pointer adjustment to items on the stack, which is impossible without some form of garbage collection. 2386 As well, chained stacks require all modules be recompiled to use this feature, which breaks backward compatibility with existing C libraries. 2387 In the long term, it is likely C libraries will migrate to stack chaining to support concurrency, at only a minimal cost to sequential programs. 2388 Nevertheless, experience teaching \uC~\cite{CS343} shows fixed-sized stacks are rarely an issue in most concurrent programs. 2609 2389 2610 2390 A primary implementation challenge is avoiding contention from dynamically allocating memory because of bulk acquire, \eg the internal-scheduling design is (almost) free of allocations. 
… … 2617 2397 This array persists for the entire duration of the mutual exclusion and is used extensively for synchronization operations. 2618 2398 2619 To improve performance and simplicity, context switching occurs inside a functioncall, so only callee-saved registers are copied onto the stack and then the stack register is switched;2399 To improve performance and simplicity, context switching occurs inside a routine call, so only callee-saved registers are copied onto the stack and then the stack register is switched; 2620 2400 the corresponding registers are then restored for the other context. 2621 Note, the instruction pointer is untouched since the context switch is always inside the same function. 2622 Experimental results (not presented) for a stackless or stackful scheduler (1 versus 2 context switches) (see Section~\ref{s:Concurrency}) show the performance is virtually equivalent, because both approaches are dominated by locking to prevent a race condition. 2401 Note, the instruction pointer is untouched since the context switch is always inside the same routine. 2402 Unlike coroutines, threads do not context switch among each other; 2403 they context switch to the cluster scheduler. 2404 This method is a 2-step context-switch and provides a clear distinction between user and kernel code, where scheduling and other system operations happen. 2405 The alternative 1-step context-switch uses the \emph{from} thread's stack to schedule and then context-switches directly to the \emph{to} thread's stack. 2406 Experimental results (not presented) show the performance of these two approaches is virtually equivalent, because both approaches are dominated by locking to prevent a race condition. 2623 2407 2624 2408 All kernel threads (@pthreads@) created a stack. 
… … 2637 2421 2638 2422 However, on current UNIX systems: 2639 \begin{ cquote}2423 \begin{quote} 2640 2424 A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. 2641 2425 If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal. 2642 2426 SIGNAL(7) - Linux Programmer's Manual 2643 \end{ cquote}2427 \end{quote} 2644 2428 Hence, the timer-expiry signal, which is generated \emph{externally} by the UNIX kernel to the UNIX process, is delivered to any of its UNIX subprocesses (kernel threads). 2645 2429 To ensure each virtual processor receives its own preemption signals, a discrete-event simulation is run on a special virtual processor, and only it sets and receives timer events. … … 2652 2436 \label{results} 2653 2437 2654 To verify the implementation of the \CFA runtime, a series of microbenchmarks are performed comparing \CFA with Java OpenJDK-9, Go 1.9.2 and \uC 7.0.0. 2655 The benchmark computer is an AMD Opteron\texttrademark\ 6380 NUMA 64-core, 8 socket, 2.5 GHz processor, running Ubuntu 16.04.3 LTS and \uC and \CFA are compiled with gcc 6.3. 2656 2657 \begin{comment} 2438 To verify the implementation of the \CFA runtime, a series of microbenchmarks are performed comparing \CFA with other widely used programming languages with concurrency. 2439 Table~\ref{t:machine} shows the specifications of the computer used to run the benchmarks, and the versions of the software used in the comparison. 2440 2658 2441 \begin{table} 2659 2442 \centering … … 2686 2469 \end{tabular} 2687 2470 \end{table} 2688 \end{comment} 2689 2690 All benchmarks are run using the following harness. 
2691 \newpage 2471 2472 All benchmarks are run using the following harness: 2692 2473 \begin{cfa} 2693 2474 unsigned int N = 10_000_000; 2694 #define BENCH( `run` ) Time before = getTimeNsec(); `run;`Duration result = (getTimeNsec() - before) / N;2475 #define BENCH( run ) Time before = getTimeNsec(); run; Duration result = (getTimeNsec() - before) / N; 2695 2476 \end{cfa} 2696 2477 The method used to get time is @clock_gettime( CLOCK_REALTIME )@. … … 2702 2483 \paragraph{Context-Switching} 2703 2484 2704 In procedural programming, the cost of a functioncall is important as modularization (refactoring) increases.2705 (In many cases, a compiler inlines functioncalls to eliminate this cost.)2485 In procedural programming, the cost of a routine call is important as modularization (refactoring) increases. 2486 (In many cases, a compiler inlines routine calls to eliminate this cost.) 2706 2487 Similarly, when modularization extends to coroutines/tasks, the time for a context switch becomes a relevant factor. 2707 The coroutine test is from resumer to suspender and from suspender to resumer, which is two context switches. 2708 The thread test is using yield to enter and return from the runtime kernel, which is two context switches. 2488 The coroutine context-switch is 2-step using resume/suspend, \ie from resumer to suspender and from suspender to resumer. 2489 The thread context switch is 2-step using yield, \ie enter and return from the runtime kernel. 2490 Figure~\ref{f:ctx-switch} shows the code for coroutines/threads with all results in Table~\ref{tab:ctx-switch}. 2709 2491 The difference in performance between coroutine and thread context-switch is the cost of scheduling for threads, whereas coroutines are self-scheduling. 2710 Figure~\ref{f:ctx-switch} only shows the \CFA code for coroutines/threads (other systems are similar) with all results in Table~\ref{tab:ctx-switch}. 
2711 2712 \begin{multicols}{2} 2492 2493 \begin{figure} 2713 2494 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}} 2495 2496 \newbox\myboxA 2497 \begin{lrbox}{\myboxA} 2714 2498 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 2715 2499 coroutine C {} c; 2716 2500 void main( C & ) { for ( ;; ) { @suspend();@ } } 2717 int main() { // coroutine test 2718 BENCH( for ( N ) { @resume( c );@ } ) 2501 int main() { 2502 BENCH( 2503 for ( size_t i = 0; i < N; i += 1 ) { @resume( c );@ } ) 2719 2504 sout | result`ns; 2720 2505 } 2721 int main() { // task test 2722 BENCH( for ( N ) { @yield();@ } ) 2506 \end{cfa} 2507 \end{lrbox} 2508 2509 \newbox\myboxB 2510 \begin{lrbox}{\myboxB} 2511 \begin{cfa}[aboveskip=0pt,belowskip=0pt] 2512 2513 2514 int main() { 2515 BENCH( 2516 for ( size_t i = 0; i < N; i += 1 ) { @yield();@ } ) 2723 2517 sout | result`ns; 2724 2518 } 2725 2519 \end{cfa} 2520 \end{lrbox} 2521 2522 \subfloat[Coroutine]{\usebox\myboxA} 2523 \quad 2524 \subfloat[Thread]{\usebox\myboxB} 2726 2525 \captionof{figure}{\CFA context-switch benchmark} 2727 2526 \label{f:ctx-switch} 2728 2527 2729 \columnbreak 2730 2731 \vspace*{-16pt} 2528 \centering 2529 2732 2530 \captionof{table}{Context switch comparison (nanoseconds)} 2733 2531 \label{tab:ctx-switch} 2734 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}} 2735 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 2736 Kernel Thread & 333.5 & 332.96 & 4.1 \\ 2737 \CFA Coroutine & 49 & 48.68 & 0.47 \\ 2738 \CFA Thread & 105 & 105.57 & 1.37 \\ 2739 \uC Coroutine & 44 & 44 & 0 \\ 2740 \uC Thread & 100 & 99.29 & 0.96 \\ 2741 Goroutine & 145 & 147.25 & 4.15 \\ 2742 Java Thread & 373.5 & 375.14 & 8.72 2532 \bigskip 2533 \begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}} 2534 \cline{2-4} 2535 \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\ 2536 \hline 2537 Kernel Thread & 333.5 & 332.96 & 
4.1 \\ 2538 \CFA Coroutine & 49 & 48.68 & 0.47 \\ 2539 \CFA Thread & 105 & 105.57 & 1.37 \\ 2540 \uC Coroutine & 44 & 44 & 0 \\ 2541 \uC Thread & 100 & 99.29 & 0.96 \\ 2542 Goroutine & 145 & 147.25 & 4.15 \\ 2543 Java Thread & 373.5 & 375.14 & 8.72 \\ 2544 \hline 2743 2545 \end{tabular} 2744 \end{multicols} 2745 2746 2747 \paragraph{Mutual-Exclusion} 2748 2749 Mutual exclusion is measured by entering/leaving a critical section. 2750 For monitors, entering and leaving a monitor function is measured. 2751 To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured. 2752 Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}. 2753 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects. 2754 2755 \begin{multicols}{2} 2546 2547 \bigskip 2548 \bigskip 2549 2756 2550 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}} 2757 2551 \begin{cfa} 2758 monitor M {} m1/*, m2, m3, m4*/; 2759 void __attribute__((noinline)) 2760 do_call( M & mutex m/*, m2, m3, m4*/ ) {} 2552 monitor M { ... 
} m1/*, m2, m3, m4*/; 2553 void __attribute__((noinline)) do_call( M & mutex m/*, m2, m3, m4*/ ) {} 2761 2554 int main() { 2762 BENCH( 2763 for( N ) @do_call( m1/*, m2, m3, m4*/ );@ 2764 ) 2555 BENCH( for( size_t i = 0; i < N; i += 1 ) { @do_call( m1/*, m2, m3, m4*/ );@ } ) 2765 2556 sout | result`ns; 2766 2557 } … … 2769 2560 \label{f:mutex} 2770 2561 2771 \columnbreak 2772 2773 \vspace*{-16pt} 2562 \centering 2563 2774 2564 \captionof{table}{Mutex comparison (nanoseconds)} 2775 2565 \label{tab:mutex} 2776 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}} 2777 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 2778 C function & 2 & 2 & 0 \\ 2779 FetchAdd + FetchSub & 26 & 26 & 0 \\ 2780 Pthreads Mutex Lock & 31 & 31.71 & 0.97 \\ 2781 \uC @monitor@ member rtn. & 31 & 31 & 0 \\ 2782 \CFA @mutex@ function, 1 arg. & 46 & 46.68 & 0.93 \\ 2783 \CFA @mutex@ function, 2 arg. & 84 & 85.36 & 1.99 \\ 2784 \CFA @mutex@ function, 4 arg. & 158 & 161 & 4.22 \\ 2785 Java synchronized function & 27.5 & 29.79 & 2.93 2566 \bigskip 2567 2568 \begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}} 2569 \cline{2-4} 2570 \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\ 2571 \hline 2572 C routine & 2 & 2 & 0 \\ 2573 FetchAdd + FetchSub & 26 & 26 & 0 \\ 2574 Pthreads Mutex Lock & 31 & 31.71 & 0.97 \\ 2575 \uC @monitor@ member routine & 31 & 31 & 0 \\ 2576 \CFA @mutex@ routine, 1 argument & 46 & 46.68 & 0.93 \\ 2577 \CFA @mutex@ routine, 2 argument & 84 & 85.36 & 1.99 \\ 2578 \CFA @mutex@ routine, 4 argument & 158 & 161 & 4.22 \\ 2579 Java synchronized routine & 27.5 & 29.79 & 2.93 \\ 2580 \hline 2786 2581 \end{tabular} 2787 \end{multicols} 2582 \end{figure} 2583 2584 2585 \paragraph{Mutual-Exclusion} 2586 2587 Mutual exclusion is measured by entering/leaving a critical section. 2588 For monitors, entering and leaving a monitor routine is measured. 
2589 Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}. 2590 To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured. 2591 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects. 2788 2592 2789 2593 … … 2795 2599 Java scheduling is significantly greater because the benchmark explicitly creates multiple thread in order to prevent the JIT from making the program sequential, \ie removing all locking. 2796 2600 2797 \begin{ multicols}{2}2601 \begin{figure} 2798 2602 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}} 2799 2603 \begin{cfa} 2800 2604 volatile int go = 0; 2801 monitor M { condition c; } m;2802 void __attribute__((noinline)) 2803 do_call( M & mutex a1 ) { signal( c ); }2605 condition c; 2606 monitor M { ... } m; 2607 void __attribute__((noinline)) do_call( M & mutex a1 ) { signal( c ); } 2804 2608 thread T {}; 2805 2609 void main( T & this ) { 2806 while ( go == 0 ) { yield(); } 2610 while ( go == 0 ) { yield(); } // wait for other thread to start 2807 2611 while ( go == 1 ) { @do_call( m );@ } 2808 2612 } 2809 int __attribute__((noinline)) 2810 do_wait( M & mutex m ) with(m) { 2613 int __attribute__((noinline)) do_wait( M & mutex m ) { 2811 2614 go = 1; // continue other thread 2812 BENCH( for ( N) { @wait( c );@ } );2615 BENCH( for ( size_t i = 0; i < N; i += 1 ) { @wait( c );@ } ); 2813 2616 go = 0; // stop other thread 2814 2617 sout | result`ns; … … 2822 2625 \label{f:int-sched} 2823 2626 2824 \columnbreak 2825 2826 \vspace*{-16pt} 2627 \centering 2827 2628 \captionof{table}{Internal-scheduling comparison (nanoseconds)} 2828 2629 \label{tab:int-sched} 2829 2630 \bigskip 2830 2631 2831 \begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}} 2832 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & 
\multicolumn{1}{c@{}}{Std Dev} \\ 2833 Pthreads Cond. Variable & 6005 & 5681.43 & 835.45 \\ 2834 \uC @signal@ & 324 & 325.54 & 3,02 \\ 2835 \CFA @signal@, 1 @monitor@ & 368.5 & 370.61 & 4.77 \\ 2836 \CFA @signal@, 2 @monitor@ & 467 & 470.5 & 6.79 \\ 2837 \CFA @signal@, 4 @monitor@ & 700.5 & 702.46 & 7.23 \\ 2838 Java @notify@ & 15471 & 172511 & 5689 2632 \begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}} 2633 \cline{2-4} 2634 \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\ 2635 \hline 2636 Pthreads Condition Variable & 6005 & 5681.43 & 835.45 \\ 2637 \uC @signal@ & 324 & 325.54 & 3,02 \\ 2638 \CFA @signal@, 1 @monitor@ & 368.5 & 370.61 & 4.77 \\ 2639 \CFA @signal@, 2 @monitor@ & 467 & 470.5 & 6.79 \\ 2640 \CFA @signal@, 4 @monitor@ & 700.5 & 702.46 & 7.23 \\ 2641 Java @notify@ & 15471 & 172511 & 5689 \\ 2642 \hline 2839 2643 \end{tabular} 2840 \end{ multicols}2644 \end{figure} 2841 2645 2842 2646 … … 2847 2651 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects. 2848 2652 2849 \begin{ multicols}{2}2653 \begin{figure} 2850 2654 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}} 2851 \vspace*{-16pt}2852 2655 \begin{cfa} 2853 2656 volatile int go = 0; 2854 monitor M { } m;2657 monitor M { ... 
} m; 2855 2658 thread T {}; 2856 void __attribute__((noinline)) 2857 do_call( M & mutex ) {} 2659 void __attribute__((noinline)) do_call( M & mutex ) {} 2858 2660 void main( T & ) { 2859 while ( go == 0 ) { yield(); } 2661 while ( go == 0 ) { yield(); } // wait for other thread to start 2860 2662 while ( go == 1 ) { @do_call( m );@ } 2861 2663 } 2862 int __attribute__((noinline)) 2863 do_wait( M & mutex m ) { 2664 int __attribute__((noinline)) do_wait( M & mutex m ) { 2864 2665 go = 1; // continue other thread 2865 BENCH( for ( N) { @waitfor( do_call, m );@ } )2666 BENCH( for ( size_t i = 0; i < N; i += 1 ) { @waitfor( do_call, m );@ } ) 2866 2667 go = 0; // stop other thread 2867 2668 sout | result`ns; … … 2875 2676 \label{f:ext-sched} 2876 2677 2877 \columnbreak 2878 2879 \vspace*{-16pt} 2678 \centering 2679 2880 2680 \captionof{table}{External-scheduling comparison (nanoseconds)} 2881 2681 \label{tab:ext-sched} 2882 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}} 2883 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\ 2884 \uC @_Accept@ & 358 & 359.11 & 2.53 \\ 2885 \CFA @waitfor@, 1 @monitor@ & 359 & 360.93 & 4.07 \\ 2886 \CFA @waitfor@, 2 @monitor@ & 450 & 449.39 & 6.62 \\ 2887 \CFA @waitfor@, 4 @monitor@ & 652 & 655.64 & 7.73 2682 \bigskip 2683 \begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}} 2684 \cline{2-4} 2685 \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\ 2686 \hline 2687 \uC @_Accept@ & 358 & 359.11 & 2.53 \\ 2688 \CFA @waitfor@, 1 @monitor@ & 359 & 360.93 & 4.07 \\ 2689 \CFA @waitfor@, 2 @monitor@ & 450 & 449.39 & 6.62 \\ 2690 \CFA @waitfor@, 4 @monitor@ & 652 & 655.64 & 7.73 \\ 2691 \hline 2888 2692 \end{tabular} 2889 \end{multicols} 2890 2693 2694 \bigskip 2695 \medskip 2696 2697 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}} 2698 \begin{cfa} 2699 thread MyThread {}; 2700 void main( MyThread & 
) {} 2701 int main() { 2702 BENCH( for ( size_t i = 0; i < N; i += 1 ) { @MyThread m;@ } ) 2703 sout | result`ns; 2704 } 2705 \end{cfa} 2706 \captionof{figure}{\CFA object-creation benchmark} 2707 \label{f:creation} 2708 2709 \centering 2710 2711 \captionof{table}{Creation comparison (nanoseconds)} 2712 \label{tab:creation} 2713 \bigskip 2714 2715 \begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}} 2716 \cline{2-4} 2717 \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} & \multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\ 2718 \hline 2719 Pthreads & 28091 & 28073.39 & 163.1 \\ 2720 \CFA Coroutine Lazy & 6 & 6.07 & 0.26 \\ 2721 \CFA Coroutine Eager & 520 & 520.61 & 2.04 \\ 2722 \CFA Thread & 2032 & 2016.29 & 112.07 \\ 2723 \uC Coroutine & 106 & 107.36 & 1.47 \\ 2724 \uC Thread & 536.5 & 537.07 & 4.64 \\ 2725 Goroutine & 3103 & 3086.29 & 90.25 \\ 2726 Java Thread & 103416.5 & 103732.29 & 1137 \\ 2727 \hline 2728 \end{tabular} 2729 \end{figure} 2891 2730 2892 2731 … … 2897 2736 The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine to force stack creation, the creation cost is artificially low. 
2898 2737 2899 \begin{multicols}{2}2900 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}2901 \begin{cfa}2902 thread MyThread {};2903 void main( MyThread & ) {}2904 int main() {2905 BENCH( for ( N ) { @MyThread m;@ } )2906 sout | result`ns;2907 }2908 \end{cfa}2909 \captionof{figure}{\CFA object-creation benchmark}2910 \label{f:creation}2911 2912 \columnbreak2913 2914 \vspace*{-16pt}2915 \captionof{table}{Object creation comparison (nanoseconds)}2916 \label{tab:creation}2917 2918 \begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}2919 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\2920 Pthreads & 28091 & 28073.39 & 163.1 \\2921 \CFA Coroutine Lazy & 6 & 6.07 & 0.26 \\2922 \CFA Coroutine Eager & 520 & 520.61 & 2.04 \\2923 \CFA Thread & 2032 & 2016.29 & 112.07 \\2924 \uC Coroutine & 106 & 107.36 & 1.47 \\2925 \uC Thread & 536.5 & 537.07 & 4.64 \\2926 Goroutine & 3103 & 3086.29 & 90.25 \\2927 Java Thread & 103416.5 & 103732.29 & 1137 \\2928 \end{tabular}2929 \end{multicols}2930 2931 2738 2932 2739 \section{Conclusion} 2933 2740 2934 Advanced control-flow will always be difficult, especially when there is temporal ordering and nondeterminism. 2935 However, many systems exacerbate the difficulty through their presentation mechanisms. 2936 This paper shows it is possible to present a hierarchy of control-flow features, generator, coroutine, thread, and monitor, providing an integrated set of high-level, efficient, and maintainable control-flow features. 2937 Eliminated from \CFA are spurious wakeup and barging, which are nonintuitive and lead to errors, and having to work with a bewildering set of low-level locks and acquisition techniques. 2938 \CFA high-level race-free monitors and tasks provide the core mechanisms for mutual exclusion and synchronization, without having to resort to magic qualifiers like @volatile@/@atomic@. 
2939 Extending these mechanisms to handle high-level deadlock-free bulk acquire across both mutual exclusion and synchronization is a unique contribution. 2940 The \CFA runtime provides concurrency based on a preemptive M:N user-level threading-system, executing in clusters, which encapsulate scheduling of work on multiple kernel threads providing parallelism. 2741 This paper demonstrates a concurrency API that is simple, efficient, and able to build higher-level concurrency features. 2742 The approach provides concurrency based on a preemptive M:N user-level threading-system, executing in clusters, which encapsulate scheduling of work on multiple kernel threads providing parallelism. 2941 2743 The M:N model is judged to be efficient and provide greater flexibility than a 1:1 threading model. 2942 These concepts and the entire \CFA runtime-system are written in the \CFA language, extensively leveraging the \CFA type-system, which demonstrates the expressiveness of the \CFA language. 2744 High-level objects (monitor/task) are the core mechanism for mutual exclusion and synchronization. 2745 A novel aspect is allowing multiple mutex-objects to be accessed simultaneously reducing the potential for deadlock for this complex scenario. 2746 These concepts and the entire \CFA runtime-system are written in the \CFA language, demonstrating the expressiveness of the \CFA language. 2943 2747 Performance comparisons with other concurrent systems/languages show the \CFA approach is competitive across all low-level operations, which translates directly into good performance in well-written concurrent applications. 
2944 C programmers should feel comfortable using these mechanisms for developing co mplex control-flow in applications, with the ability to obtain maximum available performance by selecting mechanisms at the appropriate level of need.2748 C programmers should feel comfortable using these mechanisms for developing concurrent applications, with the ability to obtain maximum available performance by mechanisms at the appropriate level. 2945 2749 2946 2750 2947 2751 \section{Future Work} 2948 2752 2949 While con trol flow in \CFA has a strong start, development is still underway to complete a number ofmissing features.2753 While concurrency in \CFA has a strong start, development is still underway and there are missing features. 2950 2754 2951 2755 \paragraph{Flexible Scheduling} … … 2955 2759 Different scheduling algorithms can affect performance (both in terms of average and variation). 2956 2760 However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. 2957 One solution is to offer various t uning options, allowing the scheduler to be adjusted to the requirements of the workload.2761 One solution is to offer various tweaking options, allowing the scheduler to be adjusted to the requirements of the workload. 2958 2762 However, to be truly flexible, a pluggable scheduler is necessary. 2959 2763 Currently, the \CFA pluggable scheduler is too simple to handle complex scheduling, \eg quality of service and real-time, where the scheduler must interact with mutex objects to deal with issues like priority inversion. … … 2975 2779 While monitors offer flexible and powerful concurrency for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package. 2976 2780 Examples of such tools can include futures and promises~\cite{promises}, executors and actors. 
2977 These additional features are useful for applications that can be constructed without shared data and direct blocking.2781 These additional features are useful when monitors offer a level of abstraction that is inadequate for certain tasks. 2978 2782 As well, new \CFA extensions should make it possible to create a uniform interface for virtually all mutual exclusion, including monitors and low-level locks. 2979 2783 -
doc/papers/concurrency/examples/PingPong.c
raba20d2 r8b34df0 3 3 typedef struct PingPong { 4 4 const char * name; 5 struct PingPong * partner; 5 6 int N, i; 6 struct PingPong * partner;7 7 void * next; 8 8 } PingPong; 9 #define PPCtor( name, N ) { name, N , 0, NULL, NULL }9 #define PPCtor( name, N ) { name, NULL, N, 0, NULL } 10 10 void comain( PingPong * pp ) __attribute__(( noinline )); 11 11 void comain( PingPong * pp ) { -
doc/papers/concurrency/examples/Pingpong.cfa
raba20d2 r8b34df0 8 8 }; 9 9 10 //void ?{}( PingPong & this, const char * name, unsigned int N, PingPong & part ) {11 //this.[name, N] = [name, N]; &this.part = ∂12 //}13 //void ?{}( PingPong & this, const char * name, unsigned int N ) {14 //this{ name, N, *0p }; // call first constructor15 //}10 void ?{}( PingPong & this, const char * name, unsigned int N, PingPong & part ) { 11 this.[name, N] = [name, N]; &this.part = ∂ 12 } 13 void ?{}( PingPong & this, const char * name, unsigned int N ) { 14 this{ name, N, *0p }; // call first constructor 15 } 16 16 void cycle( PingPong & pingpong ) { 17 17 resume( pingpong ); -
doc/papers/concurrency/figures/ext_monitor.fig
raba20d2 r8b34df0 8 8 -2 9 9 1200 2 10 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 15 00.000 3600.000 1500 3300 1200 3600 1500 390011 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 15 00.000 4500.000 1500 4200 1200 4500 1500 480012 6 42 00 2100 4500 240013 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4 350 2250 105 105 4350 2250 4455 235514 4 1 -1 0 0 0 10 0.0000 2 105 90 4 350 2310 d\00110 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1575.000 3450.000 1575 3150 1275 3450 1575 3750 11 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1575.000 4350.000 1575 4050 1275 4350 1575 4650 12 6 4275 1950 4575 2250 13 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2100 105 105 4425 2100 4530 2205 14 4 1 -1 0 0 0 10 0.0000 2 105 90 4425 2160 d\001 15 15 -6 16 6 42 00 1800 4500 210017 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4 350 1950 105 105 4350 1950 4455 205518 4 1 -1 0 0 0 10 0.0000 2 105 90 4 350 2010 b\00116 6 4275 1650 4575 1950 17 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 1800 105 105 4425 1800 4530 1905 18 4 1 -1 0 0 0 10 0.0000 2 105 90 4425 1860 b\001 19 19 -6 20 6 14 20 5595 5625 580521 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 15 00 5700 80 80 1500 5700 1580 578022 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2 850 5700 105 105 2850 5700 2955 580523 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4 350 5700 105 105 4350 5700 4455 580524 4 0 -1 0 0 0 12 0.0000 2 135 1035 3 075 5775 blocked task\00125 4 0 -1 0 0 0 12 0.0000 2 135 870 1 650 5775 active task\00126 4 0 -1 0 0 0 12 0.0000 2 135 1050 4 575 5775 routine mask\00120 6 1495 5445 5700 5655 21 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1575 5550 80 80 1575 5550 1655 5630 22 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2925 5550 105 105 2925 5550 3030 5655 23 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4425 5550 105 105 4425 5550 4530 5655 24 4 0 -1 0 0 0 12 0.0000 2 135 1035 3150 5625 blocked task\001 25 4 0 -1 0 0 0 12 0.0000 2 135 870 1725 5625 active task\001 26 4 0 -1 0 0 0 12 0.0000 2 135 1050 4650 5625 routine mask\001 27 27 -6 28 6 3 450 1950 3750 255029 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 36 00 2100 105 105 3600 2100 
3705 210028 6 3525 1800 3825 2400 29 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3675 1950 105 105 3675 1950 3780 1950 30 30 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 31 3 450 1950 3750 1950 3750 2550 3450 2550 3450 195032 4 1 4 0 0 0 10 0.0000 2 105 120 36 00 2160 Y\00131 3525 1800 3825 1800 3825 2400 3525 2400 3525 1800 32 4 1 4 0 0 0 10 0.0000 2 105 120 3675 2010 Y\001 33 33 -6 34 6 3 450 2250 3750 255035 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 36 00 2400 105 105 3600 2400 3705 240036 4 1 4 0 0 0 10 0.0000 2 105 120 36 00 2445 X\00134 6 3525 2100 3825 2400 35 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3675 2250 105 105 3675 2250 3780 2250 36 4 1 4 0 0 0 10 0.0000 2 105 120 3675 2295 X\001 37 37 -6 38 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1 650 3750 105 105 1650 3750 1755 385539 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 3750 105 105 1950 3750 2055 385540 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 4050 105 105 4950 4050 5055 415541 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5 250 4050 105 105 5250 4050 5355 415542 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4 350 2850 105 105 4350 2850 4455 295543 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4 350 2550 105 105 4350 2550 4455 265544 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3 450 4725 80 80 3450 4725 3530 480538 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1725 3600 105 105 1725 3600 1830 3705 39 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2025 3600 105 105 2025 3600 2130 3705 40 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5025 3900 105 105 5025 3900 5130 4005 41 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5325 3900 105 105 5325 3900 5430 4005 42 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2700 105 105 4425 2700 4530 2805 43 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2400 105 105 4425 2400 4530 2505 44 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3525 4575 80 80 3525 4575 3605 4655 45 45 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 46 24 00 3075 3825 3075 3825 3375 2400 3375 2400 307546 2475 2925 3900 2925 3900 3225 2475 3225 2475 2925 47 47 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4 48 15 00 3900 2100 3900 2100 4200 1500 
420048 1575 3750 2175 3750 2175 4050 1575 4050 49 49 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3 50 15 00 3600 2100 3600 2250 382550 1575 3450 2175 3450 2325 3675 51 51 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 52 21 00 3300 1950 352552 2175 3150 2025 3375 53 53 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3 54 15 00 4500 2100 4500 2250 472554 1575 4350 2175 4350 2325 4575 55 55 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 56 21 00 4200 1950 442556 2175 4050 2025 4275 57 57 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4 58 15 00 4800 2100 4800 2100 5100 3300 510058 1575 4650 2175 4650 2175 4950 3375 4950 59 59 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 60 48 00 3900 4650 412560 4875 3750 4725 3975 61 61 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 62 33 00 5100 3525 525062 3375 4950 3600 5100 63 63 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9 64 36 00 5100 4800 5100 4800 4200 5400 4200 5400 3900 4800 390065 48 00 3000 4500 3000 4500 180064 3675 4950 4875 4950 4875 4050 5475 4050 5475 3750 4875 3750 65 4875 2850 4575 2850 4575 1650 66 66 2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5 67 42 00 4350 4200 3450 2700 3450 2700 4350 4200 435067 4275 4200 4275 3300 2775 3300 2775 4200 4275 4200 68 68 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 69 69 1 1 1.00 60.00 120.00 70 36 00 3225 3600 255070 3675 3075 3675 2400 71 71 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 72 4 050 3000 4500 315072 4125 2850 4575 3000 73 73 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 74 1500 3300 2100 3300 2100 3000 4050 3000 4050 1800 75 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2895 a\001 76 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2595 c\001 77 4 1 -1 0 0 0 12 0.0000 2 135 315 3450 5475 exit\001 78 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 3225 A\001 79 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 5025 condition\001 80 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 5250 B\001 81 4 0 -1 0 0 0 12 0.0000 2 135 420 4950 3825 stack\001 82 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3375 acceptor/\001 83 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3600 signalled\001 84 4 1 -1 0 0 0 12 0.0000 2 135 
795 1650 3000 condition\001 85 4 0 -1 0 0 0 12 0.0000 2 135 525 4650 2550 arrival\001 86 4 0 -1 0 0 0 12 0.0000 2 135 630 4650 2325 order of\001 87 4 1 -1 0 0 0 12 0.0000 2 135 525 3450 3825 shared\001 88 4 1 -1 0 0 0 12 0.0000 2 135 735 3450 4125 variables\001 89 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2025 X\001 90 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2325 Y\001 91 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2625 Y\001 92 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2925 X\001 93 4 0 -1 0 0 3 12 0.0000 2 150 540 4950 4425 urgent\001 94 4 1 0 50 -1 0 11 0.0000 2 165 600 3075 3300 accepted\001 95 4 1 -1 0 0 0 12 0.0000 2 165 960 4275 1725 entry queue\001 74 1575 3150 2175 3150 2175 2850 4125 2850 4125 1650 75 4 1 -1 0 0 0 10 0.0000 2 75 75 4425 2745 a\001 76 4 1 -1 0 0 0 10 0.0000 2 75 75 4425 2445 c\001 77 4 1 -1 0 0 0 12 0.0000 2 135 315 3525 5325 exit\001 78 4 1 -1 0 0 0 12 0.0000 2 135 135 1725 3075 A\001 79 4 1 -1 0 0 0 12 0.0000 2 135 795 1725 4875 condition\001 80 4 1 -1 0 0 0 12 0.0000 2 135 135 1725 5100 B\001 81 4 0 -1 0 0 0 12 0.0000 2 135 420 5025 3675 stack\001 82 4 0 -1 0 0 0 12 0.0000 2 180 750 5025 3225 acceptor/\001 83 4 0 -1 0 0 0 12 0.0000 2 180 750 5025 3450 signalled\001 84 4 1 -1 0 0 0 12 0.0000 2 135 795 1725 2850 condition\001 85 4 1 -1 0 0 0 12 0.0000 2 165 420 4425 1350 entry\001 86 4 1 -1 0 0 0 12 0.0000 2 135 495 4425 1575 queue\001 87 4 0 -1 0 0 0 12 0.0000 2 135 525 4725 2400 arrival\001 88 4 0 -1 0 0 0 12 0.0000 2 135 630 4725 2175 order of\001 89 4 1 -1 0 0 0 12 0.0000 2 135 525 3525 3675 shared\001 90 4 1 -1 0 0 0 12 0.0000 2 135 735 3525 3975 variables\001 91 4 0 4 50 -1 0 11 0.0000 2 120 135 4150 1875 X\001 92 4 0 4 50 -1 0 11 0.0000 2 120 135 4150 2175 Y\001 93 4 0 4 50 -1 0 11 0.0000 2 120 135 4150 2475 Y\001 94 4 0 4 50 -1 0 11 0.0000 2 120 135 4150 2775 X\001 95 4 0 -1 0 0 3 12 0.0000 2 150 540 5025 4275 urgent\001 96 4 1 0 50 -1 0 11 0.0000 2 165 600 3150 3150 accepted\001 -
doc/papers/concurrency/figures/monitor.fig
raba20d2 r8b34df0 8 8 -2 9 9 1200 2 10 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 2700.000 1500 2400 1200 2700 1500 3000 10 11 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 3600.000 1500 3300 1200 3600 1500 3900 11 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 4500.000 1500 4200 1200 4500 1500 4800 12 6 2400 2400 2700 2700 13 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 2550 105 105 2550 2550 2655 2550 14 4 1 -1 0 0 0 10 0.0000 2 105 90 2550 2610 b\001 12 6 4200 1200 4500 1500 13 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1350 105 105 4350 1350 4455 1455 14 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 1410 d\001 15 15 -6 16 6 2400 2700 2700 300017 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 2850 105 105 2550 2850 2655 285018 4 1 -1 0 0 0 10 0.0000 2 75 75 2550 2895 a\00116 6 4200 900 4500 1200 17 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1050 105 105 4350 1050 4455 1155 18 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 1110 b\001 19 19 -6 20 6 3300 2400 3600 270021 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 2550 105 105 3450 2550 3555 255022 4 1 -1 0 0 0 10 0.0000 2 105 90 3450 2610 d\00120 6 2400 1500 2700 1800 21 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1650 105 105 2550 1650 2655 1650 22 4 1 -1 0 0 0 10 0.0000 2 105 90 2550 1710 b\001 23 23 -6 24 6 1350 5550 5325 5850 25 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1500 5700 80 80 1500 5700 1580 5780 26 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2850 5700 105 105 2850 5700 2955 5805 27 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4350 5700 105 105 4350 5700 4455 5805 28 4 0 -1 0 0 0 12 0.0000 2 180 765 4575 5775 duplicate\001 29 4 0 -1 0 0 0 12 0.0000 2 135 1035 3075 5775 blocked task\001 30 4 0 -1 0 0 0 12 0.0000 2 135 870 1650 5775 active task\001 24 6 2400 1800 2700 2100 25 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1950 105 105 2550 1950 2655 1950 26 4 1 -1 0 0 0 10 0.0000 2 75 75 2550 1995 a\001 31 27 -6 32 6 4200 2100 4500 240033 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2250 105 105 4350 2250 4455 235534 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 2310 d\00128 6 3300 1500 3600 
1800 29 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1650 105 105 3450 1650 3555 1650 30 4 1 -1 0 0 0 10 0.0000 2 105 90 3450 1710 d\001 35 31 -6 36 6 4200 1800 4500 2100 32 6 1350 4650 5325 4950 33 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1500 4800 80 80 1500 4800 1580 4880 34 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2850 4800 105 105 2850 4800 2955 4905 35 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4350 4800 105 105 4350 4800 4455 4905 36 4 0 -1 0 0 0 12 0.0000 2 180 765 4575 4875 duplicate\001 37 4 0 -1 0 0 0 12 0.0000 2 135 1035 3075 4875 blocked task\001 38 4 0 -1 0 0 0 12 0.0000 2 135 870 1650 4875 active task\001 39 -6 40 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1650 2850 105 105 1650 2850 1755 2955 41 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 2850 105 105 1950 2850 2055 2955 42 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 3150 105 105 4950 3150 5055 3255 43 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 3150 105 105 5250 3150 5355 3255 37 44 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1950 105 105 4350 1950 4455 2055 38 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 2010 b\001 39 -6 40 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1650 3750 105 105 1650 3750 1755 3855 41 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 3750 105 105 1950 3750 2055 3855 42 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 4050 105 105 4950 4050 5055 4155 43 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 4050 105 105 5250 4050 5355 4155 44 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3450 4725 80 80 3450 4725 3530 4805 45 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 2850 105 105 3450 2850 3555 2850 46 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2850 105 105 4350 2850 4455 2955 47 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2550 105 105 4350 2550 4455 2655 45 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1650 105 105 4350 1650 4455 1755 46 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3450 3825 80 80 3450 3825 3530 3905 47 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1950 105 105 3450 1950 3555 1950 48 48 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 49 2400 3000 2625 315049 2400 2100 2625 2250 50 50 2 1 0 
1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 51 3300 3000 3525 3150 51 3300 2100 3525 2250 52 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 53 4200 2100 4425 2250 52 54 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5 53 1500 3300 2100 3300 2100 3000 2400 3000 2400 240055 1500 2400 2100 2400 2100 2100 2400 2100 2400 1500 54 56 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4 55 1500 3900 2100 3900 2100 4200 1500 4200 57 1500 3000 2100 3000 2100 3300 1500 3300 58 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3 59 1500 2700 2100 2700 2250 2925 60 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 61 2100 2400 1950 2625 56 62 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3 57 63 1500 3600 2100 3600 2250 3825 58 64 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 59 65 2100 3300 1950 3525 60 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 361 1500 4500 2100 4500 2250 472566 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4 67 1500 3900 2100 3900 2100 4200 3300 4200 62 68 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 63 2100 4200 1950 4425 69 4800 3000 4650 3225 70 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 71 3300 4200 3525 4350 64 72 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4 65 1500 4800 2100 4800 2100 5100 3300 5100 66 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 67 4800 3900 4650 4125 68 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2 69 3300 5100 3525 5250 73 3600 1500 3600 2100 4200 2100 4200 900 74 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9 75 3600 4200 4800 4200 4800 3300 5400 3300 5400 3000 4800 3000 76 4800 2100 4500 2100 4500 900 70 77 2 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5 71 4200 4350 4200 3450 2700 3450 2700 4350 4200 435078 4200 3450 4200 2550 2700 2550 2700 3450 4200 3450 72 79 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4 73 2700 2400 2700 3000 3300 3000 3300 2400 74 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4 75 3600 2400 3600 3000 4050 3000 4050 1800 76 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9 77 3600 5100 4800 5100 4800 4200 5400 4200 5400 3900 4800 3900 78 4800 3000 4500 3000 4500 1800 79 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 80 4050 3000 4500 3150 81 4 1 -1 0 0 0 12 0.0000 
2 135 315 3450 5475 exit\001 82 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 3225 A\001 83 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 5025 condition\001 84 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 5250 B\001 85 4 0 -1 0 0 0 12 0.0000 2 135 420 4950 3825 stack\001 86 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3375 acceptor/\001 87 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3600 signalled\001 88 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 3000 condition\001 89 4 1 4 0 0 0 12 0.0000 2 135 135 2550 2325 X\001 90 4 1 4 0 0 0 12 0.0000 2 135 135 3450 2325 Y\001 91 4 1 -1 0 0 0 12 0.0000 2 135 525 3450 3825 shared\001 92 4 1 -1 0 0 0 12 0.0000 2 135 735 3450 4125 variables\001 93 4 1 -1 0 0 0 10 0.0000 2 75 75 3450 2895 c\001 94 4 1 -1 0 0 0 12 0.0000 2 165 1125 3000 2100 mutex queues\001 95 4 0 -1 0 0 3 12 0.0000 2 150 540 4950 4425 urgent\001 96 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2895 a\001 97 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2595 c\001 98 4 0 -1 0 0 0 12 0.0000 2 135 525 4650 2550 arrival\001 99 4 0 -1 0 0 0 12 0.0000 2 135 630 4650 2325 order of\001 100 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2025 X\001 101 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2325 Y\001 102 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2625 Y\001 103 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2925 X\001 104 4 1 -1 0 0 0 12 0.0000 2 165 960 4275 1725 entry queue\001 80 2700 1500 2700 2100 3300 2100 3300 1500 81 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 1995 a\001 82 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 1695 c\001 83 4 1 -1 0 0 0 12 0.0000 2 135 315 3450 4575 exit\001 84 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 2325 A\001 85 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 4125 condition\001 86 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 4350 B\001 87 4 0 -1 0 0 0 12 0.0000 2 135 420 4950 2925 stack\001 88 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 2475 acceptor/\001 89 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 2700 signalled\001 90 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 2100 condition\001 91 4 1 4 0 0 0 12 0.0000 2 135 135 2550 1425 X\001 92 4 1 4 0 0 0 12 0.0000 2 135 135 
3450 1425 Y\001 93 4 1 -1 0 0 0 12 0.0000 2 165 420 4350 600 entry\001 94 4 1 -1 0 0 0 12 0.0000 2 135 495 4350 825 queue\001 95 4 0 -1 0 0 0 12 0.0000 2 135 525 4650 1650 arrival\001 96 4 0 -1 0 0 0 12 0.0000 2 135 630 4650 1425 order of\001 97 4 1 -1 0 0 0 12 0.0000 2 135 525 3450 2925 shared\001 98 4 1 -1 0 0 0 12 0.0000 2 135 735 3450 3225 variables\001 99 4 1 -1 0 0 0 12 0.0000 2 120 510 3000 975 mutex\001 100 4 1 -1 0 0 0 10 0.0000 2 75 75 3450 1995 c\001 101 4 1 -1 0 0 0 12 0.0000 2 135 570 3000 1200 queues\001 102 4 0 -1 0 0 3 12 0.0000 2 150 540 4950 3525 urgent\001 -
libcfa/src/iostream.cfa
raba20d2 r8b34df0 10 10 // Created On : Wed May 27 17:56:53 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jun 13 17:21:10201913 // Update Count : 81 212 // Last Modified On : Wed Jun 12 15:00:31 2019 13 // Update Count : 819 14 14 // 15 15 … … 740 740 } // ?|? 741 741 742 //istype & ?|?( istype & is, const char * fmt ) {743 //fmt( is, fmt, "" );744 //return is;745 //} // ?|?742 istype & ?|?( istype & is, const char * fmt ) { 743 fmt( is, fmt, "" ); 744 return is; 745 } // ?|? 746 746 747 747 istype & ?|?( istype & is, char * s ) { … … 777 777 // skip xxx 778 778 if ( ! f.s ) { 779 // printf( "skip %s %d\n", f.scanset, f.wd );780 if ( f.wd == -1 ) fmt( is, f.scanset, "" ); // no input arguments781 else f or ( f.wd ) fmt( is, "%*c" );779 //printf( "skip %s %d\n", f.scanset, f.wd ); 780 if ( f.wd != -1 ) for ( f.wd ) fmt( is, "%*c" ); // no input arguments 781 else fmt( is, f.scanset, "" ); 782 782 return is; 783 783 } // if -
libcfa/src/iostream.hfa
raba20d2 r8b34df0 10 10 // Created On : Wed May 27 17:56:53 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jun 13 17:20:21201913 // Update Count : 3 2512 // Last Modified On : Wed Jun 12 13:35:42 2019 13 // Update Count : 331 14 14 // 15 15 … … 316 316 istype & ?|?( istype &, long double _Complex & ); 317 317 318 //istype & ?|?( istype &, const char * );318 istype & ?|?( istype &, const char * ); 319 319 istype & ?|?( istype &, char * ); 320 320 … … 350 350 _Istream_Cstr ignore( const char * s ) { return (_Istream_Cstr)@{ s, 0p, -1, { .flags.ignore : true } }; } 351 351 _Istream_Cstr & ignore( _Istream_Cstr & fmt ) { fmt.flags.ignore = true; return fmt; } 352 _Istream_Cstr wd i( unsigned int w, char * s ) { return (_Istream_Cstr)@{ s, 0p, w, { .all : 0 } }; }353 _Istream_Cstr & wd i( unsigned int w, _Istream_Cstr & fmt ) { fmt.wd = w; return fmt; }352 _Istream_Cstr wd( unsigned int w, char * s ) { return (_Istream_Cstr)@{ s, 0p, w, { .all : 0 } }; } 353 _Istream_Cstr & wd( unsigned int w, _Istream_Cstr & fmt ) { fmt.wd = w; return fmt; } 354 354 } // distribution 355 355 forall( dtype istype | istream( istype ) ) istype & ?|?( istype & is, _Istream_Cstr f ); … … 377 377 _Istream_Manip(T) & ignore( _Istream_Manip(T) & fmt ) { fmt.ignore = true; return fmt; } \ 378 378 _Istream_Manip(T) wdi( unsigned int w, T & val ) { return (_Istream_Manip(T))@{ val, w, false }; } \ 379 _Istream_Manip(T) & wd i( unsigned int w, _Istream_Manip(T) & fmt ) { fmt.wd = w; return fmt; } \379 _Istream_Manip(T) & wd( unsigned int w, _Istream_Manip(T) & fmt ) { fmt.wd = w; return fmt; } \ 380 380 } /* distribution */ \ 381 381 forall( dtype istype | istream( istype ) ) { \ -
src/AST/Convert.cpp
raba20d2 r8b34df0 734 734 const ast::Expr * visit( const ast::VariableExpr * node ) override final { 735 735 auto expr = new VariableExpr(); 736 visitBaseExpr( node, expr ); 736 737 expr->var = get<DeclarationWithType>().accept1(node->var); 737 visitBaseExpr( node, expr ); 738 Type * type = expr->var->get_type()->clone(); 739 if(FunctionType * ft = dynamic_cast<FunctionType*>(type)) { 740 if(node->result.as<ast::PointerType>()) { 741 type = new PointerType({}, ft); 742 } 743 } 744 745 type->set_lvalue( true ); 746 expr->result = type ; 738 747 this->node = expr; 739 748 return nullptr; … … 741 750 742 751 const ast::Expr * visit( const ast::ConstantExpr * node ) override final { 743 // Old world: two types: rslt->constant.type, rslt->result 744 // New workd: one public type: node->result, plus node->underlyer only to support roundtrip conversion 745 // preserving underlyer because the correct type for string literals is complicated to construct, 746 // and distinguishing a string from other literals using the type is hard to do accurately 747 // Both worlds: the outer, expression-level type can change during resolution 748 // for a string, that's char[k] before-resolve and char * after 749 // Old world: the inner Constant type stays what it was built with 750 // for a string, that's char[k] always 751 // Both worlds: the "rep" field of a constant is the C source file fragment that compiles to the desired value 752 // for a string, that includes outer quotes, backslashes, et al cases from the Literals test 753 ConstantExpr *rslt = new ConstantExpr(Constant( 754 get<Type>().accept1(node->underlyer), 755 node->rep, 756 node->ival)); 752 ConstantExpr *rslt = nullptr; 753 switch ( node->kind ) { 754 case ast::ConstantExpr::Integer: 755 rslt = new ConstantExpr{Constant{ 756 get<Type>().accept1( node->result ), 757 node->rep, 758 (unsigned long long) node->intValue() 759 }}; 760 break; 761 case ast::ConstantExpr::FloatingPoint: 762 rslt = new ConstantExpr{Constant{ 763 
get<Type>().accept1(node->result), 764 node->rep, 765 (double) node->floatValue() 766 }}; 767 break; 768 case ast::ConstantExpr::String: 769 rslt = new ConstantExpr{Constant{ 770 get<Type>().accept1( node->result ), 771 node->rep, 772 (long long unsigned int)0 773 }}; 774 break; 775 } 776 assert(rslt); 757 777 auto expr = visitBaseExpr( node, rslt ); 758 778 this->node = expr; … … 2133 2153 ); 2134 2154 2155 visitBaseExpr_SkipResultType( old, 2156 expr 2157 ); 2158 2135 2159 expr->var = GET_ACCEPT_1(var, DeclWithType); 2136 visitBaseExpr( old, expr ); 2137 2138 this->node = expr; 2160 expr->result = expr->var->get_type(); 2161 if(const ast::FunctionType * ft = expr->result.as<ast::FunctionType>()) { 2162 if(dynamic_cast<PointerType *>(old->result)) { 2163 expr->result = new ast::PointerType(ft); 2164 } 2165 } 2166 add_qualifiers( expr->result, ast::CV::Lvalue ); 2167 this->node = expr; 2168 } 2169 2170 bool isIntlikeConstantType(const Type *t) { 2171 if ( const BasicType * basicType = dynamic_cast< const BasicType * >( t ) ) { 2172 if ( basicType->isInteger() ) { 2173 return true; 2174 } 2175 } else if ( dynamic_cast< const OneType * >( t ) ) { 2176 return true; 2177 } else if ( dynamic_cast< const ZeroType * >( t ) ) { 2178 return true; 2179 } else if ( dynamic_cast< const PointerType * >( t ) ) { 2180 // null pointer constants, with zero int-values 2181 return true; 2182 } 2183 return false; 2184 } 2185 2186 int isFloatlikeConstantType(const Type *t) { 2187 if ( const BasicType * bty = dynamic_cast< const BasicType * >( t ) ) { 2188 if ( ! 
bty->isInteger() ) { 2189 return true; 2190 } 2191 } 2192 return false; 2193 } 2194 2195 int isStringlikeConstantType(const Type *t) { 2196 const Type *referentType = nullptr; 2197 if ( const ArrayType * aty = dynamic_cast< const ArrayType * >( t ) ) { 2198 referentType = aty->base; 2199 } else if ( const PointerType * pty = dynamic_cast< const PointerType * >( t ) ) { 2200 referentType = pty->base; 2201 } 2202 if (referentType) { 2203 if ( const BasicType * bty = dynamic_cast< const BasicType * >( referentType ) ) { 2204 if ( bty->kind == BasicType::Kind::Char ) { 2205 return true; 2206 } 2207 } 2208 } 2209 return false; 2139 2210 } 2140 2211 2141 2212 virtual void visit( ConstantExpr * old ) override final { 2142 ast::ConstantExpr *rslt = new ast::ConstantExpr( 2143 old->location, 2144 GET_ACCEPT_1(result, Type), 2145 old->constant.get_value(), 2146 old->constant.ival 2147 ); 2148 rslt->underlyer = getAccept1< ast::Type, Type* >( old->constant.get_type() ); 2213 ast::ConstantExpr *rslt = nullptr; 2214 if (isStringlikeConstantType(old->result)) { 2215 rslt = new ast::ConstantExpr( 2216 old->location, 2217 GET_ACCEPT_1(result, Type), 2218 old->constant.get_value(), 2219 0, 2220 ast::ConstantExpr::Kind::String 2221 ); 2222 } else if (isIntlikeConstantType(old->result)) { 2223 rslt = new ast::ConstantExpr( 2224 old->location, 2225 GET_ACCEPT_1(result, Type), 2226 old->constant.get_value(), 2227 (unsigned long long) old->intValue(), 2228 ast::ConstantExpr::Kind::Integer 2229 ); 2230 } else if (isFloatlikeConstantType(old->result)) { 2231 rslt = new ast::ConstantExpr( 2232 old->location, 2233 GET_ACCEPT_1(result, Type), 2234 old->constant.get_value(), 2235 (double) old->constant.get_dval() 2236 ); 2237 } 2238 assert(rslt); 2149 2239 this->node = visitBaseExpr( old, rslt ); 2150 2240 } -
src/AST/Expr.cpp
raba20d2 r8b34df0 9 9 // Author : Aaron B. Moss 10 10 // Created On : Wed May 15 17:00:00 2019 11 // Last Modified By : A ndrew Beach12 // Created On : Thr Jun 13 13:38:00 201913 // Update Count : 211 // Last Modified By : Aaron B. Moss 12 // Created On : Wed May 15 17:00:00 2019 13 // Update Count : 1 14 14 // 15 15 … … 194 194 if ( const BasicType * bty = result.as< BasicType >() ) { 195 195 if ( bty->isInteger() ) { 196 assert(ival); 197 return ival.value(); 196 return val.ival; 198 197 } 199 198 } else if ( result.as< ZeroType >() ) { … … 205 204 } 206 205 206 double ConstantExpr::floatValue() const { 207 if ( const BasicType * bty = result.as< BasicType >() ) { 208 if ( ! bty->isInteger() ) { 209 return val.dval; 210 } 211 } 212 SemanticError( this, "Constant expression of non-floating-point type " ); 213 } 214 207 215 ConstantExpr * ConstantExpr::from_bool( const CodeLocation & loc, bool b ) { 208 216 return new ConstantExpr{ 209 217 loc, new BasicType{ BasicType::Bool }, b ? "1" : "0", (unsigned long long)b }; 218 } 219 220 ConstantExpr * ConstantExpr::from_char( const CodeLocation & loc, char c ) { 221 return new ConstantExpr{ 222 loc, new BasicType{ BasicType::Char }, std::to_string( c ), (unsigned long long)c }; 210 223 } 211 224 … … 219 232 loc, new BasicType{ BasicType::LongUnsignedInt }, std::to_string( i ), 220 233 (unsigned long long)i }; 234 } 235 236 ConstantExpr * ConstantExpr::from_double( const CodeLocation & loc, double d ) { 237 return new ConstantExpr{ loc, new BasicType{ BasicType::Double }, std::to_string( d ), d }; 238 } 239 240 ConstantExpr * ConstantExpr::from_string( const CodeLocation & loc, const std::string & s ) { 241 return new ConstantExpr{ 242 loc, 243 new ArrayType{ 244 new BasicType{ BasicType::Char, CV::Const }, 245 ConstantExpr::from_int( loc, s.size() + 1 /* null terminator */ ), 246 FixedLen, DynamicDim }, 247 std::string{"\""} + s + "\"", 248 (unsigned long long)0, 249 ConstantExpr::String }; 221 250 } 222 251 … … 353 382 
354 383 UniqueExpr::UniqueExpr( const CodeLocation & loc, const Expr * e, unsigned long long i ) 355 : Expr( loc, e->result ), expr( e ),id( i ) {384 : Expr( loc, e->result ), id( i ) { 356 385 assert( expr ); 357 386 if ( id == -1ull ) { -
src/AST/Expr.hpp
raba20d2 r8b34df0 22 22 #include <utility> // for move 23 23 #include <vector> 24 #include <optional>25 24 26 25 #include "Fwd.hpp" // for UniqueId … … 33 32 34 33 class ConverterOldToNew; 35 class ConverterNewToOld;36 34 37 35 namespace ast { … … 348 346 }; 349 347 350 /// A compile-time constant. 351 /// Mostly carries C-source text from parse to code-gen, without interpretation. E.g. strings keep their outer quotes and never have backslashes interpreted. 352 /// Integer constants get special treatment, e.g. for verifying array operations, when an integer constant occurs as the length of an array. 348 /// A compile-time constant 353 349 class ConstantExpr final : public Expr { 354 public: 355 // Representation of this constant, as it occurs in .cfa source and .cfa.cc result. 350 union Val { 351 unsigned long long ival; 352 double dval; 353 354 Val( unsigned long long i ) : ival( i ) {} 355 Val( double d ) : dval( d ) {} 356 } val; 357 public: 356 358 std::string rep; 359 enum Kind { Integer, FloatingPoint, String } kind; 357 360 358 361 ConstantExpr( 359 const CodeLocation & loc, const Type * ty, const std::string & r, 360 std::optional<unsigned long long> i)361 : Expr( loc, ty ), rep( r ), ival( i) {}362 363 /// Gets the integer value of this constant, if one is appropriate to its type.364 /// Throws a SemanticError if the type is not appropriate for value-as-integer. 
365 /// Suffers an assertion failure the type is appropriate but no integer value was supplied to the constructor.362 const CodeLocation & loc, const Type * ty, const std::string & r, unsigned long long v, 363 Kind k = Integer ) 364 : Expr( loc, ty ), val( v ), rep( r ), kind( k ) {} 365 ConstantExpr( const CodeLocation & loc, const Type * ty, const std::string & r, double v ) 366 : Expr( loc, ty ), val( v ), rep( r ), kind( FloatingPoint ) {} 367 368 /// Gets the value of this constant as an integer 366 369 long long int intValue() const; 370 /// Gets the value of this constant as floating point 371 double floatValue() const; 367 372 368 373 /// generates a boolean constant of the given bool 369 374 static ConstantExpr * from_bool( const CodeLocation & loc, bool b ); 375 /// generates a char constant of the given char 376 static ConstantExpr * from_char( const CodeLocation & loc, char c ); 370 377 /// generates an integer constant of the given int 371 378 static ConstantExpr * from_int( const CodeLocation & loc, int i ); 372 379 /// generates an integer constant of the given unsigned long int 373 380 static ConstantExpr * from_ulong( const CodeLocation & loc, unsigned long i ); 381 /// generates a floating point constant of the given double 382 static ConstantExpr * from_double( const CodeLocation & loc, double d ); 383 /// generates an array of chars constant of the given string 384 static ConstantExpr * from_string( const CodeLocation & loc, const std::string & s ); 374 385 /// generates a null pointer value for the given type. void * if omitted. 
375 386 static ConstantExpr * null( const CodeLocation & loc, const Type * ptrType = nullptr ); … … 379 390 ConstantExpr * clone() const override { return new ConstantExpr{ *this }; } 380 391 MUTATE_FRIEND 381 382 std::optional<unsigned long long> ival;383 384 // Intended only for legacy support of roundtripping the old AST.385 // Captures the very-locally inferred type, before the resolver modifies the type of this ConstantExpression.386 // In the old AST it's constExpr->constant.type387 ptr<Type> underlyer;388 friend class ::ConverterOldToNew;389 friend class ::ConverterNewToOld;390 392 }; 391 393 … … 691 693 unsigned long long id; 692 694 693 UniqueExpr( const CodeLocation & loc, const Expr * e, unsigned long long i = -1 ull);695 UniqueExpr( const CodeLocation & loc, const Expr * e, unsigned long long i = -1 ); 694 696 695 697 const Expr * accept( Visitor & v ) const override { return v.visit( this ); } -
src/Parser/ExpressionNode.cc
raba20d2 r8b34df0 357 357 new ConstantExpr( Constant::from_ulong( str.size() + 1 - 2 ) ), // +1 for '\0' and -2 for '"' 358 358 false, false ); 359 Expression * ret = new ConstantExpr( Constant( at, str, std::nullopt ) );359 Expression * ret = new ConstantExpr( Constant( at, str, (unsigned long long int)0 ) ); // constant 0 is ignored for pure string value 360 360 if ( units.length() != 0 ) { 361 361 ret = new UntypedExpr( new NameExpr( units ), { ret } ); -
src/ResolvExpr/Candidate.hpp
raba20d2 r8b34df0 9 9 // Author : Aaron B. Moss 10 10 // Created On : Wed Jun 5 14:30:00 2019 11 // Last Modified By : A ndrew Beach12 // Last Modified On : Wed Jun 12 14:15:00 201913 // Update Count : 211 // Last Modified By : Aaron B. Moss 12 // Last Modified On : Wed Jun 5 14:30:00 2019 13 // Update Count : 1 14 14 // 15 15 … … 49 49 50 50 Candidate() : expr(), cost( Cost::zero ), cvtCost( Cost::zero ), env(), open(), need() {} 51 51 52 52 Candidate( const ast::Expr * x, const ast::TypeEnvironment & e ) 53 53 : expr( x ), cost( Cost::zero ), cvtCost( Cost::zero ), env( e ), open(), need() {} 54 54 55 55 Candidate( const Candidate & o, const ast::Expr * x ) 56 : expr( x ), cost( o.cost ), cvtCost( Cost::zero ), env( o.env ), open( o.open ), 56 : expr( x ), cost( o.cost ), cvtCost( Cost::zero ), env( o.env ), open( o.open ), 57 57 need( o.need ) {} 58 59 Candidate( 60 const ast::Expr * x, ast::TypeEnvironment && e, ast::OpenVarSet && o, 61 ast::AssertionSet && n, const Cost & c , const Cost & cvt = Cost::zero)62 : expr( x ), cost( c ), cvtCost( cvt ), env( std::move( e ) ), open( std::move( o ) ),58 59 Candidate( 60 const ast::Expr * x, ast::TypeEnvironment && e, ast::OpenVarSet && o, 61 ast::AssertionSet && n, const Cost & c ) 62 : expr( x ), cost( c ), cvtCost( Cost::zero ), env( std::move( e ) ), open( std::move( o ) ), 63 63 need( n.begin(), n.end() ) {} 64 64 }; -
src/SynTree/Constant.cc
raba20d2 r8b34df0 22 22 #include "Type.h" // for BasicType, Type, Type::Qualifiers, PointerType 23 23 24 Constant::Constant( Type * type, std::string rep, std::optional<unsigned long long> ival ) : type( type ), rep( rep ), ival( ival ) {} 24 Constant::Constant( Type * type, std::string rep, unsigned long long val ) : type( type ), rep( rep ), val( val ) {} 25 Constant::Constant( Type * type, std::string rep, double val ) : type( type ), rep( rep ), val( val ) {} 25 26 26 Constant::Constant( const Constant &other ) : BaseSyntaxNode( other ), rep( other.rep ), ival( other.ival ) {27 Constant::Constant( const Constant &other ) : BaseSyntaxNode( other ), rep( other.rep ), val( other.val ) { 27 28 type = other.type->clone(); 28 29 } … … 34 35 } 35 36 37 Constant Constant::from_char( char c ) { 38 return Constant( new BasicType( Type::Qualifiers(), BasicType::Char ), std::to_string( c ), (unsigned long long int)c ); 39 } 40 36 41 Constant Constant::from_int( int i ) { 37 42 return Constant( new BasicType( Type::Qualifiers(), BasicType::SignedInt ), std::to_string( i ), (unsigned long long int)i ); … … 40 45 Constant Constant::from_ulong( unsigned long i ) { 41 46 return Constant( new BasicType( Type::Qualifiers(), BasicType::LongUnsignedInt ), std::to_string( i ), (unsigned long long int)i ); 47 } 48 49 Constant Constant::from_double( double d ) { 50 return Constant( new BasicType( Type::Qualifiers(), BasicType::Double ), std::to_string( d ), d ); 51 } 52 53 Constant Constant::from_string( std::string const & str ) { 54 return Constant( 55 new ArrayType( 56 noQualifiers, 57 new BasicType( Type::Qualifiers( Type::Const ), BasicType::Char ), 58 new ConstantExpr( Constant::from_int( str.size() + 1 /* \0 */ )), 59 false, false ), 60 std::string("\"") + str + "\"", (unsigned long long int)0 ); 42 61 } 43 62 … … 55 74 unsigned long long Constant::get_ival() const { 56 75 assertf( strict_dynamic_cast<BasicType*>(type)->isInteger(), "Attempt to retrieve ival from non-integer 
constant." ); 57 return ival.value(); 76 return val.ival; 77 } 78 79 double Constant::get_dval() const { 80 assertf( ! strict_dynamic_cast<BasicType*>(type)->isInteger(), "Attempt to retrieve dval from integer constant." ); 81 return val.dval; 58 82 } 59 83 60 84 void Constant::print( std::ostream &os, Indenter ) const { 61 os << "(" << rep << " " << (ival ? toString(ival.value()) : "");85 os << "(" << rep << " " << val.ival; 62 86 if ( type ) { 63 87 os << ": "; -
src/SynTree/Constant.h
raba20d2 r8b34df0 18 18 #include <iosfwd> // for ostream 19 19 #include <string> // for string 20 #include <optional> // for optional21 20 22 21 #include "BaseSyntaxNode.h" … … 28 27 class Constant : public BaseSyntaxNode { 29 28 public: 30 Constant( Type * type, std::string rep, std::optional<unsigned long long> i ); 29 Constant( Type * type, std::string rep, unsigned long long val ); 30 Constant( Type * type, std::string rep, double val ); 31 31 Constant( const Constant & other ); 32 32 virtual ~Constant(); … … 39 39 void set_value( std::string newValue ) { rep = newValue; } 40 40 unsigned long long get_ival() const; 41 double get_dval() const; 41 42 42 43 /// generates a boolean constant of the given bool 43 44 static Constant from_bool( bool b ); 45 /// generates a char constant of the given char 46 static Constant from_char( char c ); 44 47 /// generates an integer constant of the given int 45 48 static Constant from_int( int i ); 46 49 /// generates an integer constant of the given unsigned long int 47 50 static Constant from_ulong( unsigned long i ); 51 /// generates a floating point constant of the given double 52 static Constant from_double( double d ); 53 /// generates an array of chars constant of the given string 54 static Constant from_string( std::string const & s ); 48 55 49 56 /// generates a null pointer value for the given type. void * if omitted. … … 56 63 Type * type; 57 64 std::string rep; 58 std::optional<unsigned long long> ival; 59 60 friend class ConverterOldToNew; 65 union Val { 66 unsigned long long ival; 67 double dval; 68 Val( unsigned long long ival ) : ival( ival ) {} 69 Val( double dval ) : dval( dval ) {} 70 } val; 61 71 }; 62 72 -
tests/.in/manipulatorsInput.txt
raba20d2 r8b34df0 5 5 abcyyy 6 6 aaaaaaaaxxxxxxxxaabbccbbdddwwwbbbbbbbbwwwwwwwwaaaaaaaawwwwwwww 7 abc 7 8 abc 8 9 xx -
tests/io2.cfa
raba20d2 r8b34df0 10 10 // Created On : Wed Mar 2 16:56:02 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jun 13 16:43:14201913 // Update Count : 1 2012 // Last Modified On : Sun Jun 9 08:07:42 2019 13 // Update Count : 117 14 14 // 15 15 … … 49 49 in | f | d | ld; // floating point 50 50 in | fc | dc | ldc; // floating-point complex 51 in | s1 | wdi( size, s2 );// C string, length unchecked and checked51 in | cstr( s1 ) | wd( size, cstr( s2 ) ); // C string, length unchecked and checked 52 52 sout | nl; 53 53 -
tests/manipulatorsInput.cfa
raba20d2 r8b34df0 7 7 // Created On : Sat Jun 8 17:58:54 2019 8 8 // Last Modified By : Peter A. Buhr 9 // Last Modified On : Thu Jun 13 17:41:43201910 // Update Count : 379 // Last Modified On : Mon Jun 10 18:38:35 2019 10 // Update Count : 8 11 11 // 12 12 … … 17 17 { 18 18 char s[] = "yyyyyyyyyyyyyyyyyyyy"; 19 const char sk[] = "abc"; 20 scanf( "abc " ); scanf( sk ); for ( 5 ) scanf( "%*c" ); printf( "1 %s\n", s ); 21 scanf( "%s", s ); printf( "2 %s\n", s ); 22 scanf( "%*s" ); printf( "3 %s\n", s ); 23 scanf( "%8s", s ); printf( "4 %s\n", s ); 24 scanf( "%*8s" ); printf( "5 %s\n", s ); 19 scanf( "abc" ); printf( "1 %s\n", s ); 20 scanf( "%s", s ); printf( "2 %s\n", s ); 21 scanf( "%*s" ); printf( "3 %s\n", s ); 22 scanf( "%8s", s ); printf( "4 %s\n", s ); 23 scanf( "%*8s" ); printf( "5 %s\n", s ); 25 24 26 scanf( "%[abc]", s ); printf( "6 %s\n", s );27 scanf( "%[^abc]", s ); printf( "7 %s\n", s );28 scanf( "%*[abc]" ); printf( "8 %s\n", s );29 scanf( "%*[^abc]" ); printf( "9 %s\n", s );30 scanf( "%8[abc]", s ); printf( "10 %s\n", s );31 scanf( "%8[^abc]", s ); printf( "11 %s\n", s );32 scanf( "%*8[abc]" ); printf( "12 %s\n", s );33 scanf( "%*8[^abc]" ); printf( "13 %s\n", s );25 scanf( "%[abc]", s ); printf( "6 %s\n", s ); 26 scanf( "%[^abc]", s ); printf( "7 %s\n", s ); 27 scanf( "%*[abc]" ); printf( "8 %s\n", s ); 28 scanf( "%*[^abc]" ); printf( "9 %s\n", s ); 29 scanf( "%8[abc]", s ); printf( "10 %s\n", s ); 30 scanf( "%8[^abc]", s ); printf( "11 %s\n", s ); 31 scanf( "%*8[abc]" ); printf( "12 %s\n", s ); 32 scanf( "%*8[^abc]" ); printf( "13 %s\n", s ); 34 33 } 35 34 { 36 35 char s[] = "yyyyyyyyyyyyyyyyyyyy"; 37 char sk[] = "abc"; 38 sin /*| "abc "*/ | skip( sk ) | skip( 5 ); sout | "1" | s; 39 sin | s; sout | "2" | s; 40 sin | ignore( s ); sout | "3" | s; 41 sin | wdi( 8, s ); sout | "4" | s; 42 sin | ignore( wdi( 8, s ) ); sout | "5" | s; 36 sin | skip( "abc" ); sout | "1" | s; 37 sin | cstr( s ); sout | "2" | s; 38 sin | ignore( cstr( s ) ); sout | "3" | 
s; 39 sin | wd( 8, cstr( s ) ); sout | "4" | s; 40 sin | ignore( wd( 8, cstr( s ) ) ); sout | "5" | s; 43 41 44 42 sin | incl( "abc", s ); sout | "6" | s; … … 46 44 sin | ignore( incl( "abc", s ) ); sout | "8" | s; 47 45 sin | ignore( excl( "abc", s ) ); sout | "9" | s; 48 sin | wd i( 8, incl( "abc", s ) ); sout | "10" | s;49 sin | wd i( 8, excl( "abc", s ) ); sout | "11" | s;50 sin | ignore( wd i( 8, incl( "abc", s ) ) ); sout | "12" | s;51 sin | ignore( wd i( 8, excl( "abc", s ) ) ); sout | "13" | s;46 sin | wd( 8, incl( "abc", s ) ); sout | "10" | s; 47 sin | wd( 8, excl( "abc", s ) ); sout | "11" | s; 48 sin | ignore( wd( 8, incl( "abc", s ) ) ); sout | "12" | s; 49 sin | ignore( wd( 8, excl( "abc", s ) ) ); sout | "13" | s; 52 50 } 53 51 { 54 52 char c; 55 sin | c; sout | c;56 sin | ignore( c ); sout | c;53 sin | c; sout | c; 54 sin | ignore( c ); sout | c; 57 55 58 56 signed char sc; 59 sin | sc; sout | sc;60 sin | wdi( 3, sc ); sout | sc;61 sin | ignore( sc ); sout | sc;62 sin | ignore( wdi( 3, sc ) ); sout | sc;57 sin | sc; sout | sc; 58 sin | wdi( 3, sc ); sout | sc; 59 sin | ignore( sc ); sout | sc; 60 sin | ignore( wdi( 3, sc ) ); sout | sc; 63 61 64 62 unsigned char usc; 65 sin | usc; sout | usc;66 sin | wdi( 3, usc ); sout | usc;67 sin | ignore( usc ); sout | usc;68 sin | ignore( wdi( 3, usc ) ); sout | usc;63 sin | usc; sout | usc; 64 sin | wdi( 3, usc ); sout | usc; 65 sin | ignore( usc ); sout | usc; 66 sin | ignore( wdi( 3, usc ) ); sout | usc; 69 67 70 68 signed short int ssi; 71 sin | ssi; sout | ssi;72 sin | wdi( 3, ssi ); sout | ssi;73 sin | ignore( ssi ); sout | ssi;74 sin | ignore( wdi( 3, ssi ) ); sout | ssi;69 sin | ssi; sout | ssi; 70 sin | wdi( 3, ssi ); sout | ssi; 71 sin | ignore( ssi ); sout | ssi; 72 sin | ignore( wdi( 3, ssi ) ); sout | ssi; 75 73 76 74 unsigned short int usi; 77 sin | usi; sout | usi;78 sin | wdi( 3, usi ); sout | usi;79 sin | ignore( usi ); sout | usi;80 sin | ignore( wdi( 3, usi ) ); sout | usi;75 sin | usi; sout 
| usi; 76 sin | wdi( 3, usi ); sout | usi; 77 sin | ignore( usi ); sout | usi; 78 sin | ignore( wdi( 3, usi ) ); sout | usi; 81 79 82 80 signed int si; 83 sin | si; sout | si;84 sin | wdi( 3, si ); sout | si;85 sin | ignore( si ); sout | si;86 sin | ignore( wdi( 3, si ) ); sout | si;81 sin | si; sout | si; 82 sin | wdi( 3, si ); sout | si; 83 sin | ignore( si ); sout | si; 84 sin | ignore( wdi( 3, si ) ); sout | si; 87 85 88 86 unsigned int ui; 89 sin | ui; sout | ui;90 sin | wdi( 3, ui ); sout | ui;91 sin | ignore( ui ); sout | ui;92 sin | ignore( wdi( 3, ui ) ); sout | ui;87 sin | ui; sout | ui; 88 sin | wdi( 3, ui ); sout | ui; 89 sin | ignore( ui ); sout | ui; 90 sin | ignore( wdi( 3, ui ) ); sout | ui; 93 91 94 92 signed long int sli; 95 sin | sli; sout | sli;96 sin | wdi( 3, sli ); sout | sli;97 sin | ignore( sli ); sout | sli;98 sin | ignore( wdi( 3, sli ) ); sout | sli;93 sin | sli; sout | sli; 94 sin | wdi( 3, sli ); sout | sli; 95 sin | ignore( sli ); sout | sli; 96 sin | ignore( wdi( 3, sli ) ); sout | sli; 99 97 100 98 unsigned long int uli; 101 sin | uli; sout | uli;102 sin | wdi( 3, uli ); sout | uli;103 sin | ignore( uli ); sout | uli;104 sin | ignore( wdi( 3, uli ) ); sout | uli;99 sin | uli; sout | uli; 100 sin | wdi( 3, uli ); sout | uli; 101 sin | ignore( uli ); sout | uli; 102 sin | ignore( wdi( 3, uli ) ); sout | uli; 105 103 106 104 signed long long int slli; 107 sin | slli; sout | slli;108 sin | wdi( 3, slli ); sout | slli;109 sin | ignore( slli ); sout | slli;110 sin | ignore( wdi( 3, slli ) ); sout | slli;105 sin | slli; sout | slli; 106 sin | wdi( 3, slli ); sout | slli; 107 sin | ignore( slli ); sout | slli; 108 sin | ignore( wdi( 3, slli ) ); sout | slli; 111 109 112 110 unsigned long long int ulli; 113 sin | ulli; sout | ulli;114 sin | wdi( 3, ulli ); sout | ulli;115 sin | ignore( ulli ); sout | ulli;116 sin | ignore( wdi( 3, ulli ) ); sout | ulli;111 sin | ulli; sout | ulli; 112 sin | wdi( 3, ulli ); sout | ulli; 113 sin | ignore( ulli 
); sout | ulli; 114 sin | ignore( wdi( 3, ulli ) ); sout | ulli; 117 115 118 116 float f; 119 sin | f; sout | f;120 sin | wdi( 8, f ); sout | f;121 sin | ignore( f ); sout | f;122 sin | ignore( wdi( 8, f ) ); sout | f;117 sin | f; sout | f; 118 sin | wdi( 8, f ); sout | f; 119 sin | ignore( f ); sout | f; 120 sin | ignore( wdi( 8, f ) ); sout | f; 123 121 124 122 double d; 125 sin | d; sout | d;126 sin | wdi( 8, d ); sout | d;127 sin | ignore( d ); sout | d;128 sin | ignore( wdi( 8, d ) ); sout | d;123 sin | d; sout | d; 124 sin | wdi( 8, d ); sout | d; 125 sin | ignore( d ); sout | d; 126 sin | ignore( wdi( 8, d ) ); sout | d; 129 127 130 128 long double ld; 131 sin | ld; sout | ld;132 sin | wdi( 8, ld ); sout | ld;133 sin | ignore( ld ); sout | ld;134 sin | ignore( wdi( 8, ld ) ); sout | ld;129 sin | ld; sout | ld; 130 sin | wdi( 8, ld ); sout | ld; 131 sin | ignore( ld ); sout | ld; 132 sin | ignore( wdi( 8, ld ) ); sout | ld; 135 133 136 134 float _Complex fc; 137 sin | fc; sout | fc;138 sin | wdi( 8, fc ); sout | fc;139 sin | ignore( fc ); sout | fc;140 sin | ignore( wdi( 8, fc ) ); sout | fc;135 sin | fc; sout | fc; 136 sin | wdi( 8, fc ); sout | fc; 137 sin | ignore( fc ); sout | fc; 138 sin | ignore( wdi( 8, fc ) ); sout | fc; 141 139 142 140 double _Complex dc; 143 sin | dc; sout | dc;144 sin | wdi( 8, dc ); sout | dc;145 sin | ignore( dc ); sout | dc;146 sin | ignore( wdi( 8, dc ) ); sout | dc;141 sin | dc; sout | dc; 142 sin | wdi( 8, dc ); sout | dc; 143 sin | ignore( dc ); sout | dc; 144 sin | ignore( wdi( 8, dc ) ); sout | dc; 147 145 148 146 long double _Complex ldc; 149 sin | ldc; sout | ldc;150 sin | wdi( 8, ldc ); sout | ldc;151 sin | ignore( ldc ); sout | ldc;152 sin | ignore( wdi( 8, ldc ) ); sout | ldc;147 sin | ldc; sout | ldc; 148 sin | wdi( 8, ldc ); sout | ldc; 149 sin | ignore( ldc ); sout | ldc; 150 sin | ignore( wdi( 8, ldc ) ); sout | ldc; 153 151 } 154 152 } // main … … 156 154 // Local Variables: // 157 155 // tab-width: 4 // 158 // 
compile-command: "cfa -Wall -WextramanipulatorsInput.cfa" //156 // compile-command: "cfa manipulatorsInput.cfa" // 159 157 // End: //
Note:
See TracChangeset
for help on using the changeset viewer.