Changeset 7b91c0e
- Timestamp: Jan 21, 2021, 2:24:01 PM
- Branches: arm-eh, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 7d01186d
- Parents: 5869cea (diff), 1adab3e (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
- Files: 5 added, 157 edited
doc/bibliography/pl.bib
r5869cea r7b91c0e 688 688 title = {Asynchronous Exception Propagation in Blocked Tasks}, 689 689 booktitle = {4th International Workshop on Exception Handling (WEH.08)}, 690 o rganization= {16th International Symposium on the Foundations of Software Engineering (FSE 16)},690 optorganization= {16th International Symposium on the Foundations of Software Engineering (FSE 16)}, 691 691 address = {Atlanta, U.S.A}, 692 692 month = nov, … … 7246 7246 7247 7247 @inproceedings{Edelson92, 7248 keywords = {persistence, pointers},7248 keywords = {persistence, smart pointers}, 7249 7249 contributer = {pabuhr@plg}, 7250 7250 author = {Daniel R. Edelson}, … … 7256 7256 year = 1992, 7257 7257 pages = {1-19}, 7258 } 7259 7260 @incollection{smartpointers, 7261 keywords = {smart pointers}, 7262 contributer = {pabuhr@plg}, 7263 author = {Andrei Alexandrescu}, 7264 title = {Smart Pointers}, 7265 booktitle = {Modern C++ Design: Generic Programming and Design Patterns Applied}, 7266 publisher = {Addison-Wesley}, 7267 year = 2001, 7268 chapter = 7, 7269 optpages = {?-?}, 7258 7270 } 7259 7271 … … 8245 8257 } 8246 8258 8259 @misc{vistorpattern, 8260 keywords = {visitor pattern}, 8261 contributer = {pabuhr@plg}, 8262 key = {vistor pattern}, 8263 title = {vistor pattern}, 8264 year = 2020, 8265 note = {WikipediA}, 8266 howpublished= {\href{https://en.wikipedia.org/wiki/Visitor\_pattern} 8267 {https://\-en.wikipedia.org/\-wiki/\-Visitor\_pattern}}, 8268 } 8269 8247 8270 % W 8248 8271 -
doc/theses/andrew_beach_MMath/Makefile
r5869cea → r7b91c0e
  1   1  ### Makefile for Andrew Beach's Masters Thesis
  2   2
  3      DOC=thesis.pdf
      3  DOC=uw-ethesis.pdf
  4   4  BUILD=out
  5   5  TEXSRC=$(wildcard *.tex)
  …
  7   7  STYSRC=$(wildcard *.sty)
  8   8  CLSSRC=$(wildcard *.cls)
  9      TEXLIB= .:${BUILD}:
      9  TEXLIB= .:../../LaTeXmacros:${BUILD}:
 10  10  BIBLIB= .:../../bibliography
  …
 29  29  ${LATEX} ${BASE}
 30  30  ${BIBTEX} ${BUILD}/${BASE}
     31  ${LATEX} ${BASE}
 31  32  ${GLOSSARY} ${BUILD}/${BASE}
 32  33  ${LATEX} ${BASE}
doc/theses/andrew_beach_MMath/existing.tex
r5869cea r7b91c0e 1 \chapter{\CFA{} Existing Features} 1 \chapter{\CFA Existing Features} 2 3 \CFA (C-for-all)~\cite{Cforall} is an open-source project extending ISO C with modern safety and productivity features, while still ensuring backwards compatibility with C and its programmers. 4 \CFA is designed to have an orthogonal feature-set based closely on the C programming paradigm (non-object-oriented) and these features can be added incrementally to an existing C code-base allowing programmers to learn \CFA on an as-needed basis. 2 5 3 6 \section{Overloading and extern} 4 7 Cforall has overloading, allowing multiple definitions of the same name to 5 be defined. 8 be defined.~\cite{Moss18} 6 9 7 10 This also adds name mangling so that the assembly symbols are unique for … … 11 14 12 15 The syntax for disabling mangling is: 13 \begin{ lstlisting}16 \begin{cfa} 14 17 extern "C" { 15 18 ... 16 19 } 17 \end{ lstlisting}20 \end{cfa} 18 21 19 22 To re-enable mangling once it is disabled the syntax is: 20 \begin{ lstlisting}23 \begin{cfa} 21 24 extern "Cforall" { 22 25 ... 23 26 } 24 \end{ lstlisting}27 \end{cfa} 25 28 26 29 Both should occur at the declaration level and effect all the declarations 27 in \texttt{...}. Neither care about the state of mangling when they begin30 in @...@. Neither care about the state of mangling when they begin 28 31 and will return to that state after the group is finished. So re-enabling 29 32 is only used to nest areas of mangled and unmangled declarations. … … 31 34 \section{References} 32 35 \CFA adds references to C. These are auto-dereferencing pointers and use the 33 same syntax as pointers except they use ampersand ( \codeCFA{\&}) instead of34 the asterisk ( \codeCFA{*}). They can also be constaint or mutable, if they36 same syntax as pointers except they use ampersand (@&@) instead of 37 the asterisk (@*@). They can also be constaint or mutable, if they 35 38 are mutable they may be assigned to by using the address-of operator 36 ( \codeCFA\&) which converts them into a pointer.39 (@&@) which converts them into a pointer. 37 40 38 41 \section{Constructors and Destructors} … … 41 44 functions with special names. The special names are used to define them and 42 45 may be used to call the functions expicately. The \CFA special names are 43 constructed by taking the tokens in the operators and putting \texttt{?}where44 the arguments would go. So multiplication is \texttt{?*?}while dereference45 is \texttt{*?}. This also make it easy to tell the difference between46 pre-fix operations (such as \texttt{++?}) and post-fix operations47 ( \texttt{?++}).48 49 The special name for contructors is \texttt{?\{\}}, which comes from the46 constructed by taking the tokens in the operators and putting @?@ where 47 the arguments would go. So multiplication is @?*?@ while dereference 48 is @*?@. This also make it easy to tell the difference between 49 pre-fix operations (such as @++?@) and post-fix operations 50 (@?++@). 51 52 The special name for contructors is @?{}@, which comes from the 50 53 initialization syntax in C. The special name for destructors is 51 \texttt{\^{}?\{\}}. % I don't like the \^{} symbol but $^\wedge$ isn't better.54 @^{}@. % I don't like the \^{} symbol but $^\wedge$ isn't better. 52 55 53 56 Any time a type T goes out of scope the destructor matching 54 \codeCFA{void ^?\{\}(T \&);}is called. In theory this is also true of55 primitive types such as \codeCFA{int}, but in practice those are no-ops and57 @void ^?{}(T &);@ is called. 
In theory this is also true of 58 primitive types such as @int@, but in practice those are no-ops and 56 59 are usually omitted for optimization. 57 60 58 61 \section{Polymorphism} 59 62 \CFA uses polymorphism to create functions and types that are defined over 60 different types. \CFA polymorphic declarations serve the same role as \C PP63 different types. \CFA polymorphic declarations serve the same role as \CC 61 64 templates or Java generics. 62 65 … … 65 68 except that you may use the names introduced by the forall clause in them. 66 69 67 Forall clauses are written \codeCFA{forall( ... )} where \codeCFA{...}becomes70 Forall clauses are written @forall( ... )@ where @...@ becomes 68 71 the list of polymorphic variables (local type names) and assertions, which 69 72 repersent required operations on those types. 70 73 71 \begin{ lstlisting}74 \begin{cfa} 72 75 forall(dtype T | { void do_once(T &); }) 73 76 void do_twice(T & value) { … … 75 78 do_once(value); 76 79 } 77 \end{ lstlisting}80 \end{cfa} 78 81 79 82 A polymorphic function can be used in the same way normal functions are. … … 83 86 the the call site. 84 87 85 As an example, even if no function named \codeCFA{do_once}is not defined86 near the definition of \codeCFA{do_twice}the following code will work.87 \begin{ lstlisting}88 As an example, even if no function named @do_once@ is not defined 89 near the definition of @do_twice@ the following code will work. 90 \begin{cfa} 88 91 int quadruple(int x) { 89 92 void do_once(int & y) { … … 93 96 return x; 94 97 } 95 \end{ lstlisting}98 \end{cfa} 96 99 This is not the recommended way to implement a quadruple function but it 97 does work. The complier will deduce that \codeCFA{do_twice}'s T is an100 does work. The complier will deduce that @do_twice@'s T is an 98 101 integer from the argument. It will then look for a definition matching the 99 assertion which is the \codeCFA{do_once}defined within the function. That100 function will be passed in as a function pointer to \codeCFA{do_twice}and102 assertion which is the @do_once@ defined within the function. That 103 function will be passed in as a function pointer to @do_twice@ and 101 104 called within it. 102 105 … … 104 107 traits which collect assertions into convenent packages that can then be used 105 108 in assertion lists instead of all of their components. 106 \begin{ lstlisting}109 \begin{cfa} 107 110 trait done_once(dtype T) { 108 111 void do_once(T &); 109 112 } 110 \end{ lstlisting}113 \end{cfa} 111 114 112 115 After this the forall list in the previous example could instead be written 113 116 with the trait instead of the assertion itself. 114 \begin{ lstlisting}117 \begin{cfa} 115 118 forall(dtype T | done_once(T)) 116 \end{ lstlisting}119 \end{cfa} 117 120 118 121 Traits can have arbitrary number of assertions in them and are usually used to … … 124 127 are now used in field declaractions instead of parameters and local variables. 125 128 126 \begin{ lstlisting}129 \begin{cfa} 127 130 forall(dtype T) 128 131 struct node { … … 130 133 T * data; 131 134 } 132 \end{ lstlisting}133 134 The \codeCFA{node(T)}is a use of a polymorphic structure. Polymorphic types135 \end{cfa} 136 137 The @node(T)@ is a use of a polymorphic structure. Polymorphic types 135 138 must be provided their polymorphic parameters. 136 139 … … 140 143 \section{Concurrency} 141 144 142 \CFA has a number of concurrency features, \codeCFA{thread}s,143 \codeCFA{monitor}s and \codeCFA{mutex} parameters, \codeCFA{coroutine}s and144 \codeCFA{generator}s. 
The two features that interact with the exception system145 are \codeCFA{thread}s and \codeCFA{coroutine}s; they and their supporting145 \CFA has a number of concurrency features, @thread@s, 146 @monitor@s and @mutex@ parameters, @coroutine@s and 147 @generator@s. The two features that interact with the exception system 148 are @thread@s and @coroutine@s; they and their supporting 146 149 constructs will be described here. 147 150 … … 154 157 library. 155 158 156 In \CFA coroutines are created using the \codeCFA{coroutine}keyword which157 works just like \codeCFA{struct}except that the created structure will be158 modified by the compiler to satify the \codeCFA{is_coroutine}trait.159 In \CFA coroutines are created using the @coroutine@ keyword which 160 works just like @struct@ except that the created structure will be 161 modified by the compiler to satify the @is_coroutine@ trait. 159 162 160 163 These structures act as the interface between callers and the coroutine, 161 164 the fields are used to pass information in and out. Here is a simple example 162 165 where the single field is used to pass the next number in a sequence out. 163 \begin{ lstlisting}166 \begin{cfa} 164 167 coroutine CountUp { 165 168 unsigned int next; 166 169 } 167 \end{ lstlisting}170 \end{cfa} 168 171 169 172 The routine part of the coroutine is a main function for the coroutine. It … … 173 176 function it continue from that same suspend statement instead of at the top 174 177 of the function. 175 \begin{ lstlisting}178 \begin{cfa} 176 179 void main(CountUp & this) { 177 180 unsigned int next = 0; … … 182 185 } 183 186 } 184 \end{ lstlisting}187 \end{cfa} 185 188 186 189 Control is passed to the coroutine with the resume function. This includes the … … 189 192 return value is for easy access to communication variables. For example the 190 193 next value from a count-up can be generated and collected in a single 191 expression: \codeCFA{resume(count).next}.194 expression: @resume(count).next@. 192 195 193 196 \subsection{Monitors and Mutex} … … 198 201 parameters. 199 202 200 Function parameters can have the \codeCFA{mutex}qualifiers on reference201 arguments, for example \codeCFA{void example(a_monitor & mutex arg);}. When the203 Function parameters can have the @mutex@ qualifiers on reference 204 arguments, for example @void example(a_monitor & mutex arg);@. When the 202 205 function is called it will acquire the lock on all of the mutex parameters. 203 206 … … 214 217 215 218 Threads are created like coroutines except the keyword is changed: 216 \begin{ lstlisting}219 \begin{cfa} 217 220 thread StringWorker { 218 221 const char * input; … … 225 228 this.result = result; 226 229 } 227 \end{ lstlisting}230 \end{cfa} 228 231 The main function will start executing after the fork operation and continue 229 232 executing until it is finished. If another thread joins with this one it will … … 233 236 From the outside this is the creation and destruction of the thread object. 234 237 Fork happens after the constructor is run and join happens before the 235 destructor runs. Join also happens during the \codeCFA{join}function which238 destructor runs. Join also happens during the @join@ function which 236 239 can be used to join a thread earlier. If it is used the destructor does not 237 240 join as that has already been completed. -
doc/theses/andrew_beach_MMath/features.tex
r5869cea r7b91c0e 54 54 returns a reference to the virtual table instance. Defining this function 55 55 also establishes the virtual type and virtual table pair to the resolver 56 and promises that \codeCFA{exceptT}is a virtual type and a child of the56 and promises that @exceptT@ is a virtual type and a child of the 57 57 base exception type. 58 58 59 One odd thing about \codeCFA{get_exception_vtable}is that it should always59 One odd thing about @get_exception_vtable@ is that it should always 60 60 be a constant function, returning the same value regardless of its argument. 61 61 A pointer or reference to the virtual table instance could be used instead, … … 66 66 67 67 Also note the use of the word ``promise" in the trait description. \CFA 68 cannot currently check to see if either \codeCFA{exceptT}or69 \codeCFA{virtualT}match the layout requirements. Currently this is70 considered part of \codeCFA{get_exception_vtable}'s correct implementation.68 cannot currently check to see if either @exceptT@ or 69 @virtualT@ match the layout requirements. Currently this is 70 considered part of @get_exception_vtable@'s correct implementation. 71 71 72 72 \begin{lstlisting} … … 92 92 93 93 Finally there are three additional macros that can be used to refer to the 94 these traits. They are \codeCFA{IS_EXCEPTION},95 \codeCFA{IS_TERMINATION_EXCEPTION} and \codeCFA{IS_RESUMPTION_EXCEPTION}.94 these traits. They are @IS_EXCEPTION@, 95 @IS_TERMINATION_EXCEPTION@ and @IS_RESUMPTION_EXCEPTION@. 96 96 Each takes the virtual type's name and, for polymorphic types only, the 97 97 parenthesized list of polymorphic arguments. These do the name mangling to … … 113 113 The expression must evaluate to a reference to a termination exception. A 114 114 termination exception is any exception with a 115 \codeCFA{void defaultTerminationHandler(T &);}(the default handler) defined115 @void defaultTerminationHandler(T &);@ (the default handler) defined 116 116 on it. The handler is taken from the call sight with \CFA's trait system and 117 117 passed into the exception system along with the exception itself. … … 169 169 170 170 You can also re-throw the most recent termination exception with 171 \codeCFA{throw;}. % This is terrible and you should never do it.171 @throw;@. % This is terrible and you should never do it. 172 172 This can be done in a handler or any function that could be called from a 173 173 handler. … … 193 193 The result of EXPRESSION must be a resumption exception type. A resumption 194 194 exception type is any type that satisfies the assertion 195 \codeCFA{void defaultResumptionHandler(T &);}(the default handler). When the195 @void defaultResumptionHandler(T &);@ (the default handler). When the 196 196 statement is executed the expression is evaluated and the result is thrown. 197 197 … … 260 260 \paragraph{Re-Throwing} 261 261 262 You may also re-throw resumptions with a \codeCFA{throwResume;}statement.263 This can only be done from inside of a \codeCFA{catchResume}block.262 You may also re-throw resumptions with a @throwResume;@ statement. 263 This can only be done from inside of a @catchResume@ block. 264 264 265 265 Outside of any side effects of any code already run in the handler this will … … 269 269 \section{Finally Clauses} 270 270 271 A \codeCFA{finally}clause may be placed at the end of a try statement after271 A @finally@ clause may be placed at the end of a try statement after 272 272 all the handler clauses. 
In the simply case, with no handlers, it looks like 273 273 this: … … 294 294 295 295 Because of this local control flow out of the finally block is forbidden. 296 The compiler rejects uses of \codeCFA{break}, \codeCFA{continue},297 \codeCFA{fallthru} and \codeCFA{return}that would cause control to leave296 The compiler rejects uses of @break@, @continue@, 297 @fallthru@ and @return@ that would cause control to leave 298 298 the finally block. Other ways to leave the finally block - such as a long 299 299 jump or termination - are much harder to check, at best requiring additional … … 307 307 308 308 There is no special statement for starting a cancellation, instead you call 309 the standard library function \codeCFA{cancel\_stack}which takes an exception.309 the standard library function @cancel\_stack@ which takes an exception. 310 310 Unlike in a throw this exception is not used in control flow but is just there 311 311 to pass information about why the cancellation happened. … … 323 323 324 324 \item Thread Stack: 325 Thread stacks are those created \codeCFA{thread}or otherwise satisfy the326 \codeCFA{is\_thread}trait.325 Thread stacks are those created @thread@ or otherwise satisfy the 326 @is\_thread@ trait. 327 327 328 328 Threads only have two structural points of communication that must happen, … … 333 333 and wait for another thread to join with it. The other thread, when it joins, 334 334 checks for a cancellation. If so it will throw the resumption exception 335 \codeCFA{ThreadCancelled}.336 337 There is a difference here in how explicate joins (with the \codeCFA{join}335 @ThreadCancelled@. 336 337 There is a difference here in how explicate joins (with the @join@ 338 338 function) and implicate joins (from a destructor call). Explicate joins will 339 take the default handler ( \codeCFA{defaultResumptionHandler}) from the context339 take the default handler (@defaultResumptionHandler@) from the context 340 340 and use like a regular through does if the exception is not caught. The 341 341 implicate join does a program abort instead. … … 349 349 350 350 \item Coroutine Stack: 351 Coroutine stacks are those created with \codeCFA{coroutine}or otherwise352 satisfy the \codeCFA{is\_coroutine}trait.351 Coroutine stacks are those created with @coroutine@ or otherwise 352 satisfy the @is\_coroutine@ trait. 353 353 354 354 A coroutine knows of two other coroutines, its starter and its last resumer. … … 356 356 357 357 After the stack is unwound control goes to the last resumer. 358 Resume will resume throw a \codeCFA{CoroutineCancelled}exception, which is358 Resume will resume throw a @CoroutineCancelled@ exception, which is 359 359 polymorphic over the coroutine type and has a pointer to the coroutine being 360 360 canceled and the canceling exception. The resume function also has an 361 assertion that the \codeCFA{defaultResumptionHandler}for the exception. So it361 assertion that the @defaultResumptionHandler@ for the exception. So it 362 362 will use the default handler like a regular throw. 363 363 -
doc/theses/andrew_beach_MMath/future.tex
r5869cea r7b91c0e 20 20 21 21 \section{Additional Throws} 22 Several other kinds of throws, beyond the termination throw ( \codeCFA{throw}),23 the resumption throw ( \codeCFA{throwResume}) and the re-throws, were considered.22 Several other kinds of throws, beyond the termination throw (@throw@), 23 the resumption throw (@throwResume@) and the re-throws, were considered. 24 24 None were as useful as the core throws but they would likely be worth 25 25 revising. … … 114 114 is no reason not to allow it. It is however a small improvement; giving a bit 115 115 of flexibility to the user in what style they want to use. 116 \item Enabling local control flow (by \codeCFA{break}, \codeCFA{return}and116 \item Enabling local control flow (by @break@, @return@ and 117 117 similar statements) out of a termination handler. The current set-up makes 118 118 this very difficult but the catch function that runs the handler after it has -
doc/theses/andrew_beach_MMath/implement.tex
r5869cea r7b91c0e 9 9 10 10 All of this is accessed through a field inserted at the beginning of every 11 virtual type. Currently it is called \codeC{virtual_table}but it is not11 virtual type. Currently it is called @virtual_table@ but it is not 12 12 ment to be accessed by the user. This field is a pointer to the type's 13 13 virtual table instance. It is assigned once during the object's construction … … 40 40 using that to calculate the mangled name of the parent's virtual table type. 41 41 There are two special fields that are included like normal fields but have 42 special initialization rules: the \codeC{size}field is the type's size and is43 initialized with a sizeof expression, the \codeC{align}field is the type's42 special initialization rules: the @size@ field is the type's size and is 43 initialized with a sizeof expression, the @align@ field is the type's 44 44 alignment and uses an alignof expression. The remaining fields are resolved 45 45 to a name matching the field's name and type using the normal visibility … … 56 56 The declarations include the virtual type definition and forward declarations 57 57 of the virtual table instance, constructor, message function and 58 \codeCFA{get_exception_vtable}. The definition includes the storage and58 @get_exception_vtable@. The definition includes the storage and 59 59 initialization of the virtual table instance and the bodies of the three 60 60 functions. … … 65 65 from the per-instance information. The virtual table type and most of the 66 66 functions are polymorphic so they are all part of the core. The virtual table 67 instance and the \codeCFA{get_exception_vtable}function.68 69 Coroutines and threads need instances of \codeCFA{CoroutineCancelled}and70 \codeCFA{ThreadCancelled}respectively to use all of their functionality.71 When a new data type is declared with \codeCFA{coroutine} or \codeCFA{thread}67 instance and the @get_exception_vtable@ function. 68 69 Coroutines and threads need instances of @CoroutineCancelled@ and 70 @ThreadCancelled@ respectively to use all of their functionality. 71 When a new data type is declared with @coroutine@ or @thread@ 72 72 the forward declaration for the instance is created as well. The definition 73 73 of the virtual table is created at the definition of the main function. … … 79 79 function. 80 80 81 The function is \codeC{__cfa__virtual_cast}and it is implemented in the81 The function is @__cfa__virtual_cast@ and it is implemented in the 82 82 standard library. It takes a pointer to the target type's virtual table and 83 83 the object pointer being cast. The function is very simple, getting the … … 87 87 88 88 For the generated code a forward decaration of the virtual works as follows. 89 There is a forward declaration of \codeC{__cfa__virtual_cast}in every cfa89 There is a forward declaration of @__cfa__virtual_cast@ in every cfa 90 90 file so it can just be used. The object argument is the expression being cast 91 91 so that is just placed in the argument list. … … 110 110 often across functions. 111 111 112 At a very basic level this can be done with \codeC{setjmp} \& \codeC{longjmp}112 At a very basic level this can be done with @setjmp@ \& @longjmp@ 113 113 which simply move the top of the stack, discarding everything on the stack 114 114 above a certain point. However this ignores all the clean-up code that should … … 118 118 both of these problems. 
119 119 120 Libunwind, provided in \texttt{unwind.h}on most platorms, is a C library120 Libunwind, provided in @unwind.h@ on most platorms, is a C library 121 121 that provides \CPP style stack unwinding. Its operation is divided into two 122 122 phases. The search phase -- phase 1 -- is used to scan the stack and decide … … 142 142 143 143 GCC will generate an LSDA and attach its personality function with the 144 \texttt{-fexceptions}flag. However this only handles the cleanup attribute.144 @-fexceptions@ flag. However this only handles the cleanup attribute. 145 145 This attribute is used on a variable and specifies a function that should be 146 146 run when the variable goes out of scope. The function is passed a pointer to … … 165 165 messages for special cases (some of which should never be used by the 166 166 personality function) and error codes but unless otherwise noted the 167 personality function should always return \codeC{_URC_CONTINUE_UNWIND}.168 169 The \codeC{version}argument is the verson of the implementation that is167 personality function should always return @_URC_CONTINUE_UNWIND@. 168 169 The @version@ argument is the verson of the implementation that is 170 170 calling the personality function. At this point it appears to always be 1 and 171 171 it will likely stay that way until a new version of the API is updated. 172 172 173 The \codeC{action}argument is set of flags that tell the personality173 The @action@ argument is set of flags that tell the personality 174 174 function when it is being called and what it must do on this invocation. 175 175 The flags are as follows: 176 176 \begin{itemize} 177 \item \codeC{_UA_SEARCH_PHASE}: This flag is set whenever the personality177 \item@_UA_SEARCH_PHASE@: This flag is set whenever the personality 178 178 function is called during the search phase. The personality function should 179 179 decide if unwinding will stop in this function or not. If it does then the 180 personality function should return \codeC{_URC_HANDLER_FOUND}.181 \item \codeC{_UA_CLEANUP_PHASE}: This flag is set whenever the personality180 personality function should return @_URC_HANDLER_FOUND@. 181 \item@_UA_CLEANUP_PHASE@: This flag is set whenever the personality 182 182 function is called during the cleanup phase. If no other flags are set this 183 183 means the entire frame will be unwound and all cleanup code should be run. 184 \item \codeC{_UA_HANDLER_FRAME}: This flag is set during the cleanup phase184 \item@_UA_HANDLER_FRAME@: This flag is set during the cleanup phase 185 185 on the function frame that found the handler. The personality function must 186 186 prepare to return to normal code execution and return 187 \codeC{_URC_INSTALL_CONTEXT}.188 \item \codeC{_UA_FORCE_UNWIND}: This flag is set if the personality function187 @_URC_INSTALL_CONTEXT@. 188 \item@_UA_FORCE_UNWIND@: This flag is set if the personality function 189 189 is called through a forced unwind call. Forced unwind only performs the 190 190 cleanup phase and uses a different means to decide when to stop. See its … … 192 192 \end{itemize} 193 193 194 The \codeC{exception_class} argument is a copy of the \codeC{exception}'s195 \codeC{exception_class}field.196 197 The \codeC{exception}argument is a pointer to the user provided storage194 The @exception_class@ argument is a copy of the @exception@'s 195 @exception_class@ field. 196 197 The @exception@ argument is a pointer to the user provided storage 198 198 object. 
It has two public fields, the exception class which is actually just 199 199 a number that identifies the exception handling mechanism that created it and … … 201 201 exception needs to 202 202 203 The \codeC{context}argument is a pointer to an opaque type. This is passed203 The @context@ argument is a pointer to an opaque type. This is passed 204 204 to the many helper functions that can be called inside the personality 205 205 function. … … 218 218 functions traversing the stack new-to-old until a function finds a handler or 219 219 the end of the stack is reached. In the latter case raise exception will 220 return with \codeC{_URC_END_OF_STACK}.220 return with @_URC_END_OF_STACK@. 221 221 222 222 Once a handler has been found raise exception continues onto the the cleanup … … 227 227 228 228 If an error is encountered raise exception will return either 229 \codeC{_URC_FATAL_PHASE1_ERROR} or \codeC{_URC_FATAL_PHASE2_ERROR}depending229 @_URC_FATAL_PHASE1_ERROR@ or @_URC_FATAL_PHASE2_ERROR@ depending 230 230 on when the error occured. 231 231 … … 259 259 been unwound. 260 260 261 Each time it is called the stop function should return \codeC{_URC_NO_REASON}261 Each time it is called the stop function should return @_URC_NO_REASON@ 262 262 or transfer control directly to other code outside of libunwind. The 263 263 framework does not provide any assistance here. 264 264 265 265 Its arguments are the same as the paired personality function. 266 The actions \codeC{_UA_CLEANUP_PHASE} and \codeC{_UA_FORCE_UNWIND}are always266 The actions @_UA_CLEANUP_PHASE@ and @_UA_FORCE_UNWIND@ are always 267 267 set when it is called. By the official standard that is all but both GCC and 268 268 Clang add an extra action on the last call at the end of the stack: 269 \codeC{_UA_END_OF_STACK}.269 @_UA_END_OF_STACK@. 270 270 271 271 \section{Exception Context} … … 280 280 Each stack has its own exception context. In a purely sequental program, using 281 281 only core Cforall, there is only one stack and the context is global. However 282 if the library \texttt{libcfathread}is linked then there can be multiple282 if the library @libcfathread@ is linked then there can be multiple 283 283 stacks so they will each need their own. 284 284 285 285 To handle this code always gets the exception context from the function 286 \codeC{this_exception_context}. The main exception handling code is in287 \texttt{libcfa}and that library also defines the function as a weak symbol288 so it acts as a default. Meanwhile in \texttt{libcfathread}the function is286 @this_exception_context@. The main exception handling code is in 287 @libcfa@ and that library also defines the function as a weak symbol 288 so it acts as a default. Meanwhile in @libcfathread@ the function is 289 289 defined as a strong symbol that replaces it when the libraries are linked 290 290 together. 291 291 292 The version of the function defined in \texttt{libcfa}is very simple. It292 The version of the function defined in @libcfa@ is very simple. It 293 293 returns a pointer to a global static variable. With only one stack this 294 294 global instance is associated with the only stack. 295 295 296 The version of the function defined in \texttt{libcfathread}has to handle296 The version of the function defined in @libcfathread@ has to handle 297 297 more as there are multiple stacks. The exception context is included as 298 298 part of the per-stack data stored as part of coroutines. 
In the cold data 299 299 section, stored at the base of each stack, is the exception context for that 300 stack. The \codeC{this_exception_context}uses the concurrency library to get300 stack. The @this_exception_context@ uses the concurrency library to get 301 301 the current coroutine and through it the cold data section and the exception 302 302 context. … … 323 323 to store the exception. Macros with pointer arthritic and type cast are 324 324 used to move between the components or go from the embedded 325 \codeC{_Unwind_Exception}to the entire node.325 @_Unwind_Exception@ to the entire node. 326 326 327 327 All of these nodes are strung together in a linked list. One linked list per … … 347 347 C which is what the \CFA compiler outputs so a work-around is used. 348 348 349 This work around is a function called \codeC{__cfaehm_try_terminate}in the349 This work around is a function called @__cfaehm_try_terminate@ in the 350 350 standard library. The contents of a try block and the termination handlers 351 351 are converted into functions. These are then passed to the try terminate … … 385 385 386 386 These nested functions and all other functions besides 387 \codeC{__cfaehm_try_terminate}in \CFA use the GCC personality function and388 the \texttt{-fexceptions}flag to generate the LSDA. This allows destructors387 @__cfaehm_try_terminate@ in \CFA use the GCC personality function and 388 the @-fexceptions@ flag to generate the LSDA. This allows destructors 389 389 to be implemented with the cleanup attribute. 390 390 … … 401 401 402 402 The handler function does both the matching and catching. It tries each 403 the condition of \codeCFA{catchResume}in order, top-to-bottom and until it403 the condition of @catchResume@ in order, top-to-bottom and until it 404 404 finds a handler that matches. If no handler matches then the function returns 405 405 false. Otherwise the matching handler is run, if it completes successfully 406 the function returns true. Rethrows, through the \codeCFA{throwResume;}406 the function returns true. Rethrows, through the @throwResume;@ 407 407 statement, cause the function to return true. 408 409 % Recursive Resumption Stuff: 410 Blocking out part of the stack is accomplished by updating the front of the 411 list as the search continues. Before the handler at a node is called the head 412 of the list is updated to the next node of the current node. After the search 413 is complete, successful or not, the head of the list is reset. 414 415 This means the current handler and every handler that has already been 416 checked are not on the list while a handler is run. If a resumption is thrown 417 during the handling of another resumption the active handlers and all the 418 other handler checked up to this point will not be checked again. 419 420 This structure also supports new handler added while the resumption is being 421 handled. These are added to the front of the list, pointing back along the 422 stack -- the first one will point over all the checked handlers -- and the 423 ordering is maintained. 408 424 409 425 \subsection{Libunwind Compatibility} … … 438 454 439 455 Cancellation also uses libunwind to do its stack traversal and unwinding, 440 however it uses a different primary function \codeC{_Unwind_ForcedUnwind}.456 however it uses a different primary function @_Unwind_ForcedUnwind@. 441 457 Details of its interface can be found in the unwind section. 442 458 -
doc/theses/andrew_beach_MMath/unwinding.tex
r5869cea r7b91c0e 10 10 Even this is fairly simple if nothing needs to happen when the stack unwinds. 11 11 Traditional C can unwind the stack by saving and restoring state (with 12 \codeC{setjmp} \& \codeC{longjmp}). However many languages define actions that12 @setjmp@ \& @longjmp@). However many languages define actions that 13 13 have to be taken when something is removed from the stack, such as running 14 a variable's destructor or a \codeCFA{try} statement's \codeCFA{finally}14 a variable's destructor or a @try@ statement's @finally@ 15 15 clause. Handling this requires walking the stack going through each stack 16 16 frame. … … 29 29 30 30 \CFA uses two primary functions in libunwind to create most of its 31 exceptional control-flow: \codeC{_Unwind_RaiseException}and32 \codeC{_Unwind_ForcedUnwind}.31 exceptional control-flow: @_Unwind_RaiseException@ and 32 @_Unwind_ForcedUnwind@. 33 33 Their operation is divided into two phases: search and clean-up. The search 34 34 phase -- phase 1 -- is used to scan the stack but not unwinding it. The … … 44 44 A personality function performs three tasks, although not all have to be 45 45 present. The tasks performed are decided by the actions provided. 46 \codeC{_Unwind_Action}is a bitmask of possible actions and an argument of46 @_Unwind_Action@ is a bitmask of possible actions and an argument of 47 47 this type is passed into the personality function. 48 48 \begin{itemize} 49 \item \codeC{_UA_SEARCH_PHASE}is passed in search phase and tells the49 \item@_UA_SEARCH_PHASE@ is passed in search phase and tells the 50 50 personality function to check for handlers. If there is a handler in this 51 51 stack frame, as defined by the language, the personality function should 52 return \codeC{_URC_HANDLER_FOUND}. Otherwise it should return53 \codeC{_URC_CONTINUE_UNWIND}.54 \item \codeC{_UA_CLEANUP_PHASE}is passed in during the clean-up phase and52 return @_URC_HANDLER_FOUND@. Otherwise it should return 53 @_URC_CONTINUE_UNWIND@. 54 \item@_UA_CLEANUP_PHASE@ is passed in during the clean-up phase and 55 55 means part or all of the stack frame is removed. The personality function 56 56 should do whatever clean-up the language defines 57 57 (such as running destructors/finalizers) and then generally returns 58 \codeC{_URC_CONTINUE_UNWIND}.59 \item \codeC{_UA_HANDLER_FRAME}means the personality function must install58 @_URC_CONTINUE_UNWIND@. 59 \item@_UA_HANDLER_FRAME@ means the personality function must install 60 60 a handler. It is also passed in during the clean-up phase and is in addition 61 61 to the clean-up action. libunwind provides several helpers for the personality 62 62 function here. Once it is done, the personality function must return 63 \codeC{_URC_INSTALL_CONTEXT}.63 @_URC_INSTALL_CONTEXT@. 64 64 \end{itemize} 65 65 The personality function is given a number of other arguments. Some are for 66 compatability and there is the \codeC{struct _Unwind_Context}pointer which66 compatability and there is the @struct _Unwind_Context@ pointer which 67 67 passed to many helpers to get information about the current stack frame. 68 68 … … 72 72 raise-exception but with some extras. 73 73 The first it passes in an extra action to the personality function on each 74 stack frame, \codeC{_UA_FORCE_UNWIND}, which means a handler cannot be74 stack frame, @_UA_FORCE_UNWIND@, which means a handler cannot be 75 75 installed. 76 76 … … 83 83 stack frames have been removed. 
By the standard API this is marked by setting 84 84 the stack pointer inside the context passed to the stop function. However both 85 GCC and Clang add an extra action for this case \codeC{_UA_END_OF_STACK}.85 GCC and Clang add an extra action for this case @_UA_END_OF_STACK@. 86 86 87 87 Each time function the stop function is called it can do one or two things. 88 When it is not the end of the stack it can return \codeC{_URC_NO_REASON}to88 When it is not the end of the stack it can return @_URC_NO_REASON@ to 89 89 continue unwinding. 90 90 % Is there a reason that NO_REASON is used instead of CONTINUE_UNWIND? … … 113 113 114 114 The stop function is very simple. It checks the end of stack flag to see if 115 it is finished unwinding. If so, it calls \codeC{exit}to end the process,115 it is finished unwinding. If so, it calls @exit@ to end the process, 116 116 otherwise it returns with no-reason to continue unwinding. 117 117 % Yeah, this is going to have to change. … … 128 128 location of the instruction pointer and stack layout, which varies with 129 129 compiler and optimization levels. So for frames where there are only 130 destructors, GCC's attribute cleanup with the \texttt{-fexception}flag is130 destructors, GCC's attribute cleanup with the @-fexception@ flag is 131 131 sufficient to handle unwinding. 132 132 133 133 The only functions that require more than that are those that contain 134 \codeCFA{try} statements. A \codeCFA{try} statement has a \codeCFA{try} 135 clause, some number of \codeCFA{catch} clauses and \codeCFA{catchResume}136 clauses and may have a \codeCFA{finally} clause. Of these only \codeCFA{try}137 statements with \codeCFA{catch}clauses need to be transformed and only they138 and the \codeCFA{try}clause are involved.134 @try@ statements. A @try@ statement has a @try@ 135 clause, some number of @catch@ clauses and @catchResume@ 136 clauses and may have a @finally@ clause. Of these only @try@ 137 statements with @catch@ clauses need to be transformed and only they 138 and the @try@ clause are involved. 139 139 140 The \codeCFA{try}statement is converted into a series of closures which can140 The @try@ statement is converted into a series of closures which can 141 141 access other parts of the function according to scoping rules but can be 142 passed around. The \codeCFA{try}clause is converted into the try functions,143 almost entirely unchanged. The \codeCFA{catch}clauses are converted into two142 passed around. The @try@ clause is converted into the try functions, 143 almost entirely unchanged. The @catch@ clauses are converted into two 144 144 functions; the match function and the catch function. 145 145 … … 153 153 runs the handler's body. 154 154 155 These three functions are passed to \codeC{try_terminate}. This is an155 These three functions are passed to @try_terminate@. This is an 156 156 % Maybe I shouldn't quote that, it isn't its actual name. 157 157 internal hand-written function that has its own personality function and … … 167 167 handler was found in this frame. If it was then the personality function 168 168 installs the handler, which is setting the instruction pointer in 169 \codeC{try_terminate}to an otherwise unused section that calls the catch169 @try_terminate@ to an otherwise unused section that calls the catch 170 170 function, passing it the current exception and handler index. 171 \codeC{try_terminate}returns as soon as the catch function returns.171 @try_terminate@ returns as soon as the catch function returns. 
172 172 173 173 At this point control has returned to normal control flow. -
doc/theses/fangren_yu_COOP_F20/Report.tex
r5869cea r7b91c0e 17 17 \usepackage[usenames]{color} 18 18 \input{common} % common CFA document macros 19 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true, pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}19 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 20 20 \usepackage{breakurl} 21 21 \urlstyle{sf} … … 76 76 \renewcommand{\subsectionmark}[1]{\markboth{\thesubsection\quad #1}{\thesubsection\quad #1}} 77 77 \pagenumbering{roman} 78 \linenumbers % comment out to turn off line numbering78 %\linenumbers % comment out to turn off line numbering 79 79 80 80 \maketitle 81 81 \pdfbookmark[1]{Contents}{section} 82 \tableofcontents 83 84 \clearpage 82 85 83 \thispagestyle{plain} 86 84 \pagenumbering{arabic} 87 85 88 86 \begin{abstract} 89 90 \CFA is an evolutionary extension to the C programming language, featuring a parametric type system, and is currently under active development. The reference compiler for \CFA language, @cfa-cc@, has some of its major components dated back to early 2000s, and is based on inefficient data structures and algorithms. Some improvements targeting the expression resolution algorithm, suggested by a recent prototype experiment on a simplified model, are implemented in @cfa-cc@ to support the full \CFA language. These optimizations speed up the compiler significantly by a factor of 20 across the existing \CFA codebase, bringing the compilation time of a mid-sized \CFA source file down to 10-second level. A few cases derived from realistic code examples that causes trouble to the compiler are analyzed in detail, with proposed solutions. This step of \CFA project development is critical to its eventual goal to be used alongside C for large software systems. 91 87 \CFA is an evolutionary, non-object-oriented extension of the C programming language, featuring a parametric type-system, and is currently under active development. The reference compiler for the \CFA language, @cfa-cc@, has some of its major components dated back to the early 2000s, which are based on inefficient data structures and algorithms. This report introduces improvements targeting the expression resolution algorithm, suggested by a recent prototype experiment on a simplified model, which are implemented in @cfa-cc@ to support the full \CFA language. These optimizations speed up the compiler by a factor of 20 across the existing \CFA codebase, bringing the compilation time of a mid-sized \CFA source file down to the 10-second level. A few problem cases derived from realistic code examples are analyzed in detail, with proposed solutions. This work is a critical step in the \CFA project development to achieve its eventual goal of being used alongside C for large software systems. 92 88 \end{abstract} 93 89 90 \clearpage 91 \section*{Acknowledgements} 92 \begin{sloppypar} 93 I would like to thank everyone in the \CFA team for their contribution towards this project. Programming language design and development is a tough subject and requires a lot of teamwork. Without the collaborative efforts from the team, this project could not have been a success. 
Specifically, I would like to thank Andrew Beach for introducing me to the \CFA codebase, Thierry Delisle for maintaining the test and build automation framework, Michael Brooks for providing example programs of various experimental language and type system features, and most importantly, Professor Martin Karsten for recommending me to the \CFA team, and my supervisor, Professor Peter Buhr for encouraging me to explore deeply into intricate compiler algorithms. Finally, I gratefully acknowledge the help from Aaron Moss, former graduate from the team and the author of the precedent thesis work, to participate in the \CFA team's virtual conferences and email correspondence, and provide many critical arguments and suggestions. 2020 had been an unusually challenging year for everyone and we managed to keep a steady pace. 94 \end{sloppypar} 95 96 \clearpage 97 \tableofcontents 98 99 \clearpage 94 100 \section{Introduction} 95 101 96 \CFA language, developed by the Programming Language Group at University of Waterloo, has a long history, with the first proof-of-concept compiler built in 2003 by Richard Bilson~\cite{Bilson03}. Many new features are added to the language over time, but the core of \CFA, parametric functions introduced by the @forall@ clause (hence the name of the language), with the type system supporting parametric overloading, remains mostly unchanged.97 98 The current \CFA reference compiler @cfa-cc@ still includes many parts taken directly from the original Bilson's implementation, and serves as a starting point for the enhancement work to the type system. Unfortunately, it does not provide the efficiency required for the language to be used practically: a \CFA source file of approximately 1000 lines of code can take a few minutes to compile. The cause of the problem is that the old compiler used inefficient data structures and algorithms for expression resolution, which involved a lot of copying and redundant work.99 100 This paper presents a series of optimizations to the performance-critical parts of the resolver, with a major rework of the data structure used by the compiler, using a functional programming approach to reduce memory complexity. Subsequent improvements are mostly suggested by running the compiler builds with a performance profiler against the \CFA standard library sourcecode and a test suite to find the most underperforming components in the compiler algorithm.101 102 The \CFA team endorses a pragmatic philosophy in work that mostly focuses on practical implications of language design and implementation, rather than the theoretical limits. In particular, the compiler is designed to work on production \CFA code efficiently and keep type safety, while sometimes making compromises to expressiveness in extreme corner cases. However, when these corner cases do appear in actual usage, they need to be thoroughly investigated. Analysis presented in this paper, therefore, are conducted on a case-by-case basis. Some of them eventually point to certain weaknesses in the language design and solutions areproposed based on experimental results.103 104 \section{ Completed work}102 \CFA language, developed by the Programming Language Group at the University of Waterloo, has a long history, with the initial language design in 1992 by Glen Ditchfield~\cite{Ditchfield92} and the first proof-of-concept compiler built in 2003 by Richard Bilson~\cite{Bilson03}. 
Many new features have been added to the language over time, but the core of \CFA's type-system --- parametric functions introduced by the @forall@ clause (hence the name of the language) providing parametric overloading --- remains mostly unchanged. 103 104 The current \CFA reference compiler, @cfa-cc@, is designed using the visitor pattern~\cite{vistorpattern} over an abstract syntax tree (AST), where multiple passes over the AST modify it for subsequent passes. @cfa-cc@ still includes many parts taken directly from the original Bilson implementation, which served as the starting point for this enhancement work to the type system. Unfortunately, the prior implementation did not provide the efficiency required for the language to be practical: a \CFA source file of approximately 1000 lines of code can take a multiple minutes to compile. The cause of the problem is that the old compiler used inefficient data structures and algorithms for expression resolution, which involved significant copying and redundant work. 105 106 This report presents a series of optimizations to the performance-critical parts of the resolver, with a major rework of the compiler data-structures using a functional-programming approach to reduce memory complexity. The improvements were suggested by running the compiler builds with a performance profiler against the \CFA standard-library source-code and a test suite to find the most underperforming components in the compiler algorithm. 107 108 The \CFA team endorses a pragmatic philosophy that focuses on practical implications of language design and implementation rather than theoretical limits. In particular, the compiler is designed to be expressive with respect to code reuse while maintaining type safety, but compromise theoretical soundness in extreme corner cases. However, when these corner cases do appear in actual usage, they need to be thoroughly investigated. A case-by-case analysis is presented for several of these corner cases, some of which point to certain weaknesses in the language design with solutions proposed based on experimental results. 109 110 \section{AST restructuring} 105 111 106 112 \subsection{Memory model with sharing} 107 113 108 A major rework of the abstract syntax tree (AST) data structure in the compiler is completed as the first step of the project. The majority of work were documented in the reference manual of the compiler~\cite{cfa-cc}. To summarize: 109 \begin{itemize} 110 \item 111 AST nodes (and therefore subtrees) can be shared without copying when reused. 112 \item 113 Modifications apply the functional programming principle, making copies for local changes without affecting the original data shared by other owners. In-place mutations are permitted as a special case when sharing does not happen. The logic is implemented by reference counting. 114 \item 115 Memory allocation and freeing are performed automatically using smart pointers. 116 \end{itemize} 117 The resolver algorithm designed for overload resolution naturally introduces a significant amount of reused intermediate representations, especially in the following two places: 118 \begin{itemize} 119 \item 120 Function overload candidates are computed by combining the argument candidates bottom-up, with many of them being a common term. For example, if $n$ overloads of a function @f@ all take an integer for the first parameter but different types for the second (@f( int, int )@, @f( int, double )@, etc.) 
the first term is reused $n$ times for each of the generated candidate expressions. This effect is particularly bad for deep expression trees. 121 \item 122 In the unification algorithm and candidate elimination step, actual types are obtained by substituting the type parameters by their bindings. Let $n$ be the complexity (\ie number of nodes in representation) of the original type, $m$ be the complexity of bound type for parameters, and $k$ be the number of occurrences of type parameters in the original type. If everything needs to be deep-copied, the substitution step takes $O(n+mk)$ time and memory, while using shared nodes it is reduced to $O(n)$ time and $O(k)$ memory. 123 \end{itemize} 124 One of the worst examples for the old compiler is a long chain of I/O operations 125 \begin{cfa} 126 sout | 1 | 2 | 3 | 4 | ... 127 \end{cfa} 128 The pipe operator is overloaded by \CFA I/O library for every primitive type in C language, as well as I/O manipulators defined by the library. In total there are around 50 overloads for the output stream operation. On resolving the $n$-th pipe operator in the sequence, the first term, which is the result of sub-expression containing $n-1$ pipe operators, is reused to resolve every overload. Therefore at least $O(n^2)$ copies of expression nodes are made during resolution, not even counting type unification cost; combined with two large factors from number of overloads of pipe operators, and that the ``output stream type'' in \CFA is a trait with 27 assertions (which adds to complexity of the pipe operator's type) this makes compiling a long output sequence extremely slow. In new AST representation only $O(n)$ copies are required and type of pipe operator is not copied at all. 129 130 Reduction in space complexity is especially important, as preliminary profiling result on the old compiler build shows that over half of time spent in expression resolution are on memory allocations. 131 114 A major rework of the AST data-structure in the compiler was completed as the first step of the project. The majority of this work is documented in my prior report documenting the compiler reference-manual~\cite{cfa-cc}. To summarize: 115 \begin{itemize} 116 \item 117 AST nodes (and therefore subtrees) can be shared without copying. 118 \item 119 Modifications are performed using functional-programming principles, making copies for local changes without affecting the original data shared by other owners. In-place mutations are permitted as a special case when there is no sharing. The logic is implemented by reference counting. 120 \item 121 Memory allocation and freeing are performed automatically using smart pointers~\cite{smartpointers}. 122 \end{itemize} 123 124 The resolver algorithm, designed for overload resolution, uses a significant amount of reused, and hence copying, for the intermediate representations, especially in the following two places: 125 \begin{itemize} 126 \item 127 Function overload candidates are computed by combining the argument candidates bottom-up, with many being a common term. For example, if $n$ overloads of a function @f@ all take an integer for the first parameter but different types for the second, \eg @f( int, int )@, @f( int, double )@, etc., the first term is copied $n$ times for each of the generated candidate expressions. This copying is particularly bad for deep expression trees. 
128 \item 129 In the unification algorithm and candidate elimination step, actual types are obtained by substituting the type parameters by their bindings. Let $n$ be the complexity (\ie number of nodes in representation) of the original type, $m$ be the complexity of the bound type for parameters, and $k$ be the number of occurrences of type parameters in the original type. If every substitution needs to be deep-copied, these copy step takes $O(n+mk)$ time and memory, while using shared nodes it is reduced to $O(n)$ time and $O(k)$ memory. 130 \end{itemize} 131 One of the worst examples for the old compiler is a long chain of I/O operations: 132 \begin{cfa} 133 sout | 1 | 2 | 3 | 4 | ...; // print integer constants 134 \end{cfa} 135 The pipe operator is overloaded by the \CFA I/O library for every primitive type in the C language, as well as I/O manipulators defined by the library. In total, there are around 50 overloads for the output stream operation. On resolving the $n$-th pipe operator in the sequence, the first term, which is the result of sub-expression containing $n-1$ pipe operators, is reused to resolve every overload. Therefore at least $O(n^2)$ copies of expression nodes are made during resolution, not even counting type unification cost; combined with the two large factors from number of overloads of pipe operators, and that the ``output stream type'' in \CFA is a trait with 27 assertions (which adds to complexity of the pipe operator's type) this makes compiling a long output sequence extremely slow. In the new AST representation, only $O(n)$ copies are required and the type of the pipe operator is not copied at all. 136 Reduction in space complexity is especially important, as preliminary profiling results on the old compiler build showed over half of the time spent in expression resolution is on memory allocations. 137 138 Since the compiler codebase is large and the new memory model mostly benefits expression resolution, some of the old data structures are still kept, and a conversion pass happens before and after the general resolve phase. Rewriting every compiler module will take longer, and whether the new model is correct was unknown when this project started, therefore only the resolver is currently implemented with the new data structure. 139 132 140 133 141 \subsection{Merged resolver calls} 134 142 135 The pre-resolve phase of compilation, ina dequately called ``validate'' in the compiler source code, does more than just simple syntax validation, as it also normalizes input program. Some of them, however, requires type information on expressions and therefore needsto call the resolver before the general resolve phase. There are three notable places where the resolver is invoked:136 \begin{itemize} 137 \item 138 Attempt to generate default constructor, copy constructor and destructor for user-defined @struct@ types 139 \item 140 Resolve @with@ statements (the same as in P ython, which introduces fields of a structure directly in scope)143 The pre-resolve phase of compilation, inappropriately called ``validate'' in the compiler source code, has a number of passes that do more than simple syntax and semantic validation; some passes also normalizes the input program. A few of these passes require type information for expressions, and therefore, need to call the resolver before the general resolve phase. 
There are three notable places where the resolver is invoked: 144 \begin{itemize} 145 \item 146 Generate default constructor, copy constructor and destructor for user-defined @struct@ types. 147 \item 148 Resolve @with@ statements (the same as in Pascal~\cite{pascal}), which introduces fields of a structure directly into a scope. 141 149 \item 142 150 Resolve @typeof@ expressions (cf. @decltype@ in \CC); note that this step may depend on symbols introduced by @with@ statements. 143 151 \end{itemize} 144 Since the compiler codebase is large and the new memory model mostly only benefits expression resolution, the old data structure is still kept, and a conversion pass happens before and after resolve phase. Rewriting every compiler module will take a long time, and whether the new model is correct is still unknown when started, therefore only the resolver is implemented with the new data structure. 145 146 Since the constructor calls were one of the most expensive to resolve (reason will be shown in the next section), pre-resolve phase were taking more time after resolver moves to the more efficient new implementation. To better facilitate the new resolver, every step that requires type information are reintegrated as part of resolver. 147 148 A by-product of this work is that the reversed dependence of @with@ statement and @typeof@ can now be handled. Previously, the compiler is unable to handle cases such as 152 153 Since the constructor calls are one of the most expensive to resolve (reason given in~\VRef{s:SpecialFunctionLookup}), this pre-resolve phase was taking a large amount of time even after the resolver was changed to the more efficient new implementation. The problem is that multiple resolutions repeat a significant amount of work. Therefore, to better facilitate the new resolver, every step that requires type information should be integrated as part of the general resolver phase. 154 155 A by-product of this work is that reversed dependence between @with@ statement and @typeof@ can now be handled. Previously, the compiler was unable to handle cases such as: 149 156 \begin{cfa} 150 157 struct S { int x; }; 151 158 S foo(); 152 159 typeof( foo() ) s; // type is S 153 with (s) { 160 with (s) { 154 161 x; // refers to s.x 155 162 } 156 163 \end{cfa} 157 since t ype of @s@ is still unresolved when handling @with@ expressions. Instead, the new (and correct) approach is to evaluate @typeof@ expressions when the declaration is first seen, and it suffices because of the declaration-before-use rule.164 since the type of @s@ is unresolved when handling @with@ expressions because the @with@ pass follows the @typeof@ pass (interchanging passes only interchanges the problem). Instead, the new (and correct) approach is to evaluate @typeof@ expressions when the declaration is first seen during resolution, and it suffices because of the declaration-before-use rule. 158 165 159 166 160 167 \subsection{Special function lookup} 161 162 Reducing the number of functions looked up for overload resolution is an effective way to gain performance when there are many overloads but most of them are trivially wrong. In practice, most functions have few (if any) overloads but there are notable exceptions. Most importantly, constructor @?{}@, destructor @^?{}@, and assignment @?=?@ are generated for every user-defined type, and in a large source file there can be hundreds of them. Furthermore, many calls to them are generated for initializing variables and passing arguments. 
This fact makes them the most overloaded and most called functions. 163 164 In an object-oriented programming language, object has methods declared with their types, so a call such as @obj.f()@ only needs to perform lookup in the method table corresponding to type of @obj@. \CFA on the other hand, does not have methods, and all types are open (\ie new operations can be defined on them), so a similar approach will not work in general. However, the ``big 3'' operators have a unique property enforced by the language rules, such that the first parameter must have a reference type. Since \CFA does not have class inheritance, reference type must always match exactly. Therefore, argument-dependent lookup can be implemented for these operators, by using a dedicated symbol table. 165 166 The lookup key used for the special functions is the mangled type name of the first parameter, which acts as the @this@ parameter in an object-oriented language. To handle generic types, the type parameters are stripped off, and only the base type is matched. Note that a constructor (destructor, assignment operator) taking arbitrary @this@ argument, for example @forall( dtype T ) void ?{}( T & );@ is not allowed, and it guarantees that if the @this@ type is known, all possible overloads can be found by searching with the given type. In case that the @this@ argument itself is overloaded, it is resolved first and all possible result types are used for lookup. 167 168 Note that for the generated expressions, the particular variable for @this@ argument is fully known, without overloads, so the majority of constructor call resolutions only need to check for one given object type. Explicit constructor calls and assignment statements sometimes may require lookup for multiple types. In the extremely rare case that type of @this@ argument is yet unbound, everything will have to be checked, just like without the argument-dependent lookup algorithm; fortunately, this case almost never happens in practice. An example is found in the library function @new@: 168 \label{s:SpecialFunctionLookup} 169 170 Reducing the number of function looked ups for overload resolution is an effective way to gain performance when there are many overloads but most of them are trivially wrong. In practice, most functions have few (if any) overloads but there are notable exceptions. Most importantly, constructor @?{}@, destructor @^?{}@, and assignment @?=?@ are generated for every user-defined type (@struct@ and @union@ in C), and in a large source file there can be hundreds of them. Furthermore, many calls are generated for initializing variables, passing arguments and copying values. This fact makes them the most overloaded and most called functions. 171 172 In an object-oriented programming language, the object-method types are scoped within a class, so a call such as @obj.f()@ only needs to perform lookup in the method table corresponding to the type of @obj@. \CFA on the other hand, does not have methods, and all types are open, \ie new operations can be defined on them without inheritance; at best a \CFA type can be constrained by a translation unit. However, the ``big 3'' operators have a unique property enforced by the language rules: the first parameter must be a reference to its associated type, which acts as the @this@ parameter in an object-oriented language. Since \CFA does not have class inheritance, the reference type must always match exactly. 
Therefore, argument-dependent lookup can be implemented for these operators by using a dedicated, fast symbol-table. 173 174 The lookup key for the special functions is the mangled type name of the first parameter. To handle generic types, the type parameters are stripped off, and only the base type is matched. Note a constructor (destructor, assignment operator) may not take an arbitrary @this@ argument, \eg @forall( dtype T ) void ?{}( T & )@, thus guaranteeing that if the @this@ type is known, all possible overloads can be found by searching with this given type. In the case where the @this@ argument itself is overloaded, it is resolved first and all possible result types are used for lookup. 175 176 Note that for a generated expression, the particular variable for the @this@ argument is fully known, without overloads, so the majority of constructor-call resolutions only need to check for one given object type. Explicit constructor calls and assignment statements sometimes require lookup for multiple types. In the extremely rare case that the @this@-argument type is unbound, all necessary types are guaranteed to be checked, as for the previous lookup without the argument-dependent lookup; fortunately, this complex case almost never happens in practice. An example is found in the library function @new@: 169 177 \begin{cfa} 170 178 forall( dtype T | sized( T ), ttype TT | { void ?{}( T &, TT ); } ) 171 179 T * new( TT p ) { return &(*malloc()){ p }; } 172 180 \end{cfa} 173 as @malloc@ may return a pointer to any type, depending on context. 174 175 Interestingly, this particular line of code actually caused another complicated issue, where the unusually massive work of checking every constructor in presence makes the case even worse. Section~\ref{s:TtypeResolutionInfiniteRecursion} presents a detailed analysis for theproblem.176 177 The ``callable'' operator @?()@ (cf. @operator()@ in \CC) c ould also be included in the special operator list, as it is usually only on user-defined types, and the restriction that first argument must be a reference seems reasonable in this case.181 as @malloc@ may return a pointer to any type, depending on context. 182 183 Interestingly, this particular declaration actually causes another complicated issue, making the complex checking of every constructor even worse. \VRef[Section]{s:TtypeResolutionInfiniteRecursion} presents a detailed analysis of this problem. 184 185 The ``callable'' operator @?()@ (cf. @operator()@ in \CC) can also be included in this special operator list, as it is usually only on user-defined types, and the restriction that the first argument must be a reference seems reasonable in this case. 178 186 179 187 180 188 \subsection{Improvement of function type representation} 181 189 182 Since substituting type parameters with their bound types is one fundamental operation in many parts of resolver algorithm (particularly unification and environment binding), making as few copies of type nodes as possible helps reducing memory complexity. Even with the new memory management model, allocation is still a significant factor of resolver performance. Conceptually, operations on type nodes of AST should be performed in functional programming style, treating the data structure as immutable and only copy when necessary. The in-place mutation is a mere optimization that does not change logic of operations. 183 The model was broken on function types by an inappropriate design. 
Function types require some special treatment due to the existence of assertions. In particular, it must be able to distinguish two different kinds of type parameter usage: 190 Since substituting type parameters with their bound types is one fundamental operation in many parts of resolver algorithm (particularly unification and environment binding), making as few copies of type nodes as possible helps reducing memory complexity. Even with the new memory management model, allocation is still a significant factor of resolver performance. Conceptually, operations on type nodes of the AST should be performed in functional-programming style, treating the data structure as immutable and only copying when necessary. The in-place mutation is a mere optimization that does not change the logic for operations. 191 192 However, the model was broken for function types by an inappropriate design. Function types require special treatment due to the existence of assertions that constrain the types it supports. Specifically, it must be possible to distinguish two different kinds of type parameter usage: 184 193 \begin{cfa} 185 194 forall( dtype T ) void foo( T * t ) { 186 forall( dtype U ) void bar( T * t, U* u ) { ... }187 } 188 \end{cfa} 189 Here, only @U@ is a free parameter in declaration of @bar@, as it appears in the function's own forall clause; while @T@ is not free.190 191 Moreover, the resolution algorithm also has to distinguish type bindings of multiple calls to the same function, for example with195 forall( dtype U ) void bar( @T@ * t, @U@ * u ) { ... } 196 } 197 \end{cfa} 198 Here, only @U@ is a free parameter in the nested declaration of function @bar@, as @T@ must be bound at the call site when resolving @bar@. 199 200 Moreover, the resolution algorithm also has to distinguish type bindings of multiple calls to the same function, \eg: 192 201 \begin{cfa} 193 202 forall( dtype T ) int foo( T x ); 194 foo( foo( 1.0 ) );195 \end{cfa} 196 The inner call has binding (T: double) while the outer call has binding (T: int). Therefore a unique representation of free parameters in each expression is required. This was previously done by creating a copy of the parameter declarations inside function type, and fixing references afterwards. However, fixing references is an inherently deep operation that does not work well with functional programming model, as it must be evaluated eagerlyon the entire syntax tree representing the function type.197 198 The revised approach generates a unique ID value for each function call expression instance and represents an occurrence of free parameter type with a pair of generated ID and the original parameter declaration, so that references do not need to be fixed, and a shallow copy of function type is possible.199 200 Note that after the change, all declaration nodes in syntax tree representation maps one-to-one with the actual declarations in the program, and therefore are guaranteed to be unique. Such property can potentially enable more optimizations, and some related ideas are presented after Section~\ref{s:SharedSub-ExpressionCaseUniqueExpressions}.203 int i = foo( foo( 1.0 ) ); 204 \end{cfa} 205 The inner call has binding (T: double) while the outer call has binding (T: int). Therefore a unique representation for the free parameters is required in each expression. This type binding was previously done by creating a copy of the parameter declarations inside the function type and fixing references afterwards. 
However, fixing references is an inherently deep operation that does not work well with the functional-programming style, as it forces eager evaluation on the entire syntax tree representing the function type. 206 207 The revised approach generates a unique ID value for each function call expression instance and represents an occurrence of a free-parameter type with a pair of generated ID and original parameter declaration, so references are unique and a shallow copy of the function type is possible. 208 209 Note that after the change, all declaration nodes in the syntax-tree representation now map one-to-one with the actual declarations in the program, and therefore are guaranteed to be unique. This property can potentially enable more optimizations, and some related ideas are presented at the end of \VRef{s:SharedSub-ExpressionCaseUniqueExpressions}. 201 210 202 211 203 212 \subsection{Improvement of pruning steps} 204 213 205 A minor improvement for candidate elimination is to skip the step on the function overloads themselves and only perform on results of function application. As function calls are usually by name, the name resolution rule dictates that every function candidate necessarily has a different type; indirect function calls are rare, and when they do appear, they usually will not have many possible interpretations, and those rarely matches exactly in argument type. Since function types have a much more complex representation than data types (with multiple parameters and assertions), checking equality on them also takes longer.206 207 A brief test of this approach shows that the number of function overloads considered in expression resolution increases by a negligible amount of less than 1 percent, while type comparisons in candidate elimination are cut by more than half. Improvement is consistent over all \CFA source files in the test suite.214 A minor improvement for candidate elimination is to skip the step on the function overloads and only check the results of function application. As function calls are usually by name (versus pointers to functions), the name resolution rule dictates that every function candidate necessarily has a different type; indirect function calls are rare, and when they do appear, there are even fewer cases with multiple interpretations, and these rarely match exactly in argument type. Since function types have a much more complex representation (with multiple parameters and assertions) than data types, checking equality on them also takes longer. 215 216 A brief test of this approach shows that the number of function overloads considered in expression resolution increases by an amount of less than 1 percent, while type comparisons in candidate elimination are reduced by more than half. This improvement is consistent over all \CFA source files in the test suite. 208 217 209 218 … … 211 220 \label{s:SharedSub-ExpressionCaseUniqueExpressions} 212 221 213 Unique expression denotes an expression that must be evaluated only once, to prevent unwanted side effects. It is currently only a compiler artifact, generated on tuple member expression of the form222 Unique expression denotes an expression evaluated only once to prevent unwanted side effects. 
It is currently only a compiler artifact, generated for tuple-member expression of the form: 214 223 \begin{cfa} 215 224 struct S { int a; int b; }; … … 217 226 s.[a, b]; // tuple member expression, type is [int, int] 218 227 \end{cfa} 219 If the aggregate expression contains function calls, it cannot be evaluated multiple times:228 If the aggregate expression is function call, it cannot be evaluated multiple times: 220 229 \begin{cfa} 221 230 S makeS(); 222 makeS().[a, b]; // this should only make one S231 makeS().[a, b]; // this should only generate a unique S 223 232 \end{cfa} 224 233 Before code generation, the above expression is internally represented as … … 237 246 \end{cfa} 238 247 at code generation, where @_unique_var@ and @_unique_var_evaluated@ are generated variables whose scope covers all appearances of the same expression. 239 240 Note that although the unique expression is only used for tuple expansion now, it is a generally useful construction, and can be seen in other languages, such as Scala's @lazy val@~\cite{Scala}; therefore it could be worthwhile to introduce the unique expression to a broader context in \CFA and even make it directly available to programmers. 241 242 In the compiler's visitor pattern, however, this creates a problem where multiple paths to a logically unique expression exist, so it may be modified more than once and become ill-formed; some specific intervention is required to ensure that unique expressions are only visited once. Furthermore, a unique expression appearing in more than one places will be copied on mutation so its representation is no longer unique. Some hacks are required to keep it in sync, and the methods are different when mutating the unique expression instance itself or its underlying expression. 243 244 Example when mutating the underlying expression (visit-once guard) 248 The conditional check ensures a single call to @makeS()@ even though there are logically multiple calls because of the tuple field expansion. 249 250 Note that although the unique expression is only used for tuple expansion now, it is a generally useful construction, and is seen in other programming languages, such as Scala's @lazy val@~\cite{Scala}; therefore it may be worthwhile to introduce the unique expression to a broader context in \CFA and even make it directly available to programmers. 251 252 In the compiler's visitor pattern, however, this creates a problem where multiple paths to a logically unique expression exist, so it may be modified more than once and become ill-formed; some specific intervention is required to ensure unique expressions are only visited once. Furthermore, a unique expression appearing in more than one places is copied on mutation so its representation is no longer unique. 
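To make the sharing problem concrete, the following sketch shows the tuple expansion of @makeS().[a, b]@ written out in source form, reusing the generated-variable names introduced above; it is schematic only, not literal compiler output:
\begin{cfa}
S _unique_var;                       // generated temporary, shared by both tuple components
bool _unique_var_evaluated = false;  // guard ensuring makeS() is evaluated only once
[ ( _unique_var_evaluated ? _unique_var : ( _unique_var_evaluated = true, _unique_var = makeS() ) ).a,
  ( _unique_var_evaluated ? _unique_var : ( _unique_var_evaluated = true, _unique_var = makeS() ) ).b ];
\end{cfa}
Both components refer to the same logically unique call, which is why a visitor reaches it along two different paths, and why a copy made along one path silently diverges from the node still shared along the other.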
253 254 Currently, special cases are required to keep everything synchronized, and the methods are different when mutating the unique expression instance itself or its underlying expression: 255 \begin{itemize} 256 \item 257 When mutating the underlying expression (visit-once guard) 245 258 \begin{cfa} 246 259 void InsertImplicitCalls::previsit( const ast::UniqueExpr * unqExpr ) { 247 if ( visitedIds.count( unqExpr->id ) ) visit_children = false;260 @if ( visitedIds.count( unqExpr->id ) ) visit_children = false;@ 248 261 else visitedIds.insert( unqExpr->id ); 249 262 } 250 263 \end{cfa} 251 Example when mutating the unique instance itself, which actually creates copies 264 \item 265 When mutating the unique instance itself, which actually creates copies 252 266 \begin{cfa} 253 267 auto mutExpr = mutate( unqExpr ); // internally calls copy when shared 254 if ( ! unqMap.count( unqExpr->id ) ) { 268 @if ( ! unqMap.count( unqExpr->id ) ) {@ 255 269 ... 256 270 } else { … … 259 273 } 260 274 \end{cfa} 261 Such workaround seems difficult to be fit into a common visitor template. This suggests the memory model may need different kinds of nodes to accurately represent the syntax tree. 262 263 Together with the fact that declaration nodes are always unique, it is possible that AST nodes can be classified by three different types: 264 \begin{itemize} 265 \item 266 \textbf{Strictly unique} with only one owner (declarations); 267 \item 268 \textbf{Logically unique} with (possibly) many owners but should not be copied (unique expression example presented here); 269 \item 270 \textbf{Shared} by functional programming model, which assume immutable data structure and are copied on mutation. 275 \end{itemize} 276 Such workarounds are difficult to fit into the common visitor pattern, which suggests the memory model may need different kinds of nodes to accurately represent this feature in the AST. 277 278 Given that declaration nodes are unique, it is possible for AST nodes to be divided into three different types: 279 \begin{itemize} 280 \item 281 \textbf{Singleton} with only one owner (declarations); 282 \item 283 \textbf{No-copy} with multiple owners but cannot be copied (unique expression example presented here); 284 \item 285 \textbf{Copy} by functional-programming style, which assumes immutable data structures that are copied on mutation. 271 286 \end{itemize} 272 287 The boilerplate code can potentially handle these three cases differently. … … 275 290 \section{Analysis of resolver algorithm complexity} 276 291 277 The focus of this chapter is to identify and analyze some realistic cases that cause resolver algorithm to have an exponential run time. As previous work has shown [3], the overload resolution problem in \CFA has worst-case exponential complexity; however, only few specific patterns can trigger the exponential complexity in practice. Implementing heuristic-based optimization for those selected cases is helpful to alleviate the problem.292 The focus of this section is to identify and analyze some realistic cases that cause the resolver algorithm to have an exponential runtime. As previous work has shown~\cite[\S~4.2.1]{Moss19}, the overload resolution problem in \CFA has worst-case exponential complexity; however, only few specific patterns can trigger the exponential complexity in practice. Implementing heuristic-based optimization for those selected cases is helpful to alleviate the problem. 
278 293 279 294 … … 281 296 \label{s:UnboundReturnType} 282 297 283 The interaction of return type overloading and polymorphic functions creates this problem of function calls with unbound returntype, and is further complicated by the presence of assertions.298 The interaction of return-type overloading and polymorphic functions creates function calls with unbounded return-type, and is further complicated by the presence of assertions. 284 299 The prime example of a function with unbound return type is the type-safe version of C @malloc@: 285 300 \begin{cfa} 286 // size deduced from type, so no need to provide the size argument 287 forall( dtype T | sized( T ) ) T * malloc( void ); 288 \end{cfa} 289 Unbound return type can be problematic in resolver algorithm complexity because a single match of function call with unbound return type may create multiple candidates. In the worst case, consider a function declared to return any @otype@: 301 forall( dtype T | sized( T ) ) 302 T * malloc( void ) { return (T *)malloc( sizeof(T) ); } // call C malloc 303 int * i = malloc(); // type deduced from left-hand size $\Rightarrow$ no size argument or return cast 304 \end{cfa} 305 An unbound return-type is problematic in resolver complexity because a single match of a function call with an unbound return type may create multiple candidates. In the worst case, consider a function declared that returns any @otype@ (defined \VPageref{otype}): 290 306 \begin{cfa} 291 307 forall( otype T ) T anyObj( void ); 292 308 \end{cfa} 293 As the resolver attempts to satisfy the otype constraint on @T@, a single call to @anyObj()@ without the result type known creates at least as many candidates as the number of complete types currently in scope; with generic types it becomes even worse, for example, assuming a declaration of generic pairis available at that point:309 As the resolver attempts to satisfy the otype constraint on @T@, a call to @anyObj()@ in an expression, without the result type known, creates at least as many candidates as the number of complete types currently in scope; with generic types it becomes even worse, \eg assuming a declaration of a generic @pair@ is available at that point: 294 310 \begin{cfa} 295 311 forall( otype T, otype U ) struct pair { T first; U second; }; 296 312 \end{cfa} 297 Then an @anyObj()@ call can result in arbitrarily complex types, such as @pair( pair( int, int ), pair( int,int ) )@, and the depth can grow indefinitely until the specified parameter depth limit, thus creating exponentially many candidates. However, the expected types allowed by parent expressions are practically very few, so most of those interpretations are invalid; if the result type is never bound up to top level, by the semantic rules it is ambiguous if there are more than one valid bindings, and resolution can fail fast. It is therefore reasonable to delay resolving assertions on an unbound parameter in return type; however, with the current cost model, such behavior may further cause irregularities in candidate selection, such that the presence of assertions can change the preferred candidate, even when order of expression costs are supposed to stay the same. Detailed analysis of this issue will be presented later, in the correctness part.313 Then an @anyObj()@ call can result in arbitrarily complex types, such as @pair( pair( int, int ), pair( int, int ) )@, and the depth can grow indefinitely until a specified parameter-depth limit, thus creating exponentially many candidates. 
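For a minimal illustration of how the surrounding context constrains these candidates (assuming the @pair@ declaration above and at least one other complete type are in scope; the code is illustrative only):
\begin{cfa}
pair( int, int ) p = anyObj();  // expected type from the declaration binds T to pair( int, int )
anyObj();                       // no expected type: T is never bound, so the call is ambiguous
\end{cfa}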
However, the expected types allowed by parent expressions are practically very few, so most of those interpretations are invalid; if the result type is never bound up to the top level, by the semantic rules it is ambiguous if there is more than one valid binding and resolution fails quickly. It is therefore reasonable to delay resolving assertions on an unbound parameter in a return type; however, with the current cost model, such behavior may further cause irregularities in candidate selection, such that the presence of assertions can change the preferred candidate, even when order of expression costs are supposed to stay the same. A detailed analysis of this issue is presented in \VRef{s:AnalysisTypeSystemCorrectness}. 298 314 299 315 … … 301 317 \label{s:TtypeResolutionInfiniteRecursion} 302 318 303 @ttype@ (``tuple type'') is a relatively new addition to the language that attempts to provide type-safe variadic argument semantics. Unlike regular @dtype@ parameters, @ttype@ is only valid in function parameter list, and may only appear once as the type of last parameter. At the call site, a @ttype@ parameter is bound to the tuple type of all remaining functioncall arguments.319 @ttype@ (``tuple type'') is a relatively new addition to the language that attempts to provide type-safe variadic argument semantics. Unlike regular @dtype@ parameters, @ttype@ is only valid in a function parameter-list, and may only appear once as the last parameter type. At the call site, a @ttype@ parameter is bound to the tuple type of all remaining function-call arguments. 304 320 305 321 There are two kinds of idiomatic @ttype@ usage: one is to provide flexible argument forwarding, similar to the variadic template in \CC (\lstinline[language=C++]|template<typename... args>|), as shown below in the implementation of @unique_ptr@ … … 309 325 T * data; 310 326 }; 311 forall( dtype T | sized( T ), ttype Args| { void ?{}( T &, Args ); })312 void ?{}( unique_ptr( T ) & this, Args args) {313 this.data = new( args );314 } 315 \end{cfa} 316 the other is to implement structural recursion in the first-rest manner:317 \begin{cfa} 318 forall( otype T, ttype Params| { void process( T ); void func( Params ); })327 forall( dtype T | sized( T ), @ttype Args@ | { void ?{}( T &, Args ); }) 328 void ?{}( unique_ptr( T ) & this, Args @args@ ) { 329 this.data = new( @args@ ); // forward constructor arguments to dynamic allocator 330 } 331 \end{cfa} 332 The other usage is to implement structural recursion in the first-rest pattern: 333 \begin{cfa} 334 forall( otype T, @ttype Params@ | { void process( T ); void func( Params ); }) 319 335 void func( T arg1, Params p ) { 320 336 process( arg1 ); 321 func( p ); 322 } 323 \end{cfa} 324 For the second use case, it is important that the number of parameters in the recursive call go down, since the call site must deduce all assertion candidates, and that is only possible if by just looking at argument types (and not their values), the recursion is known to be completed in a finite number of steps. 325 326 In recent experiments, however, some flaw in the type binding rules can lead to the first kind of @ttype@ use case produce an invalid candidate that the resolver enters an infinite loop. 327 328 This bug was discovered in an attempt to raise assertion recursive depth limit and one of the library program takes exponentially longer time to compile. The cause of the problem is identified to be the following set of functions. 
329 File @memory.cfa@ contains 330 \begin{cfa} 331 #include "memory.hfa" 332 #include "stdlib.hfa" 333 \end{cfa} 334 where file @memory.hfa@ contains the @unique_ptr@ declaration above, and two other similar functions with @ttype@ parameter: 335 \begin{cfa} 336 forall( dtype T | sized( T ), ttype Args | { void ?{}( T &, Args ); }) { 337 func( @p@ ); // recursive call until base case of one argument 338 } 339 \end{cfa} 340 For the second use case, it is imperative the number of parameters in the recursive call goes down, since the call site must deduce all assertion candidates, and that is only possible if by observation of the argument types (and not their values), the recursion is known to be completed in a finite number of steps. 341 342 In recent experiments, however, a flaw in the type-binding rules can lead to the first kind of @ttype@ use case producing an invalid candidate and the resolver enters an infinite loop. 343 This bug was discovered in an attempt to raise the assertion recursive-depth limit and one of the library programs took exponentially longer to compile. The cause of the problem is the following set of functions: 344 \begin{cfa} 345 // unique_ptr declaration from above 346 347 forall( dtype T | sized( T ), ttype Args | { void ?{}( T &, Args ); } ) { // distribute forall clause 337 348 void ?{}( counter_data( T ) & this, Args args ); 338 349 void ?{}( counter_ptr( T ) & this, Args args ); 339 350 void ?{}( unique_ptr( T ) & this, Args args ); 340 351 } 341 \end{cfa} 342 File @stdlib.hfa@ contains 343 \begin{cfa} 352 344 353 forall( dtype T | sized( T ), ttype TT | { void ?{}( T &, TT ); } ) 345 T * new( TT p ) { return &(*malloc()){ p }; } 346 \end{cfa} 347 348 In the expression @(*malloc()){p}@, the type of object being constructed is yet unknown, since the return type information is not immediately provided. That caused every constructor to be searched, and while normally a bound @ttype@ cannot be unified with any free parameter, it is possible with another free @ttype@. Therefore in addition to the correct option provided by assertion, 3 wrong options are examined, each of which again requires the same assertion, for an unknown base type T and @ttype@ arguments, and that becomes an infinite loop, until the specified recursion limit and resolution is forced to fail. Moreover, during the recursion steps, number of candidates grows exponentially, since there are always 3 options at each step. 349 350 Unfortunately, @ttype@ to @ttype@ binding is necessary, to allow calling the function provided by assertion indirectly. 351 \begin{cfa} 352 forall( dtype T | sized( T ), ttype Args | { void ?{}( T &, Args ); }) 353 void ?{}( unique_ptr( T ) & this, Args args ) { this.data = (T * )new( args ); } 354 \end{cfa} 355 Here the constructor assertion is used for the @new( args )@ call. 354 T * new( TT p ) { return @&(*malloc()){ p };@ } 355 \end{cfa} 356 In the expression @(*malloc()){p}@, the type of the object being constructed is unknown, since the return-type information is not immediately available. That causes every constructor to be searched, and while normally a bound @ttype@ cannot be unified with any free parameter, it is possible with another free @ttype@. Therefore, in addition to the correct option provided by the assertion, 3 wrong options are examined, each of which again requires the same assertion, for an unknown base-type @T@ and @ttype@ argument, which becomes an infinite loop until the specified recursion limit and resolution is fails. 
Moreover, during the recursion steps, the number of candidates grows exponentially, since there are always 3 options at each step. 357 358 Unfortunately, @ttype@ to @ttype@ binding is necessary, to allow indirectly calling a function provided in an assertion. 359 \begin{cfa} 360 forall( dtype T | sized( T ), ttype Args | { @void ?{}( T &, Args );@ }) 361 void ?{}( unique_ptr( T ) & this, Args args ) { this.data = (T *)@new( args )@; } // constructor call 362 \end{cfa} 363 Here the constructor assertion is used by the @new( args )@ call to indirectly call the constructor on the allocated storage. 356 364 Therefore, it is hard, perhaps impossible, to solve this problem by tweaking the type binding rules. An assertion caching algorithm can help improve this case by detecting cycles in recursion. 357 365 358 Meanwhile, without the caching algorithm implemented, some changes in the \CFA source code are enough to eliminate this problem, at least in the current codebase. Note that the issue only happens with an overloaded variadic function, which rarely appears in practice, since the idiomatic use cases are for argument forwarding and self-recursion. The only overloaded @ttype@ function so far discovered in all of \CFA standard library code is the constructor, and by utilizing the argument-dependent lookup process described in Section~\ref{s:UnboundReturnType}, adding a cast before constructor call gets rid of the issue.359 \begin{cfa} 360 T * new( TT p ) { return &(* (T * )malloc()){ p }; }366 Meanwhile, without a caching algorithm implemented, some changes in the \CFA source code are enough to eliminate this problem, at least in the current codebase. Note that the issue only happens with an overloaded variadic function, which rarely appears in practice, since the idiomatic use cases are for argument forwarding and self-recursion. The only overloaded @ttype@ function so far discovered in all of \CFA standard library is the constructor, and by utilizing the argument-dependent lookup process described in \VRef{s:UnboundReturnType}, adding a cast before the constructor call removes the issue. 
367 \begin{cfa} 368 T * new( TT p ) { return &(*@(T * )@malloc()){ p }; } 361 369 \end{cfa} 362 370 … … 364 372 \subsection{Reused assertions in nested generic type} 365 373 366 The following test of deeply nested dynamic generic type reveals that locally caching reused assertions is necessary, rather than just a resolver optimization, because recomputing assertions can result in bloated generated code size:374 The following test of deeply nested, dynamic generic type reveals that locally caching reused assertions is necessary, rather than just a resolver optimization, because recomputing assertions can result in bloated generated code size: 367 375 \begin{cfa} 368 376 struct nil {}; … … 372 380 int main() { 373 381 #if N==0 374 nil x;382 nil @x@; 375 383 #elif N==1 376 cons( size_t, nil ) x;384 cons( size_t, nil ) @x@; 377 385 #elif N==2 378 cons( size_t, cons( size_t, nil ) ) x;386 cons( size_t, cons( size_t, nil ) ) @x@; 379 387 #elif N==3 380 cons( size_t, cons( size_t, cons( size_t, nil ) ) ) x;388 cons( size_t, cons( size_t, cons( size_t, nil ) ) ) @x@; 381 389 // similarly for N=4,5,6 382 390 #endif 383 391 } 384 392 \end{cfa} 385 At the declaration of @x@, it is implicitly initialized by generated constructor call, w hose signature is given by393 At the declaration of @x@, it is implicitly initialized by generated constructor call, with signature: 386 394 \begin{cfa} 387 395 forall( otype L, otype R ) void ?{}( cons( L, R ) & ); 388 396 \end{cfa} 389 Note that the @otype@ constraint contains 4 assertions: 397 where the @otype@ constraint contains the 4 assertions:\label{otype} 390 398 \begin{cfa} 391 399 void ?{}( L & ); // default constructor … … 394 402 L & ?=?( L &, L & ); // assignment 395 403 \end{cfa} 396 Now since the right hand side of outermost cons is again a cons, recursive assertions are required. When the compiler cannot cache and reuse already resolved assertions, it becomes a problem, as each of those 4 pending assertions again asks for 4 more assertions one level below. Without any caching, number of resolved assertions grows exponentially, while that is obviously unnecessary since there are only $n+1$ different types involved. Even worse, this causes exponentially many wrapper functions generated later at the codegen step, and results in huge compiled binary. 397 398 \ begin{table}[h]404 405 \begin{table}[htb] 406 \centering 399 407 \caption{Compilation results of nested cons test} 408 \label{t:NestedConsTest} 400 409 \begin{tabular}{|r|r|r|} 401 410 \hline … … 413 422 \end{table} 414 423 415 As the local functions are implemented by emitting executable code on the stack~\cite{gcc-nested-func}, it eventually means that compiled code also has exponential run time. This problem has evident practical implications, as nested collection types are frequently used in real production code. 416 424 Now since the right hand side of outermost cons is again a cons, recursive assertions are required. \VRef[Table]{t:NestedConsTest} shows when the compiler does not cache and reuse already resolved assertions, it becomes a problem, as each of these 4 pending assertions again asks for 4 more assertions one level below. Without caching, the number of resolved assertions grows exponentially, which is unnecessary since there are only $n+1$ different types involved. Even worse, this problem causes exponentially many wrapper functions to be generated at the backend, resulting in a huge binary. 
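As a rough count following the growth pattern just described (each of the 4 assertions at one nesting level re-resolving the full set of 4 at the next level, with no caching), a nesting depth of $N$ forces about $\sum_{i=1}^{N} 4^{i} = (4^{N+1}-4)/3$ assertion resolutions, \eg 5460 for $N = 6$, whereas caching by type requires only on the order of $4(N+1)$ resolutions, one set per distinct type; this is a back-of-envelope estimate, not a measurement from the table.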
As the local functions are implemented by emitting executable code on the stack~\cite{gcc-nested-func}, it means that compiled code also has exponential run time. This problem has practical implications, as nested collection types are frequently used in real production code. 417 425 418 426 \section{Analysis of type system correctness} 427 \label{s:AnalysisTypeSystemCorrectness} 419 428 420 429 In Moss' thesis~\cite[\S~4.1.2,~p.~45]{Moss19}, the author presents the following example: … … 433 442 From the set of candidates whose parameter and argument types have been unified and whose assertions have been satisfied, those whose sub-expression interpretations have the smallest total cost of conversion are selected ... The total cost of conversion for each of these candidates is then calculated based on the implicit conversions and polymorphism involved in adapting the types of the sub-expression interpretations to the formal parameter types. 434 443 \end{quote} 435 With this model, the algorithm picks @g1@ in resolving the @f( g( 42 ) )@ call, which seems to be undesirable. 436 437 There are further evidence that shows the Bilson model is fundamentally incorrect, following the discussion of unbound return type in Section~\ref{s:UnboundReturnType}. By the conversion cost specification, a binding from a polymorphic type parameter to a concrete type incurs a polymorphic cost of 1. It remains unspecified \emph{when} the type parameters should become bound. When the parameterized types appear in the function parameters, they can be deduced from the argument type, and there is no ambiguity. In the unbound return case, however, the binding may happen at any stage in expression resolution, therefore it is impossible to define a unique local conversion cost. Note that type binding happens exactly once per parameter in resolving the entire expression, so the global binding cost is unambiguously 1. 438 439 As per the current compiler implementation, it does have a notable inconsistency in handling such case. For any unbound parameter that does \emph{not} come with an associated assertion, it remains unbound to the parent expression; for those that does however, they are immediately bound in the assertion resolution step, and concrete result types are used in the parent expressions. 440 444 With this model, the algorithm picks @g1@ in resolving the @f( g( 42 ) )@ call, which is undesirable. 445 446 There is further evidence that shows the Bilson model is fundamentally incorrect, following the discussion of unbound return type in \VRef{s:UnboundReturnType}. By the conversion-cost specification, a binding from a polymorphic type-parameter to a concrete type incurs a polymorphic cost of 1. It remains unspecified \emph{when} the type parameters should become bound. When the parameterized types appear in function parameters, they can be deduced from the argument type, and there is no ambiguity. In the unbound return case, however, the binding may happen at any stage in expression resolution, therefore it is impossible to define a unique local conversion cost. Note that type binding happens exactly once per parameter in resolving the entire expression, so the global binding cost is unambiguously 1. 447 448 In the current compiler implementation, there is a notable inconsistency in handling this case. 
For any unbound parameter that does \emph{not} come with an associated assertion, it remains unbound to the parent expression; for those that do, however, they are immediately bound in the assertion resolution step, and concrete result types are used in the parent expressions. 441 449 Consider the following example: 442 450 \begin{cfa} … … 444 452 void h( int * ); 445 453 \end{cfa} 446 The expression @h( f() )@ eventually has a total cost of 1 from binding (T: int), but in the eager resolution model, the cost of 1 may occur either atcall to @f@ or at call to @h@, and with the assertion resolution triggering a binding, the local cost of @f()@ is (0 poly, 0 spec) with no assertions, but (1 poly, -1 spec) with an assertion:447 \begin{cfa} 448 forall( dtype T | { void g( T * ); }) T * f( void );454 The expression @h( f() )@ eventually has a total cost of 1 from binding (T: int), but in the eager-resolution model, the cost of 1 may occur either at the call to @f@ or at call to @h@, and with the assertion resolution triggering a binding, the local cost of @f()@ is (0 poly, 0 spec) with no assertions, but (1 poly, -1 spec) with an assertion: 455 \begin{cfa} 456 forall( dtype T | @{ void g( T * ); }@ ) T * f( void ); 449 457 void g( int * ); 450 458 void h( int * ); 451 459 \end{cfa} 452 and that contradicts the principle that adding assertions should make expression cost lower. Furthermore, the time at which type binding and assertion resolution happens is an implementation detail of the compiler, but not a part of language definition. That means two compliant \CFA compilers, one performing immediate assertion resolution at each step, and one delaying assertion resolution on unbound types, can produce different expression costs and therefore different candidate selection, making the language rule itself partially undefined and thereforeunsound. By the above reasoning, the updated cost model using global sum of costs should be accepted as the standard. It also allows the compiler to freely choose when to resolve assertions, as the sum of total costs is independent of that choice; more optimizations regarding assertion resolution can also be implemented.460 and that contradicts the principle that adding assertions should make expression cost lower. Furthermore, the time at which type binding and assertion resolution happens is an implementation detail of the compiler, not part of the language definition. That means two compliant \CFA compilers, one performing immediate assertion resolution at each step, and one delaying assertion resolution on unbound types, can produce different expression costs and therefore different candidate selection, making the language rule itself partially undefined, and therefore, unsound. By the above reasoning, the updated cost model using global sum of costs should be accepted as the standard. It also allows the compiler to freely choose when to resolve assertions, as the sum of total costs is independent of that choice; more optimizations regarding assertion resolution can also be implemented. 453 461 454 462 455 463 \section{Timing results} 456 464 457 For the timing results presented here, the \CFA compiler is built with gcc 9.3.0, and tested on a server machine running Ubuntu 20.04, 64GB RAM and 32-core 2.2 GHz CPU, results reported by the time command, and using only 8 cores in parallel such that the time is close to the case with 100\% CPU utilization on a single thread. 
458 459 On the most recent build, the \CFA standard library (~1.3 MB of source code) compiles in 4 minutes 47 seconds total processor time (single thread equivalent), with the slowest file taking 13 seconds. The test suite (178 test cases, ~2.2MB of source code) completes within 25 minutes total processor time,\footnote{Including a few runtime tests; total time spent in compilation is approximately 21 minutes.} with the slowest file taking 23 seconds. In contrast, the library build on old compiler takes 85 minutes total, 5 minutes for the slowest file. Full test suite takes too long with old compiler build and is therefore not run, but the slowest test cases take approximately 5 minutes. Overall, the most recent build compared to old build in April 2020, before the project started, is consistently faster by a factor of 20. 460 461 Additionally, 6 selected \CFA source files with distinct features from library and test suite are used to test compiler performance after each of the optimizations are implemented. Test files are from the most recent build and run through C preprocessor to eliminate the factor of header file changes. The selected tests are: 462 \begin{itemize} 463 \item 464 @lib/fstream@ (112 KB)\footnote{File sizes are after preprocessing, with no line information (\lstinline|gcc -E -P|).}: implementation of I/O library 465 For the timing results presented here, the \CFA compiler is built with gcc 9.3.0, and tested on a server machine running Ubuntu 20.04, 64GB RAM and 32-core 2.2 GHz CPU. 466 Timing is reported by the @time@ command and an experiment is run using 8 cores, where each core is at 100\% CPU utilization. 467 468 On the most recent build, the \CFA standard library ($\approx$1.3 MB of source code) compiles in 4 minutes 47 seconds total processor time (single thread equivalent), with the slowest file taking 13 seconds. The test suite (178 test cases, $\approx$2.2MB of source code) completes within 25 minutes total processor time, 469 % PAB: I do not understand this footnote. 470 %\footnote{Including a few runtime tests; total time spent in compilation is approximately 21 minutes.} 471 with the slowest file taking 23 seconds. In contrast, the library build with the old compiler takes 85 minutes total, 5 minutes for the slowest file. The full test-suite takes too long with old compiler build and is therefore not run, but the slowest test cases take approximately 5 minutes. Overall, the most recent build compared to an old build is consistently faster by a factor of 20. 472 473 Additionally, 6 selected \CFA source files with distinct features from the library and test suite are used to illustrate the compiler performance change after each of the implemented optimizations. Test files are from the most recent build and run through the C preprocessor to expand header file, perform macro expansions, but no line number information (@gcc -E -P@). 
474 \VRef[Table]{t:SelectedFileByCompilerBuild} shows the selected tests: 475 \begin{itemize} 476 \item 477 @lib/fstream@ (112 KB) 465 478 \item 466 479 @lib/mutex@ (166 KB): implementation of concurrency primitive … … 470 483 @lib/stdlib@ (64 KB): type-safe wrapper to @void *@-based C standard library functions 471 484 \item 472 @test/ ISO2@ (55 KB): application of I/O library485 @test/io2@ (55 KB): application of I/O library 473 486 \item 474 487 @test/thread@ (188 KB): application of threading library 475 488 \end{itemize} 476 477 The \CFA compiler builds are picked from git commit history that passed the test suite, and implement the optimizations incrementally: 478 \begin{itemize} 479 \item 480 \#0 is the first working build of new AST data structure 489 versus \CFA compiler builds picked from the git commit history that implement the optimizations incrementally: 490 \begin{itemize} 491 \item 492 old resolver 493 \item 494 \#0 is the first working build of the new AST data structure 481 495 \item 482 496 \#1 implements special symbol table and argument-dependent lookup 483 497 \item 484 \#2 implements late assertion satisfaction 485 \item 486 \#3 implements revised function type representation 487 \item 488 \#4 skips pruning on expressions with function type (most recent build) 489 \end{itemize} 490 The old resolver with no memory sharing and none of the optimizations above is also tested. 491 \begin{table} 498 \#2 implements late assertion-satisfaction 499 \item 500 \#3 implements revised function-type representation 501 \item 502 \#4 skips pruning on expressions for function types (most recent build) 503 \end{itemize} 504 Reading left to right for a test shows the benefit of each optimization on the cost of compilation. 505 506 \begin{table}[htb] 507 \centering 492 508 \caption{Compile time of selected files by compiler build, in seconds} 509 \label{t:SelectedFileByCompilerBuild} 493 510 \begin{tabular}{|l|r|r|r|r|r|r|} 494 511 \hline … … 513 530 \end{table} 514 531 515 516 532 \section{Conclusion} 517 533 518 Over the course of 8 months of active research and development in \CFA type system and compiler algorithm, performance of the reference \CFA compiler, cfa-cc, has been greatly improved, allowing mid-sized \CFA programs to be compiled and built reasonably fast. As there are also ongoing efforts in the team on building a standard library, evaluating the runtime performance, and attempting to incorporate \CFA with existing software written in C, this project is especially meaningful forpractical purposes.519 520 A nalysis conducted in the project were based significantly on heuristics and practical evidence, as the theoretical bounds and average cases for the expression resolution problem differ. This approach was difficult at start to follow, with an unacceptably slow compiler, since running the program through debugger and validation tools (\eg @gdb@, @valgrind@) adds another order of magnitude to run time, which was already in minutes. However, near the end of the project, many significant improvements have already been made and new optimizations can be tested immediately. The positive feedback in development cycle benefits the \CFA team as a whole, more than just for the compiler optimizations.521 522 Some potential issues of the language that may happen frequently in practice have been identified. Due to the time constraint and complex nature of these problems, a handful of them remain unsolved, but some constructive proposals are made. 
Notably, introducing a local assertion cache in the resolver is a common solution for a few remaining problems, so that should be the focus of work soon. 523 524 The \CFA team are planning on a public alpha release of the language as the compiler performance becomes promising, and other parts of the system, such as a standard library, are also being enhanced. Ideally, the remaining problems should be resolved before release, and the solutions will also be integral to drafting a formal specification. 534 Over the course of 8 months of active research and development of the \CFA type system and compiler algorithms, performance of the reference \CFA compiler, cfa-cc, has been greatly improved. Now, mid-sized \CFA programs are compiled reasonably fast. Currently, there are ongoing efforts by the \CFA team to augment the standard library and evaluate its runtime performance, and incorporate \CFA with existing software written in C; therefore this project is especially meaningful for these practical purposes. 535 536 Accomplishing this work was difficult. Analysis conducted in the project is based significantly on heuristics and practical evidence, as the theoretical bounds and average cases for the expression resolution problem differ. As well, the slowness of the initial compiler made attempts to understand why and where problems exist extremely difficult because both debugging and validation tools (\eg @gdb@, @valgrind@, @perf@) further slowed down compilation time. However, by the end of the project, I had found and fixed several significant problems and new optimizations are easier to introduce and test. The reduction in the development cycle benefits the \CFA team as a whole. 537 538 Some potential issues of the language, which happen frequently in practice, have been identified. Due to the time constraint and complex nature of these problems, a handful of them remain unsolved, but some constructive proposals are made. Notably, introducing a local assertion cache in the resolver is a reasonable solution for a few remaining problems, so that should be the focus of future work. 539 540 The \CFA team are planning on a public alpha release of the language as the compiler performance, given my recent improvements, is now usable. Other parts of the system, such as the standard library, have made significant gains due to the speed-up in the development cycle. Ideally, the remaining problems should be resolved before release, and the solutions will also be integral to drafting a formal specification. 525 541 526 542 \addcontentsline{toc}{section}{\refname} -
doc/theses/fangren_yu_COOP_S20/Report.tex
r5869cea r7b91c0e 17 17 \usepackage[usenames]{color} 18 18 \input{common} % common CFA document macros 19 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true, pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}19 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 20 20 \usepackage{breakurl} 21 21 \urlstyle{sf} -
driver/cfa.cc
r5869cea r7b91c0e 10 10 // Created On : Tue Aug 20 13:44:49 2002 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Tue Nov 17 14:27:28 202013 // Update Count : 44 012 // Last Modified On : Sat Jan 16 07:30:19 2021 13 // Update Count : 442 14 14 // 15 15 … … 499 499 args[nargs++] = "-no-integrated-cpp"; 500 500 args[nargs++] = "-Wno-deprecated"; 501 args[nargs++] = "-Wno-strict-aliasing"; // casting from one type to another 501 502 #ifdef HAVE_CAST_FUNCTION_TYPE 502 503 args[nargs++] = "-Wno-cast-function-type"; -
libcfa/prelude/builtins.c
r5869cea r7b91c0e 18 18 // type that wraps a pointer and a destructor-like function - used in generating implicit destructor calls for struct members in user-defined functions 19 19 // Note: needs to occur early, because it is used to generate destructor calls during code generation 20 forall( dtype T)20 forall(T &) 21 21 struct __Destructor { 22 22 T * object; … … 25 25 26 26 // defined destructor in the case that non-generated code wants to use __Destructor 27 forall( dtype T)27 forall(T &) 28 28 static inline void ^?{}(__Destructor(T) & x) { 29 29 if (x.object && x.dtor) { … … 34 34 // easy interface into __Destructor's destructor for easy codegen purposes 35 35 extern "C" { 36 forall( dtype T)36 forall(T &) 37 37 static inline void __destroy_Destructor(__Destructor(T) * dtor) { 38 38 ^(*dtor){}; … … 51 51 void abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ )); 52 52 53 forall( dtype T)53 forall(T &) 54 54 static inline T & identity(T & i) { 55 55 return i; … … 64 64 static inline void ^?{}($generator &) {} 65 65 66 trait is_generator( dtype T) {66 trait is_generator(T &) { 67 67 void main(T & this); 68 68 $generator * get_generator(T & this); 69 69 }; 70 70 71 forall( dtype T| is_generator(T))71 forall(T & | is_generator(T)) 72 72 static inline T & resume(T & gen) { 73 73 main(gen); … … 78 78 79 79 static inline { 80 forall( dtype DT| { DT & ?+=?( DT &, one_t ); } )80 forall( DT & | { DT & ?+=?( DT &, one_t ); } ) 81 81 DT & ++?( DT & x ) { return x += 1; } 82 82 83 forall( dtype DT| sized(DT) | { void ?{}( DT &, DT ); void ^?{}( DT & ); DT & ?+=?( DT &, one_t ); } )83 forall( DT & | sized(DT) | { void ?{}( DT &, DT ); void ^?{}( DT & ); DT & ?+=?( DT &, one_t ); } ) 84 84 DT & ?++( DT & x ) { DT tmp = x; x += 1; return tmp; } 85 85 86 forall( dtype DT| { DT & ?-=?( DT &, one_t ); } )86 forall( DT & | { DT & ?-=?( DT &, one_t ); } ) 87 87 DT & --?( DT & x ) { return x -= 1; } 88 88 89 forall( dtype DT| sized(DT) | { void ?{}( DT &, DT ); void ^?{}( DT & ); DT & ?-=?( DT &, one_t ); } )89 forall( DT & | sized(DT) | { void ?{}( DT &, DT ); void ^?{}( DT & ); DT & ?-=?( DT &, one_t ); } ) 90 90 DT & ?--( DT & x ) { DT tmp = x; x -= 1; return tmp; } 91 91 92 forall( dtype DT| { int ?!=?( const DT &, zero_t ); } )92 forall( DT & | { int ?!=?( const DT &, zero_t ); } ) 93 93 int !?( const DT & x ) { return !( x != 0 ); } 94 94 } // distribution 95 95 96 96 // universal typed pointer constant 97 static inline forall( dtype DT) DT * intptr( uintptr_t addr ) { return (DT *)addr; }97 static inline forall( DT & ) DT * intptr( uintptr_t addr ) { return (DT *)addr; } 98 98 static inline forall( ftype FT ) FT * intptr( uintptr_t addr ) { return (FT *)addr; } 99 99 … … 156 156 #define __CFA_EXP_OVERFLOW__() 157 157 158 static inline forall( otypeOT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } ) {158 static inline forall( OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } ) { 159 159 OT ?\?( OT ep, unsigned int y ) { __CFA_EXP__(); } 160 160 OT ?\?( OT ep, unsigned long int y ) { __CFA_EXP__(); } -
libcfa/prelude/prelude-gen.cc
r5869cea r7b91c0e 159 159 int main() { 160 160 cout << "# 2 \"prelude.cfa\" // needed for error messages from this file" << endl; 161 cout << "trait sized( dtype T) {};" << endl;161 cout << "trait sized(T &) {};" << endl; 162 162 163 163 cout << "//////////////////////////" << endl; … … 264 264 for (auto cvq : qualifiersPair) { 265 265 for (auto is_vol : { " ", "volatile" }) { 266 cout << "forall( dtype DT) void ?{}(" << cvq.first << type << " * " << is_vol << " &, " << cvq.second << "DT *);" << endl;266 cout << "forall(DT &) void ?{}(" << cvq.first << type << " * " << is_vol << " &, " << cvq.second << "DT *);" << endl; 267 267 } 268 268 } … … 279 279 for (auto cvq : qualifiersSingle) { 280 280 for (auto is_vol : { " ", "volatile" }) { 281 cout << "forall( dtype DT) void ?{}(" << cvq << " DT" << " * " << is_vol << " &);" << endl;281 cout << "forall(DT &) void ?{}(" << cvq << " DT" << " * " << is_vol << " &);" << endl; 282 282 } 283 283 for (auto is_vol : { " ", "volatile" }) { 284 cout << "forall( dtype DT) void ^?{}(" << cvq << " DT" << " * " << is_vol << " &);" << endl;284 cout << "forall(DT &) void ^?{}(" << cvq << " DT" << " * " << is_vol << " &);" << endl; 285 285 } 286 286 } … … 290 290 for (auto is_vol : { " ", "volatile" }) { 291 291 for (auto cvq : qualifiersSingle) { 292 cout << "forall( dtype DT) void ?{}( " << cvq << type << " * " << is_vol << " &, zero_t);" << endl;292 cout << "forall(DT &) void ?{}( " << cvq << type << " * " << is_vol << " &, zero_t);" << endl; 293 293 } 294 294 } … … 317 317 for (auto op : pointerOperators) { 318 318 auto forall = [&op]() { 319 cout << "forall( dtype DT" << op.sized << ") ";319 cout << "forall(DT &" << op.sized << ") "; 320 320 }; 321 321 for (auto type : { "DT"/*, "void"*/ } ) { … … 408 408 for (auto is_vol : { " ", "volatile" }) { 409 409 for (auto cvq : qualifiersPair) { 410 cout << "forall( dtype DT) " << cvq.first << "void * ?=?( " << cvq.first << "void * " << is_vol << " &, " << cvq.second << "DT *);" << endl;410 cout << "forall(DT &) " << cvq.first << "void * ?=?( " << cvq.first << "void * " << is_vol << " &, " << cvq.second << "DT *);" << endl; 411 411 } 412 412 for (auto cvq : qualifiersSingle) { 413 cout << "forall( dtype DT) " << cvq << " DT * ?=?( " << cvq << " DT * " << is_vol << " &, zero_t);" << endl;413 cout << "forall(DT &) " << cvq << " DT * ?=?( " << cvq << " DT * " << is_vol << " &, zero_t);" << endl; 414 414 } 415 415 } -
libcfa/prelude/prelude.old.cf
r5869cea r7b91c0e 23 23 // ------------------------------------------------------------ 24 24 25 trait sized( dtype T) {};25 trait sized(T &) {}; 26 26 27 27 // ------------------------------------------------------------ … … 68 68 long double _Complex ?--( long double _Complex & ), ?--( volatile long double _Complex & ); 69 69 70 forall( dtype T| sized(T) ) T * ?++( T *& );71 forall( dtype T| sized(T) ) const T * ?++( const T *& );72 forall( dtype T| sized(T) ) volatile T * ?++( volatile T *& );73 forall( dtype T| sized(T) ) const volatile T * ?++( const volatile T *& );74 forall( dtype T| sized(T) ) T * ?--( T *& );75 forall( dtype T| sized(T) ) const T * ?--( const T *& );76 forall( dtype T| sized(T) ) volatile T * ?--( volatile T *& );77 forall( dtype T| sized(T) ) const volatile T * ?--( const volatile T *& );78 79 forall( dtype T| sized(T) ) T & ?[?]( T *, ptrdiff_t );80 forall( dtype T| sized(T) ) const T & ?[?]( const T *, ptrdiff_t );81 forall( dtype T| sized(T) ) volatile T & ?[?]( volatile T *, ptrdiff_t );82 forall( dtype T| sized(T) ) const volatile T & ?[?]( const volatile T *, ptrdiff_t );83 forall( dtype T| sized(T) ) T & ?[?]( ptrdiff_t, T * );84 forall( dtype T| sized(T) ) const T & ?[?]( ptrdiff_t, const T * );85 forall( dtype T| sized(T) ) volatile T & ?[?]( ptrdiff_t, volatile T * );86 forall( dtype T| sized(T) ) const volatile T & ?[?]( ptrdiff_t, const volatile T * );70 forall( T & | sized(T) ) T * ?++( T *& ); 71 forall( T & | sized(T) ) const T * ?++( const T *& ); 72 forall( T & | sized(T) ) volatile T * ?++( volatile T *& ); 73 forall( T & | sized(T) ) const volatile T * ?++( const volatile T *& ); 74 forall( T & | sized(T) ) T * ?--( T *& ); 75 forall( T & | sized(T) ) const T * ?--( const T *& ); 76 forall( T & | sized(T) ) volatile T * ?--( volatile T *& ); 77 forall( T & | sized(T) ) const volatile T * ?--( const volatile T *& ); 78 79 forall( T & | sized(T) ) T & ?[?]( T *, ptrdiff_t ); 80 forall( T & | sized(T) ) const T & ?[?]( const T *, ptrdiff_t ); 81 forall( T & | sized(T) ) volatile T & ?[?]( volatile T *, ptrdiff_t ); 82 forall( T & | sized(T) ) const volatile T & ?[?]( const volatile T *, ptrdiff_t ); 83 forall( T & | sized(T) ) T & ?[?]( ptrdiff_t, T * ); 84 forall( T & | sized(T) ) const T & ?[?]( ptrdiff_t, const T * ); 85 forall( T & | sized(T) ) volatile T & ?[?]( ptrdiff_t, volatile T * ); 86 forall( T & | sized(T) ) const volatile T & ?[?]( ptrdiff_t, const volatile T * ); 87 87 88 88 // ------------------------------------------------------------ … … 107 107 long double _Complex ++?( long double _Complex & ), --?( long double _Complex & ); 108 108 109 forall( dtype T| sized(T) ) T * ++?( T *& );110 forall( dtype T| sized(T) ) const T * ++?( const T *& );111 forall( dtype T| sized(T) ) volatile T * ++?( volatile T *& );112 forall( dtype T| sized(T) ) const volatile T * ++?( const volatile T *& );113 forall( dtype T| sized(T) ) T * --?( T *& );114 forall( dtype T| sized(T) ) const T * --?( const T *& );115 forall( dtype T| sized(T) ) volatile T * --?( volatile T *& );116 forall( dtype T| sized(T) ) const volatile T * --?( const volatile T *& );117 118 forall( dtype T| sized(T) ) T & *?( T * );119 forall( dtype T| sized(T) ) const T & *?( const T * );120 forall( dtype T| sized(T) ) volatile T & *?( volatile T * );121 forall( dtype T| sized(T) ) const volatile T & *?( const volatile T * );109 forall( T & | sized(T) ) T * ++?( T *& ); 110 forall( T & | sized(T) ) const T * ++?( const T *& ); 111 forall( T & | sized(T) ) volatile T * ++?( volatile 
T *& ); 112 forall( T & | sized(T) ) const volatile T * ++?( const volatile T *& ); 113 forall( T & | sized(T) ) T * --?( T *& ); 114 forall( T & | sized(T) ) const T * --?( const T *& ); 115 forall( T & | sized(T) ) volatile T * --?( volatile T *& ); 116 forall( T & | sized(T) ) const volatile T * --?( const volatile T *& ); 117 118 forall( T & | sized(T) ) T & *?( T * ); 119 forall( T & | sized(T) ) const T & *?( const T * ); 120 forall( T & | sized(T) ) volatile T & *?( volatile T * ); 121 forall( T & | sized(T) ) const volatile T & *?( const volatile T * ); 122 122 forall( ftype FT ) FT & *?( FT * ); 123 123 … … 142 142 !?( float _Complex ), !?( double _Complex ), !?( long double _Complex ); 143 143 144 forall( dtype DT) int !?( DT * );145 forall( dtype DT) int !?( const DT * );146 forall( dtype DT) int !?( volatile DT * );147 forall( dtype DT) int !?( const volatile DT * );144 forall( DT & ) int !?( DT * ); 145 forall( DT & ) int !?( const DT * ); 146 forall( DT & ) int !?( volatile DT * ); 147 forall( DT & ) int !?( const volatile DT * ); 148 148 forall( ftype FT ) int !?( FT * ); 149 149 … … 191 191 long double _Complex ?+?( long double _Complex, long double _Complex ), ?-?( long double _Complex, long double _Complex ); 192 192 193 forall( dtype T| sized(T) ) T * ?+?( T *, ptrdiff_t );194 forall( dtype T| sized(T) ) T * ?+?( ptrdiff_t, T * );195 forall( dtype T| sized(T) ) const T * ?+?( const T *, ptrdiff_t );196 forall( dtype T| sized(T) ) const T * ?+?( ptrdiff_t, const T * );197 forall( dtype T| sized(T) ) volatile T * ?+?( volatile T *, ptrdiff_t );198 forall( dtype T| sized(T) ) volatile T * ?+?( ptrdiff_t, volatile T * );199 forall( dtype T| sized(T) ) const volatile T * ?+?( const volatile T *, ptrdiff_t );200 forall( dtype T| sized(T) ) const volatile T * ?+?( ptrdiff_t, const volatile T * );201 forall( dtype T| sized(T) ) T * ?-?( T *, ptrdiff_t );202 forall( dtype T| sized(T) ) const T * ?-?( const T *, ptrdiff_t );203 forall( dtype T| sized(T) ) volatile T * ?-?( volatile T *, ptrdiff_t );204 forall( dtype T| sized(T) ) const volatile T * ?-?( const volatile T *, ptrdiff_t );205 forall( dtype T| sized(T) ) ptrdiff_t ?-?( const volatile T *, const volatile T * );193 forall( T & | sized(T) ) T * ?+?( T *, ptrdiff_t ); 194 forall( T & | sized(T) ) T * ?+?( ptrdiff_t, T * ); 195 forall( T & | sized(T) ) const T * ?+?( const T *, ptrdiff_t ); 196 forall( T & | sized(T) ) const T * ?+?( ptrdiff_t, const T * ); 197 forall( T & | sized(T) ) volatile T * ?+?( volatile T *, ptrdiff_t ); 198 forall( T & | sized(T) ) volatile T * ?+?( ptrdiff_t, volatile T * ); 199 forall( T & | sized(T) ) const volatile T * ?+?( const volatile T *, ptrdiff_t ); 200 forall( T & | sized(T) ) const volatile T * ?+?( ptrdiff_t, const volatile T * ); 201 forall( T & | sized(T) ) T * ?-?( T *, ptrdiff_t ); 202 forall( T & | sized(T) ) const T * ?-?( const T *, ptrdiff_t ); 203 forall( T & | sized(T) ) volatile T * ?-?( volatile T *, ptrdiff_t ); 204 forall( T & | sized(T) ) const volatile T * ?-?( const volatile T *, ptrdiff_t ); 205 forall( T & | sized(T) ) ptrdiff_t ?-?( const volatile T *, const volatile T * ); 206 206 207 207 // ------------------------------------------------------------ … … 255 255 ?>?( long double, long double ), ?>=?( long double, long double ); 256 256 257 forall( dtype DT) signed int ?<?( DT *, DT * );258 forall( dtype DT) signed int ?<?( const DT *, const DT * );259 forall( dtype DT) signed int ?<?( volatile DT *, volatile DT * );260 forall( dtype DT) signed int ?<?( const 
volatile DT *, const volatile DT * );261 262 forall( dtype DT) signed int ?>?( DT *, DT * );263 forall( dtype DT) signed int ?>?( const DT *, const DT * );264 forall( dtype DT) signed int ?>?( volatile DT *, volatile DT * );265 forall( dtype DT) signed int ?>?( const volatile DT *, const volatile DT * );266 267 forall( dtype DT) signed int ?<=?( DT *, DT * );268 forall( dtype DT) signed int ?<=?( const DT *, const DT * );269 forall( dtype DT) signed int ?<=?( volatile DT *, volatile DT * );270 forall( dtype DT) signed int ?<=?( const volatile DT *, const volatile DT * );271 272 forall( dtype DT) signed int ?>=?( DT *, DT * );273 forall( dtype DT) signed int ?>=?( const DT *, const DT * );274 forall( dtype DT) signed int ?>=?( volatile DT *, volatile DT * );275 forall( dtype DT) signed int ?>=?( const volatile DT *, const volatile DT * );257 forall( DT & ) signed int ?<?( DT *, DT * ); 258 forall( DT & ) signed int ?<?( const DT *, const DT * ); 259 forall( DT & ) signed int ?<?( volatile DT *, volatile DT * ); 260 forall( DT & ) signed int ?<?( const volatile DT *, const volatile DT * ); 261 262 forall( DT & ) signed int ?>?( DT *, DT * ); 263 forall( DT & ) signed int ?>?( const DT *, const DT * ); 264 forall( DT & ) signed int ?>?( volatile DT *, volatile DT * ); 265 forall( DT & ) signed int ?>?( const volatile DT *, const volatile DT * ); 266 267 forall( DT & ) signed int ?<=?( DT *, DT * ); 268 forall( DT & ) signed int ?<=?( const DT *, const DT * ); 269 forall( DT & ) signed int ?<=?( volatile DT *, volatile DT * ); 270 forall( DT & ) signed int ?<=?( const volatile DT *, const volatile DT * ); 271 272 forall( DT & ) signed int ?>=?( DT *, DT * ); 273 forall( DT & ) signed int ?>=?( const DT *, const DT * ); 274 forall( DT & ) signed int ?>=?( volatile DT *, volatile DT * ); 275 forall( DT & ) signed int ?>=?( const volatile DT *, const volatile DT * ); 276 276 277 277 // ------------------------------------------------------------ … … 302 302 signed int ?==?( one_t, one_t ), ?!=?( one_t, one_t ); 303 303 304 forall( dtype DT) signed int ?==?( DT *, DT * );305 forall( dtype DT) signed int ?==?( const DT *, const DT * );306 forall( dtype DT) signed int ?==?( volatile DT *, volatile DT * );307 forall( dtype DT) signed int ?==?( const volatile DT *, const volatile DT * );304 forall( DT & ) signed int ?==?( DT *, DT * ); 305 forall( DT & ) signed int ?==?( const DT *, const DT * ); 306 forall( DT & ) signed int ?==?( volatile DT *, volatile DT * ); 307 forall( DT & ) signed int ?==?( const volatile DT *, const volatile DT * ); 308 308 forall( ftype FT ) signed int ?==?( FT *, FT * ); 309 forall( dtype DT) signed int ?!=?( DT *, DT * );310 forall( dtype DT) signed int ?!=?( const DT *, const DT * );311 forall( dtype DT) signed int ?!=?( volatile DT *, volatile DT * );312 forall( dtype DT) signed int ?!=?( const volatile DT *, const volatile DT * );309 forall( DT & ) signed int ?!=?( DT *, DT * ); 310 forall( DT & ) signed int ?!=?( const DT *, const DT * ); 311 forall( DT & ) signed int ?!=?( volatile DT *, volatile DT * ); 312 forall( DT & ) signed int ?!=?( const volatile DT *, const volatile DT * ); 313 313 forall( ftype FT ) signed int ?!=?( FT *, FT * ); 314 314 … … 376 376 377 377 forall( ftype FT ) FT * ?=?( FT *&, FT * ); 378 forall( fty peFT ) FT * ?=?( FT * volatile &, FT * );379 380 forall( dtype DT) DT * ?=?( DT * &, DT * );381 forall( dtype DT) DT * ?=?( DT * volatile &, DT * );382 forall( dtype DT) const DT * ?=?( const DT * &, DT * );383 forall( dtype DT) const DT * ?=?( 
const DT * volatile &, DT * );384 forall( dtype DT) const DT * ?=?( const DT * &, const DT * );385 forall( dtype DT) const DT * ?=?( const DT * volatile &, const DT * );386 forall( dtype DT) volatile DT * ?=?( volatile DT * &, DT * );387 forall( dtype DT) volatile DT * ?=?( volatile DT * volatile &, DT * );388 forall( dtype DT) volatile DT * ?=?( volatile DT * &, volatile DT * );389 forall( dtype DT) volatile DT * ?=?( volatile DT * volatile &, volatile DT * );390 391 forall( dtype DT) const volatile DT * ?=?( const volatile DT * &, DT * );392 forall( dtype DT) const volatile DT * ?=?( const volatile DT * volatile &, DT * );393 forall( dtype DT) const volatile DT * ?=?( const volatile DT * &, const DT * );394 forall( dtype DT) const volatile DT * ?=?( const volatile DT * volatile &, const DT * );395 forall( dtype DT) const volatile DT * ?=?( const volatile DT * &, volatile DT * );396 forall( dtype DT) const volatile DT * ?=?( const volatile DT * volatile &, volatile DT * );397 forall( dtype DT) const volatile DT * ?=?( const volatile DT * &, const volatile DT * );398 forall( dtype DT) const volatile DT * ?=?( const volatile DT * volatile &, const volatile DT * );399 400 forall( dtype DT) void * ?=?( void * &, DT * );401 forall( dtype DT) void * ?=?( void * volatile &, DT * );402 forall( dtype DT) const void * ?=?( const void * &, DT * );403 forall( dtype DT) const void * ?=?( const void * volatile &, DT * );404 forall( dtype DT) const void * ?=?( const void * &, const DT * );405 forall( dtype DT) const void * ?=?( const void * volatile &, const DT * );406 forall( dtype DT) volatile void * ?=?( volatile void * &, DT * );407 forall( dtype DT) volatile void * ?=?( volatile void * volatile &, DT * );408 forall( dtype DT) volatile void * ?=?( volatile void * &, volatile DT * );409 forall( dtype DT) volatile void * ?=?( volatile void * volatile &, volatile DT * );410 forall( dtype DT) const volatile void * ?=?( const volatile void * &, DT * );411 forall( dtype DT) const volatile void * ?=?( const volatile void * volatile &, DT * );412 forall( dtype DT) const volatile void * ?=?( const volatile void * &, const DT * );413 forall( dtype DT) const volatile void * ?=?( const volatile void * volatile &, const DT * );414 forall( dtype DT) const volatile void * ?=?( const volatile void * &, volatile DT * );415 forall( dtype DT) const volatile void * ?=?( const volatile void * volatile &, volatile DT * );416 forall( dtype DT) const volatile void * ?=?( const volatile void * &, const volatile DT * );417 forall( dtype DT) const volatile void * ?=?( const volatile void * volatile &, const volatile DT * );378 forall( ftyep FT ) FT * ?=?( FT * volatile &, FT * ); 379 380 forall( DT & ) DT * ?=?( DT * &, DT * ); 381 forall( DT & ) DT * ?=?( DT * volatile &, DT * ); 382 forall( DT & ) const DT * ?=?( const DT * &, DT * ); 383 forall( DT & ) const DT * ?=?( const DT * volatile &, DT * ); 384 forall( DT & ) const DT * ?=?( const DT * &, const DT * ); 385 forall( DT & ) const DT * ?=?( const DT * volatile &, const DT * ); 386 forall( DT & ) volatile DT * ?=?( volatile DT * &, DT * ); 387 forall( DT & ) volatile DT * ?=?( volatile DT * volatile &, DT * ); 388 forall( DT & ) volatile DT * ?=?( volatile DT * &, volatile DT * ); 389 forall( DT & ) volatile DT * ?=?( volatile DT * volatile &, volatile DT * ); 390 391 forall( DT & ) const volatile DT * ?=?( const volatile DT * &, DT * ); 392 forall( DT & ) const volatile DT * ?=?( const volatile DT * volatile &, DT * ); 393 forall( DT & ) const volatile DT * ?=?( const 
volatile DT * &, const DT * ); 394 forall( DT & ) const volatile DT * ?=?( const volatile DT * volatile &, const DT * ); 395 forall( DT & ) const volatile DT * ?=?( const volatile DT * &, volatile DT * ); 396 forall( DT & ) const volatile DT * ?=?( const volatile DT * volatile &, volatile DT * ); 397 forall( DT & ) const volatile DT * ?=?( const volatile DT * &, const volatile DT * ); 398 forall( DT & ) const volatile DT * ?=?( const volatile DT * volatile &, const volatile DT * ); 399 400 forall( DT & ) void * ?=?( void * &, DT * ); 401 forall( DT & ) void * ?=?( void * volatile &, DT * ); 402 forall( DT & ) const void * ?=?( const void * &, DT * ); 403 forall( DT & ) const void * ?=?( const void * volatile &, DT * ); 404 forall( DT & ) const void * ?=?( const void * &, const DT * ); 405 forall( DT & ) const void * ?=?( const void * volatile &, const DT * ); 406 forall( DT & ) volatile void * ?=?( volatile void * &, DT * ); 407 forall( DT & ) volatile void * ?=?( volatile void * volatile &, DT * ); 408 forall( DT & ) volatile void * ?=?( volatile void * &, volatile DT * ); 409 forall( DT & ) volatile void * ?=?( volatile void * volatile &, volatile DT * ); 410 forall( DT & ) const volatile void * ?=?( const volatile void * &, DT * ); 411 forall( DT & ) const volatile void * ?=?( const volatile void * volatile &, DT * ); 412 forall( DT & ) const volatile void * ?=?( const volatile void * &, const DT * ); 413 forall( DT & ) const volatile void * ?=?( const volatile void * volatile &, const DT * ); 414 forall( DT & ) const volatile void * ?=?( const volatile void * &, volatile DT * ); 415 forall( DT & ) const volatile void * ?=?( const volatile void * volatile &, volatile DT * ); 416 forall( DT & ) const volatile void * ?=?( const volatile void * &, const volatile DT * ); 417 forall( DT & ) const volatile void * ?=?( const volatile void * volatile &, const volatile DT * ); 418 418 419 419 //forall( dtype DT ) DT * ?=?( DT * &, zero_t ); 420 420 //forall( dtype DT ) DT * ?=?( DT * volatile &, zero_t ); 421 forall( dtype DT) const DT * ?=?( const DT * &, zero_t );422 forall( dtype DT) const DT * ?=?( const DT * volatile &, zero_t );421 forall( DT & ) const DT * ?=?( const DT * &, zero_t ); 422 forall( DT & ) const DT * ?=?( const DT * volatile &, zero_t ); 423 423 //forall( dtype DT ) volatile DT * ?=?( volatile DT * &, zero_t ); 424 424 //forall( dtype DT ) volatile DT * ?=?( volatile DT * volatile &, zero_t ); 425 forall( dtype DT) const volatile DT * ?=?( const volatile DT * &, zero_t );426 forall( dtype DT) const volatile DT * ?=?( const volatile DT * volatile &, zero_t );425 forall( DT & ) const volatile DT * ?=?( const volatile DT * &, zero_t ); 426 forall( DT & ) const volatile DT * ?=?( const volatile DT * volatile &, zero_t ); 427 427 428 428 forall( ftype FT ) FT * ?=?( FT * &, zero_t ); 429 429 forall( ftype FT ) FT * ?=?( FT * volatile &, zero_t ); 430 430 431 forall( dtype T| sized(T) ) T * ?+=?( T * &, ptrdiff_t );432 forall( dtype T| sized(T) ) T * ?+=?( T * volatile &, ptrdiff_t );433 forall( dtype T| sized(T) ) const T * ?+=?( const T * &, ptrdiff_t );434 forall( dtype T| sized(T) ) const T * ?+=?( const T * volatile &, ptrdiff_t );435 forall( dtype T| sized(T) ) volatile T * ?+=?( volatile T * &, ptrdiff_t );436 forall( dtype T| sized(T) ) volatile T * ?+=?( volatile T * volatile &, ptrdiff_t );437 forall( dtype T| sized(T) ) const volatile T * ?+=?( const volatile T * &, ptrdiff_t );438 forall( dtype T| sized(T) ) const volatile T * ?+=?( const volatile T * volatile &, 
ptrdiff_t );439 forall( dtype T| sized(T) ) T * ?-=?( T * &, ptrdiff_t );440 forall( dtype T| sized(T) ) T * ?-=?( T * volatile &, ptrdiff_t );441 forall( dtype T| sized(T) ) const T * ?-=?( const T * &, ptrdiff_t );442 forall( dtype T| sized(T) ) const T * ?-=?( const T * volatile &, ptrdiff_t );443 forall( dtype T| sized(T) ) volatile T * ?-=?( volatile T * &, ptrdiff_t );444 forall( dtype T| sized(T) ) volatile T * ?-=?( volatile T * volatile &, ptrdiff_t );445 forall( dtype T| sized(T) ) const volatile T * ?-=?( const volatile T * &, ptrdiff_t );446 forall( dtype T| sized(T) ) const volatile T * ?-=?( const volatile T * volatile &, ptrdiff_t );431 forall( T & | sized(T) ) T * ?+=?( T * &, ptrdiff_t ); 432 forall( T & | sized(T) ) T * ?+=?( T * volatile &, ptrdiff_t ); 433 forall( T & | sized(T) ) const T * ?+=?( const T * &, ptrdiff_t ); 434 forall( T & | sized(T) ) const T * ?+=?( const T * volatile &, ptrdiff_t ); 435 forall( T & | sized(T) ) volatile T * ?+=?( volatile T * &, ptrdiff_t ); 436 forall( T & | sized(T) ) volatile T * ?+=?( volatile T * volatile &, ptrdiff_t ); 437 forall( T & | sized(T) ) const volatile T * ?+=?( const volatile T * &, ptrdiff_t ); 438 forall( T & | sized(T) ) const volatile T * ?+=?( const volatile T * volatile &, ptrdiff_t ); 439 forall( T & | sized(T) ) T * ?-=?( T * &, ptrdiff_t ); 440 forall( T & | sized(T) ) T * ?-=?( T * volatile &, ptrdiff_t ); 441 forall( T & | sized(T) ) const T * ?-=?( const T * &, ptrdiff_t ); 442 forall( T & | sized(T) ) const T * ?-=?( const T * volatile &, ptrdiff_t ); 443 forall( T & | sized(T) ) volatile T * ?-=?( volatile T * &, ptrdiff_t ); 444 forall( T & | sized(T) ) volatile T * ?-=?( volatile T * volatile &, ptrdiff_t ); 445 forall( T & | sized(T) ) const volatile T * ?-=?( const volatile T * &, ptrdiff_t ); 446 forall( T & | sized(T) ) const volatile T * ?-=?( const volatile T * volatile &, ptrdiff_t ); 447 447 448 448 _Bool ?=?( _Bool &, _Bool ), ?=?( volatile _Bool &, _Bool ); … … 723 723 forall( ftype FT ) void ?{}( FT * volatile &, FT * ); 724 724 725 forall( dtype DT) void ?{}( DT * &, DT * );726 forall( dtype DT) void ?{}( const DT * &, DT * );727 forall( dtype DT) void ?{}( const DT * &, const DT * );728 forall( dtype DT) void ?{}( volatile DT * &, DT * );729 forall( dtype DT) void ?{}( volatile DT * &, volatile DT * );730 forall( dtype DT) void ?{}( const volatile DT * &, DT * );731 forall( dtype DT) void ?{}( const volatile DT * &, const DT * );732 forall( dtype DT) void ?{}( const volatile DT * &, volatile DT * );733 forall( dtype DT) void ?{}( const volatile DT * &, const volatile DT * );734 735 forall( dtype DT) void ?{}( void * &, DT * );736 forall( dtype DT) void ?{}( const void * &, DT * );737 forall( dtype DT) void ?{}( const void * &, const DT * );738 forall( dtype DT) void ?{}( volatile void * &, DT * );739 forall( dtype DT) void ?{}( volatile void * &, volatile DT * );740 forall( dtype DT) void ?{}( const volatile void * &, DT * );741 forall( dtype DT) void ?{}( const volatile void * &, const DT * );742 forall( dtype DT) void ?{}( const volatile void * &, volatile DT * );743 forall( dtype DT) void ?{}( const volatile void * &, const volatile DT * );725 forall( DT & ) void ?{}( DT * &, DT * ); 726 forall( DT & ) void ?{}( const DT * &, DT * ); 727 forall( DT & ) void ?{}( const DT * &, const DT * ); 728 forall( DT & ) void ?{}( volatile DT * &, DT * ); 729 forall( DT & ) void ?{}( volatile DT * &, volatile DT * ); 730 forall( DT & ) void ?{}( const volatile DT * &, DT * ); 731 forall( DT & ) void 
?{}( const volatile DT * &, const DT * ); 732 forall( DT & ) void ?{}( const volatile DT * &, volatile DT * ); 733 forall( DT & ) void ?{}( const volatile DT * &, const volatile DT * ); 734 735 forall( DT & ) void ?{}( void * &, DT * ); 736 forall( DT & ) void ?{}( const void * &, DT * ); 737 forall( DT & ) void ?{}( const void * &, const DT * ); 738 forall( DT & ) void ?{}( volatile void * &, DT * ); 739 forall( DT & ) void ?{}( volatile void * &, volatile DT * ); 740 forall( DT & ) void ?{}( const volatile void * &, DT * ); 741 forall( DT & ) void ?{}( const volatile void * &, const DT * ); 742 forall( DT & ) void ?{}( const volatile void * &, volatile DT * ); 743 forall( DT & ) void ?{}( const volatile void * &, const volatile DT * ); 744 744 745 745 //forall( dtype DT ) void ?{}( DT * &, zero_t ); 746 746 //forall( dtype DT ) void ?{}( DT * volatile &, zero_t ); 747 forall( dtype DT) void ?{}( const DT * &, zero_t );747 forall( DT & ) void ?{}( const DT * &, zero_t ); 748 748 //forall( dtype DT ) void ?{}( volatile DT * &, zero_t ); 749 749 //forall( dtype DT ) void ?{}( volatile DT * volatile &, zero_t ); 750 forall( dtype DT) void ?{}( const volatile DT * &, zero_t );750 forall( DT & ) void ?{}( const volatile DT * &, zero_t ); 751 751 752 752 forall( ftype FT ) void ?{}( FT * &, zero_t ); … … 755 755 forall( ftype FT ) void ?{}( FT * & ); 756 756 757 forall( dtype DT) void ?{}( DT * &);758 forall( dtype DT) void ?{}( const DT * &);759 forall( dtype DT) void ?{}( volatile DT * &);760 forall( dtype DT) void ?{}( const volatile DT * &);757 forall( DT & ) void ?{}( DT * &); 758 forall( DT & ) void ?{}( const DT * &); 759 forall( DT & ) void ?{}( volatile DT * &); 760 forall( DT & ) void ?{}( const volatile DT * &); 761 761 762 762 void ?{}( void * &); … … 768 768 forall( ftype FT ) void ^?{}( FT * & ); 769 769 770 forall( dtype DT) void ^?{}( DT * &);771 forall( dtype DT) void ^?{}( const DT * &);772 forall( dtype DT) void ^?{}( volatile DT * &);773 forall( dtype DT) void ^?{}( const volatile DT * &);770 forall( DT & ) void ^?{}( DT * &); 771 forall( DT & ) void ^?{}( const DT * &); 772 forall( DT & ) void ^?{}( volatile DT * &); 773 forall( DT & ) void ^?{}( const volatile DT * &); 774 774 775 775 void ^?{}( void * &); -
libcfa/prelude/sync-builtins.cf
r5869cea r7b91c0e 206 206 _Bool __sync_bool_compare_and_swap(volatile unsigned __int128 *, unsigned __int128, unsigned __int128,...); 207 207 #endif 208 forall( dtype T) _Bool __sync_bool_compare_and_swap(T * volatile *, T *, T*, ...);208 forall(T &) _Bool __sync_bool_compare_and_swap(T * volatile *, T *, T*, ...); 209 209 210 210 char __sync_val_compare_and_swap(volatile char *, char, char,...); … … 223 223 unsigned __int128 __sync_val_compare_and_swap(volatile unsigned __int128 *, unsigned __int128, unsigned __int128,...); 224 224 #endif 225 forall( dtype T) T * __sync_val_compare_and_swap(T * volatile *, T *, T*,...);225 forall(T &) T * __sync_val_compare_and_swap(T * volatile *, T *, T*,...); 226 226 227 227 char __sync_lock_test_and_set(volatile char *, char,...); … … 326 326 void __atomic_exchange(volatile unsigned __int128 *, volatile unsigned __int128 *, volatile unsigned __int128 *, int); 327 327 #endif 328 forall( dtype T) T * __atomic_exchange_n(T * volatile *, T *, int);329 forall( dtype T) void __atomic_exchange(T * volatile *, T * volatile *, T * volatile *, int);328 forall(T &) T * __atomic_exchange_n(T * volatile *, T *, int); 329 forall(T &) void __atomic_exchange(T * volatile *, T * volatile *, T * volatile *, int); 330 330 331 331 _Bool __atomic_load_n(const volatile _Bool *, int); … … 359 359 void __atomic_load(const volatile unsigned __int128 *, volatile unsigned __int128 *, int); 360 360 #endif 361 forall( dtype T) T * __atomic_load_n(T * const volatile *, int);362 forall( dtype T) void __atomic_load(T * const volatile *, T **, int);361 forall(T &) T * __atomic_load_n(T * const volatile *, int); 362 forall(T &) void __atomic_load(T * const volatile *, T **, int); 363 363 364 364 _Bool __atomic_compare_exchange_n(volatile char *, char *, char, _Bool, int, int); … … 390 390 _Bool __atomic_compare_exchange (volatile unsigned __int128 *, unsigned __int128 *, unsigned __int128 *, _Bool, int, int); 391 391 #endif 392 forall( dtype T) _Bool __atomic_compare_exchange_n (T * volatile *, T **, T*, _Bool, int, int);393 forall( dtype T) _Bool __atomic_compare_exchange (T * volatile *, T **, T**, _Bool, int, int);392 forall(T &) _Bool __atomic_compare_exchange_n (T * volatile *, T **, T*, _Bool, int, int); 393 forall(T &) _Bool __atomic_compare_exchange (T * volatile *, T **, T**, _Bool, int, int); 394 394 395 395 void __atomic_store_n(volatile _Bool *, _Bool, int); … … 423 423 void __atomic_store(volatile unsigned __int128 *, unsigned __int128 *, int); 424 424 #endif 425 forall( dtype T) void __atomic_store_n(T * volatile *, T *, int);426 forall( dtype T) void __atomic_store(T * volatile *, T **, int);425 forall(T &) void __atomic_store_n(T * volatile *, T *, int); 426 forall(T &) void __atomic_store(T * volatile *, T **, int); 427 427 428 428 char __atomic_add_fetch (volatile char *, char, int); -
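Aside: the atomic builtin wrappers above are now declared over forall(T &) pointer types. A hedged sketch, not from the changeset, of how the polymorphic __atomic_compare_exchange_n declaration is used; node, top and push are made-up names:

    struct node { struct node * next; };
    static struct node * volatile top = 0p;

    // Treiber-style push: retry the compare-and-swap until no other thread has moved the top pointer
    static void push( struct node * n ) {
        struct node * old = top;
        do {
            n->next = old;                       // link above the head value observed last
        } while( ! __atomic_compare_exchange_n( &top, &old, n, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) );
    }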
libcfa/src/bitmanip.hfa
r5869cea r7b91c0e 100 100 unsigned long long int floor2( unsigned long long int n, unsigned long long int align ) { verify( is_pow2( align ) ); return n & -align; } 101 101 102 // forall( otypeT | { T ?&?( T, T ); T -?( T ); } )102 // forall( T | { T ?&?( T, T ); T -?( T ); } ) 103 103 // T floor2( T n, T align ) { verify( is_pow2( align ) ); return n & -align; } 104 104 … … 115 115 unsigned long long int ceiling2( unsigned long long int n, unsigned long long int align ) { verify( is_pow2( align ) ); return -floor2( -n, align ); } 116 116 117 // forall( otypeT | { T floor2( T, T ); T -?( T ); } )117 // forall( T | { T floor2( T, T ); T -?( T ); } ) 118 118 // T ceiling2( T n, T align ) { verify( is_pow2( align ) ); return -floor2( -n, align ); } 119 119 } // distribution -
libcfa/src/bits/algorithm.hfa
r5869cea r7b91c0e 17 17 18 18 #ifdef SAFE_SORT 19 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort2( T * arr );20 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort3( T * arr );21 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort4( T * arr );22 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort5( T * arr );23 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort6( T * arr );24 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sortN( T * arr, size_t dim );19 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort2( T * arr ); 20 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort3( T * arr ); 21 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort4( T * arr ); 22 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort5( T * arr ); 23 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort6( T * arr ); 24 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sortN( T * arr, size_t dim ); 25 25 26 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } )26 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) 27 27 static inline void __libcfa_small_sort( T * arr, size_t dim ) { 28 28 switch( dim ) { … … 41 41 #define SWAP(x,y) { T a = min(arr[x], arr[y]); T b = max(arr[x], arr[y]); arr[x] = a; arr[y] = b;} 42 42 43 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } )43 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) 44 44 static inline void __libcfa_small_sort2( T * arr ) { 45 45 SWAP(0, 1); 46 46 } 47 47 48 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } )48 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) 49 49 static inline void __libcfa_small_sort3( T * arr ) { 50 50 SWAP(1, 2); … … 53 53 } 54 54 55 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } )55 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) 56 56 static inline void __libcfa_small_sort4( T * arr ) { 57 57 SWAP(0, 1); … … 62 62 } 63 63 64 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } )64 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) 65 65 static inline void __libcfa_small_sort5( T * arr ) { 66 66 SWAP(0, 1); … … 75 75 } 76 76 77 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } )77 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) 78 78 static inline void __libcfa_small_sort6( T * arr ) { 79 79 SWAP(1, 2); … … 91 91 } 92 92 93 forall( otypeT | { int ?<?( T, T ); int ?>?( T, T ); } )93 forall( T | { int ?<?( T, T ); int ?>?( T, T ); } ) 94 94 static inline void __libcfa_small_sortN( T * arr, size_t dim ) { 95 95 int i, j; … … 112 112 static inline void __libcfa_small_sortN( void* * arr, size_t dim ); 113 113 114 forall( dtype T)114 forall( T & ) 115 115 static inline void __libcfa_small_sort( T* * arr, size_t dim ) { 116 116 switch( dim ) { -
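Aside: these sorting helpers are internal (bits/ header) and the polymorphic variants above are only compiled when SAFE_SORT is defined. A small usage sketch under that assumption; the demo routine and array contents are made up:

    static void demo() {
        int a[5] = { 4, 1, 5, 2, 3 };
        __libcfa_small_sort( a, 5 );             // resolves the ?<? / ?>? assertions with the int operators
        // a is now { 1, 2, 3, 4, 5 }
    }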
libcfa/src/bits/collection.hfa
r5869cea r7b91c0e 31 31 32 32 // // wrappers to make Collection have T 33 // forall( dtype T) {33 // forall( T & ) { 34 34 // T *& Next( T * n ) { 35 35 // return (T *)Next( (Colable *)n ); … … 38 38 } // distribution 39 39 40 forall( dtype T| { T *& Next ( T * ); } ) {40 static inline forall( T & | { T *& Next ( T * ); } ) { 41 41 bool listed( T * n ) { 42 42 return Next( n ) != 0p; … … 76 76 } // post: elts = null 77 77 78 forall( dtype T) {78 forall( T & ) { 79 79 T * Curr( ColIter & ci ) with( ci ) { 80 80 return (T *)curr; -
libcfa/src/bits/containers.hfa
r5869cea r7b91c0e 23 23 24 24 #ifdef __cforall 25 forall( dtype T)25 forall(T &) 26 26 #else 27 27 #define T void … … 40 40 41 41 #ifdef __cforall 42 // forall( otypeT | sized(T))42 // forall(T | sized(T)) 43 43 // static inline void ?{}(__small_array(T) & this) {} 44 44 45 forall( dtype T| sized(T))45 forall(T & | sized(T)) 46 46 static inline T & ?[?]( __small_array(T) & this, __lock_size_t idx ) { 47 47 return ((typeof(this.data))this.data)[idx]; 48 48 } 49 49 50 forall( dtype T| sized(T))50 forall(T & | sized(T)) 51 51 static inline T & ?[?]( const __small_array(T) & this, __lock_size_t idx ) { 52 52 return ((typeof(this.data))this.data)[idx]; 53 53 } 54 54 55 forall( dtype T)55 forall(T &) 56 56 static inline T * begin( const __small_array(T) & this ) { 57 57 return ((typeof(this.data))this.data); 58 58 } 59 59 60 forall( dtype T| sized(T))60 forall(T & | sized(T)) 61 61 static inline T * end( const __small_array(T) & this ) { 62 62 return ((typeof(this.data))this.data) + this.size; … … 69 69 70 70 #ifdef __cforall 71 trait is_node( dtype T) {71 trait is_node(T &) { 72 72 T *& get_next( T & ); 73 73 }; … … 78 78 //----------------------------------------------------------------------------- 79 79 #ifdef __cforall 80 forall( dtype TYPE)80 forall(TYPE &) 81 81 #define T TYPE 82 82 #else … … 95 95 96 96 #ifdef __cforall 97 forall( dtype T)97 forall(T &) 98 98 static inline void ?{}( __stack(T) & this ) { 99 99 (this.top){ 0p }; 100 100 } 101 101 102 static inline forall( dtype T| is_node(T) ) {102 static inline forall( T & | is_node(T) ) { 103 103 void push( __stack(T) & this, T * val ) { 104 104 verify( !get_next( *val ) ); … … 126 126 //----------------------------------------------------------------------------- 127 127 #ifdef __cforall 128 forall( dtype TYPE)128 forall(TYPE &) 129 129 #define T TYPE 130 130 #else … … 144 144 145 145 #ifdef __cforall 146 static inline forall( dtype T| is_node(T) ) {146 static inline forall( T & | is_node(T) ) { 147 147 void ?{}( __queue(T) & this ) with( this ) { 148 148 (this.head){ 1p }; … … 215 215 //----------------------------------------------------------------------------- 216 216 #ifdef __cforall 217 forall( dtype TYPE)217 forall(TYPE &) 218 218 #define T TYPE 219 219 #define __getter_t * [T * & next, T * & prev] ( T & ) … … 237 237 238 238 #ifdef __cforall 239 forall( dtype T)239 forall(T & ) 240 240 static inline [void] ?{}( __dllist(T) & this, * [T * & next, T * & prev] ( T & ) __get ) { 241 241 (this.head){ 0p }; … … 245 245 #define next 0 246 246 #define prev 1 247 static inline forall( dtype T) {247 static inline forall(T &) { 248 248 void push_front( __dllist(T) & this, T & node ) with( this ) { 249 249 verify(__get); -
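Aside: the intrusive containers above are constrained by the is_node trait, which only asks for a get_next accessor. A hedged sketch, not part of this changeset, of a type satisfying the trait and being pushed on the internal __stack; request, pending and submit are hypothetical names:

    struct request {
        struct request * link;                   // intrusive link consumed by the container; must be 0p when not listed
        int payload;
    };
    static inline request *& get_next( request & this ) {
        return this.link;                        // satisfies the is_node(request) trait
    }

    static __stack(request) pending;

    static void submit( request & r ) {
        push( pending, &r );                     // push asserts the node is not already listed
    }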
libcfa/src/bits/defs.hfa
r5869cea r7b91c0e 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // defs.hfa -- 7 // defs.hfa -- Commen macros, functions and typedefs 8 // Most files depend on them and they are always useful to have. 9 // 10 // *** Must not contain code specific to libcfathread *** 8 11 // 9 12 // Author : Thierry Delisle … … 62 65 #endif 63 66 } 67 68 // pause to prevent excess processor bus usage 69 #if defined( __i386 ) || defined( __x86_64 ) 70 #define Pause() __asm__ __volatile__ ( "pause" : : : ) 71 #elif defined( __ARM_ARCH ) 72 #define Pause() __asm__ __volatile__ ( "YIELD" : : : ) 73 #else 74 #error unsupported architecture 75 #endif -
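Aside: the Pause() macro moved here from bits/locks.hfa issues the architecture's spin-loop hint (x86 pause, ARM YIELD). A minimal sketch of the usual pattern it supports; the ready flag and the routine name are made up:

    static volatile _Bool ready = 0;

    // spin politely until another processor publishes the flag
    static void spin_until_ready(void) {
        while( ! __atomic_load_n( &ready, __ATOMIC_ACQUIRE ) ) Pause();
    }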
libcfa/src/bits/locks.hfa
r5869cea r7b91c0e 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // bits/locks.hfa -- Fast internal locks. 7 // bits/locks.hfa -- Basic spinlocks that are reused in the system. 8 // Used for locks that aren't specific to cforall threads and can be used anywhere 9 // 10 // *** Must not contain code specific to libcfathread *** 8 11 // 9 12 // Author : Thierry Delisle … … 19 22 #include "bits/defs.hfa" 20 23 #include <assert.h> 21 22 #ifdef __cforall23 extern "C" {24 #include <pthread.h>25 }26 #endif27 28 // pause to prevent excess processor bus usage29 #if defined( __i386 ) || defined( __x86_64 )30 #define Pause() __asm__ __volatile__ ( "pause" : : : )31 #elif defined( __ARM_ARCH )32 #define Pause() __asm__ __volatile__ ( "YIELD" : : : )33 #else34 #error unsupported architecture35 #endif36 24 37 25 struct __spinlock_t { … … 104 92 enable_interrupts_noPoll(); 105 93 } 106 107 108 #ifdef __CFA_WITH_VERIFY__109 extern bool __cfaabi_dbg_in_kernel();110 #endif111 112 extern "C" {113 char * strerror(int);114 }115 #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); }116 117 struct __bin_sem_t {118 pthread_mutex_t lock;119 pthread_cond_t cond;120 int val;121 };122 123 static inline void ?{}(__bin_sem_t & this) with( this ) {124 // Create the mutex with error checking125 pthread_mutexattr_t mattr;126 pthread_mutexattr_init( &mattr );127 pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP);128 pthread_mutex_init(&lock, &mattr);129 130 pthread_cond_init (&cond, (const pthread_condattr_t *)0p); // workaround trac#208: cast should not be required131 val = 0;132 }133 134 static inline void ^?{}(__bin_sem_t & this) with( this ) {135 CHECKED( pthread_mutex_destroy(&lock) );136 CHECKED( pthread_cond_destroy (&cond) );137 }138 139 static inline void wait(__bin_sem_t & this) with( this ) {140 verify(__cfaabi_dbg_in_kernel());141 CHECKED( pthread_mutex_lock(&lock) );142 while(val < 1) {143 pthread_cond_wait(&cond, &lock);144 }145 val -= 1;146 CHECKED( pthread_mutex_unlock(&lock) );147 }148 149 static inline bool post(__bin_sem_t & this) with( this ) {150 bool needs_signal = false;151 152 CHECKED( pthread_mutex_lock(&lock) );153 if(val < 1) {154 val += 1;155 pthread_cond_signal(&cond);156 needs_signal = true;157 }158 CHECKED( pthread_mutex_unlock(&lock) );159 160 return needs_signal;161 }162 163 #undef CHECKED164 165 struct $thread;166 extern void park( void );167 extern void unpark( struct $thread * this );168 static inline struct $thread * active_thread ();169 170 // Semaphore which only supports a single thread171 struct single_sem {172 struct $thread * volatile ptr;173 };174 175 static inline {176 void ?{}(single_sem & this) {177 this.ptr = 0p;178 }179 180 void ^?{}(single_sem &) {}181 182 bool wait(single_sem & this) {183 for() {184 struct $thread * expected = this.ptr;185 if(expected == 1p) {186 if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {187 return false;188 }189 }190 else {191 /* paranoid */ verify( expected == 0p );192 if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {193 park();194 return true;195 }196 }197 198 }199 }200 201 bool post(single_sem & this) {202 for() {203 struct $thread * expected = this.ptr;204 if(expected == 1p) return false;205 if(expected == 0p) {206 if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {207 
return false;208 }209 }210 else {211 if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {212 unpark( expected );213 return true;214 }215 }216 }217 }218 }219 220 // Synchronozation primitive which only supports a single thread and one post221 // Similar to a binary semaphore with a 'one shot' semantic222 // is expected to be discarded after each party call their side223 struct oneshot {224 // Internal state :225 // 0p : is initial state (wait will block)226 // 1p : fulfilled (wait won't block)227 // any thread : a thread is currently waiting228 struct $thread * volatile ptr;229 };230 231 static inline {232 void ?{}(oneshot & this) {233 this.ptr = 0p;234 }235 236 void ^?{}(oneshot &) {}237 238 // Wait for the post, return immidiately if it already happened.239 // return true if the thread was parked240 bool wait(oneshot & this) {241 for() {242 struct $thread * expected = this.ptr;243 if(expected == 1p) return false;244 /* paranoid */ verify( expected == 0p );245 if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {246 park();247 /* paranoid */ verify( this.ptr == 1p );248 return true;249 }250 }251 }252 253 // Mark as fulfilled, wake thread if needed254 // return true if a thread was unparked255 bool post(oneshot & this) {256 struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);257 if( got == 0p ) return false;258 unpark( got );259 return true;260 }261 }262 263 // base types for future to build upon264 // It is based on the 'oneshot' type to allow multiple futures265 // to block on the same instance, permitting users to block a single266 // thread on "any of" [a given set of] futures.267 // does not support multiple threads waiting on the same future268 struct future_t {269 // Internal state :270 // 0p : is initial state (wait will block)271 // 1p : fulfilled (wait won't block)272 // 2p : in progress ()273 // 3p : abandoned, server should delete274 // any oneshot : a context has been setup to wait, a thread could wait on it275 struct oneshot * volatile ptr;276 };277 278 static inline {279 void ?{}(future_t & this) {280 this.ptr = 0p;281 }282 283 void ^?{}(future_t &) {}284 285 void reset(future_t & this) {286 // needs to be in 0p or 1p287 __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);288 }289 290 // check if the future is available291 bool available( future_t & this ) {292 return this.ptr == 1p;293 }294 295 // Prepare the future to be waited on296 // intented to be use by wait, wait_any, waitfor, etc. rather than used directly297 bool setup( future_t & this, oneshot & wait_ctx ) {298 /* paranoid */ verify( wait_ctx.ptr == 0p );299 // The future needs to set the wait context300 for() {301 struct oneshot * expected = this.ptr;302 // Is the future already fulfilled?303 if(expected == 1p) return false; // Yes, just return false (didn't block)304 305 // The future is not fulfilled, try to setup the wait context306 /* paranoid */ verify( expected == 0p );307 if(__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {308 return true;309 }310 }311 }312 313 // Stop waiting on a future314 // When multiple futures are waited for together in "any of" pattern315 // futures that weren't fulfilled before the thread woke up316 // should retract the wait ctx317 // intented to be use by wait, wait_any, waitfor, etc. 
rather than used directly318 void retract( future_t & this, oneshot & wait_ctx ) {319 // Remove the wait context320 struct oneshot * got = __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);321 322 // got == 0p: future was never actually setup, just return323 if( got == 0p ) return;324 325 // got == wait_ctx: since fulfil does an atomic_swap,326 // if we got back the original then no one else saw context327 // It is safe to delete (which could happen after the return)328 if( got == &wait_ctx ) return;329 330 // got == 1p: the future is ready and the context was fully consumed331 // the server won't use the pointer again332 // It is safe to delete (which could happen after the return)333 if( got == 1p ) return;334 335 // got == 2p: the future is ready but the context hasn't fully been consumed336 // spin until it is safe to move on337 if( got == 2p ) {338 while( this.ptr != 1p ) Pause();339 return;340 }341 342 // got == any thing else, something wen't wrong here, abort343 abort("Future in unexpected state");344 }345 346 // Mark the future as abandoned, meaning it will be deleted by the server347 bool abandon( future_t & this ) {348 /* paranoid */ verify( this.ptr != 3p );349 350 // Mark the future as abandonned351 struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST);352 353 // If the future isn't already fulfilled, let the server delete it354 if( got == 0p ) return false;355 356 // got == 2p: the future is ready but the context hasn't fully been consumed357 // spin until it is safe to move on358 if( got == 2p ) {359 while( this.ptr != 1p ) Pause();360 got = 1p;361 }362 363 // The future is completed delete it now364 /* paranoid */ verify( this.ptr != 1p );365 free( &this );366 return true;367 }368 369 // from the server side, mark the future as fulfilled370 // delete it if needed371 bool fulfil( future_t & this ) {372 for() {373 struct oneshot * expected = this.ptr;374 // was this abandoned?375 #if defined(__GNUC__) && __GNUC__ >= 7376 #pragma GCC diagnostic push377 #pragma GCC diagnostic ignored "-Wfree-nonheap-object"378 #endif379 if( expected == 3p ) { free( &this ); return false; }380 #if defined(__GNUC__) && __GNUC__ >= 7381 #pragma GCC diagnostic pop382 #endif383 384 /* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen385 /* paranoid */ verify( expected != 2p ); // Future is bein fulfilled by someone else, this is even less supported then the previous case.386 387 // If there is a wait context, we need to consume it and mark it as consumed after388 // If there is no context then we can skip the in progress phase389 struct oneshot * want = expected == 0p ? 
1p : 2p;390 if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {391 if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return false; }392 bool ret = post( *expected );393 __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);394 return ret;395 }396 }397 398 }399 400 // Wait for the future to be fulfilled401 bool wait( future_t & this ) {402 oneshot temp;403 if( !setup(this, temp) ) return false;404 405 // Wait context is setup, just wait on it406 bool ret = wait( temp );407 408 // Wait for the future to tru409 while( this.ptr == 2p ) Pause();410 // Make sure the state makes sense411 // Should be fulfilled, could be in progress but it's out of date if so412 // since if that is the case, the oneshot was fulfilled (unparking this thread)413 // and the oneshot should not be needed any more414 __attribute__((unused)) struct oneshot * was = this.ptr;415 /* paranoid */ verifyf( was == 1p, "Expected this.ptr to be 1p, was %p\n", was );416 417 // Mark the future as fulfilled, to be consistent418 // with potential calls to avail419 // this.ptr = 1p;420 return ret;421 }422 }423 94 #endif -
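Aside: the __bin_sem_t, single_sem, oneshot and future_t primitives deleted from bits/locks.hfa here are not dropped; this changeset relocates them into concurrency/kernel.hfa and concurrency/kernel/fwd.hfa (see those files below), leaving only the thread-agnostic spinlock in this header. A minimal sketch of the oneshot handshake the removed comments describe; the latch, consumer and producer names are hypothetical:

    oneshot latch;

    // waiting side: parks the current thread unless the post already happened
    void consumer() {
        wait( latch );                           // returns true only if the thread actually parked
    }

    // signalling side: flip the state to fulfilled and unpark the waiter if one is present
    void producer() {
        post( latch );                           // returns true only if a thread was unparked
    }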
libcfa/src/bits/queue.hfa
r5869cea r7b91c0e 9 9 // instead of being null. 10 10 11 forall( dtype T| { T *& Next ( T * ); } ) {11 forall( T & | { T *& Next ( T * ); } ) { 12 12 struct Queue { 13 13 inline Collection; // Plan 9 inheritance … … 151 151 } // distribution 152 152 153 forall( dtype T| { T *& Next ( T * ); } ) {153 forall( T & | { T *& Next ( T * ); } ) { 154 154 struct QueueIter { 155 155 inline ColIter; // Plan 9 inheritance -
libcfa/src/bits/sequence.hfa
r5869cea r7b91c0e 29 29 30 30 // // wrappers to make Collection have T 31 // forall( dtype T) {31 // forall( T & ) { 32 32 // T *& Back( T * n ) { 33 33 // return (T *)Back( (Seqable *)n ); … … 43 43 // and the back field of the last node points at the first node (circular). 44 44 45 forall( dtype T| { T *& Back ( T * ); T *& Next ( T * ); } ) {45 forall( T & | { T *& Back ( T * ); T *& Next ( T * ); } ) { 46 46 struct Sequence { 47 47 inline Collection; // Plan 9 inheritance … … 231 231 } // distribution 232 232 233 forall( dtype T| { T *& Back ( T * ); T *& Next ( T * ); } ) {233 forall( T & | { T *& Back ( T * ); T *& Next ( T * ); } ) { 234 234 // SeqIter(T) is used to iterate over a Sequence(T) in head-to-tail order. 235 235 struct SeqIter { -
libcfa/src/bits/stack.hfa
r5869cea r7b91c0e 9 9 // instead of being null. 10 10 11 forall( dtype T| { T *& Next ( T * ); } ) {11 forall( T & | { T *& Next ( T * ); } ) { 12 12 struct Stack { 13 13 inline Collection; // Plan 9 inheritance … … 67 67 // order returned by drop(). 68 68 69 forall( dtype T| { T *& Next ( T * ); } ) {69 forall( T & | { T *& Next ( T * ); } ) { 70 70 struct StackIter { 71 71 inline ColIter; // Plan 9 inheritance -
libcfa/src/common.cfa
r5869cea r7b91c0e 23 23 [ long int, long int ] div( long int num, long int denom ) { ldiv_t qr = ldiv( num, denom ); return [ qr.quot, qr.rem ]; } 24 24 [ long long int, long long int ] div( long long int num, long long int denom ) { lldiv_t qr = lldiv( num, denom ); return [ qr.quot, qr.rem ]; } 25 forall( otypeT | { T ?/?( T, T ); T ?%?( T, T ); } )25 forall( T | { T ?/?( T, T ); T ?%?( T, T ); } ) 26 26 [ T, T ] div( T num, T denom ) { return [ num / denom, num % denom ]; } 27 27 -
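Aside: div above returns the quotient/remainder pair as a CFA tuple rather than a div_t struct. A small usage sketch, not part of the diff, with made-up values:

    void quotient_demo() {
        int q, r;
        [ q, r ] = div( 13, 4 );                 // q == 3, r == 1, via the polymorphic overload
    }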
libcfa/src/common.hfa
r5869cea r7b91c0e 21 21 [ long int, long int ] div( long int num, long int denom ); 22 22 [ long long int, long long int ] div( long long int num, long long int denom ); 23 forall( otypeT | { T ?/?( T, T ); T ?%?( T, T ); } )23 forall( T | { T ?/?( T, T ); T ?%?( T, T ); } ) 24 24 [ T, T ] div( T num, T demon ); 25 25 … … 61 61 } // distribution 62 62 63 forall( otypeT | { void ?{}( T &, zero_t ); int ?<?( T, T ); T -?( T ); } )63 forall( T | { void ?{}( T &, zero_t ); int ?<?( T, T ); T -?( T ); } ) 64 64 T abs( T ); 65 65 … … 70 70 intptr_t min( intptr_t t1, intptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization 71 71 uintptr_t min( uintptr_t t1, uintptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization 72 forall( otypeT | { int ?<?( T, T ); } )72 forall( T | { int ?<?( T, T ); } ) 73 73 T min( T t1, T t2 ) { return t1 < t2 ? t1 : t2; } 74 74 … … 76 76 intptr_t max( intptr_t t1, intptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization 77 77 uintptr_t max( uintptr_t t1, uintptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization 78 forall( otypeT | { int ?>?( T, T ); } )78 forall( T | { int ?>?( T, T ); } ) 79 79 T max( T t1, T t2 ) { return t1 > t2 ? t1 : t2; } 80 80 81 forall( otypeT | { T min( T, T ); T max( T, T ); } )81 forall( T | { T min( T, T ); T max( T, T ); } ) 82 82 T clamp( T value, T min_val, T max_val ) { return max( min_val, min( value, max_val ) ); } 83 83 84 forall( otypeT )84 forall( T ) 85 85 void swap( T & v1, T & v2 ) { T temp = v1; v1 = v2; v2 = temp; } 86 86 } // distribution -
libcfa/src/concurrency/coroutine.cfa
r5869cea r7b91c0e 46 46 47 47 //----------------------------------------------------------------------------- 48 FORALL_DATA_INSTANCE(CoroutineCancelled, ( dtype coroutine_t), (coroutine_t))49 50 forall( dtype T)48 FORALL_DATA_INSTANCE(CoroutineCancelled, (coroutine_t &), (coroutine_t)) 49 50 forall(T &) 51 51 void mark_exception(CoroutineCancelled(T) *) {} 52 52 53 forall( dtype T)53 forall(T &) 54 54 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) { 55 55 dst->virtual_table = src->virtual_table; … … 58 58 } 59 59 60 forall( dtype T)60 forall(T &) 61 61 const char * msg(CoroutineCancelled(T) *) { 62 62 return "CoroutineCancelled(...)"; … … 64 64 65 65 // This code should not be inlined. It is the error path on resume. 66 forall( dtype T| is_coroutine(T))66 forall(T & | is_coroutine(T)) 67 67 void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ) { 68 68 verify( desc->cancellation ); … … 148 148 // Part of the Public API 149 149 // Not inline since only ever called once per coroutine 150 forall( dtype T| is_coroutine(T))150 forall(T & | is_coroutine(T)) 151 151 void prime(T& cor) { 152 152 $coroutine* this = get_coroutine(cor); -
libcfa/src/concurrency/coroutine.hfa
r5869cea r7b91c0e 22 22 //----------------------------------------------------------------------------- 23 23 // Exception thrown from resume when a coroutine stack is cancelled. 24 FORALL_DATA_EXCEPTION(CoroutineCancelled, ( dtype coroutine_t), (coroutine_t)) (24 FORALL_DATA_EXCEPTION(CoroutineCancelled, (coroutine_t &), (coroutine_t)) ( 25 25 coroutine_t * the_coroutine; 26 26 exception_t * the_exception; 27 27 ); 28 28 29 forall( dtype T)29 forall(T &) 30 30 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src); 31 31 32 forall( dtype T)32 forall(T &) 33 33 const char * msg(CoroutineCancelled(T) *); 34 34 … … 37 37 // Anything that implements this trait can be resumed. 38 38 // Anything that is resumed is a coroutine. 39 trait is_coroutine( dtype T| IS_RESUMPTION_EXCEPTION(CoroutineCancelled, (T))) {39 trait is_coroutine(T & | IS_RESUMPTION_EXCEPTION(CoroutineCancelled, (T))) { 40 40 void main(T & this); 41 41 $coroutine * get_coroutine(T & this); … … 60 60 //----------------------------------------------------------------------------- 61 61 // Public coroutine API 62 forall( dtype T| is_coroutine(T))62 forall(T & | is_coroutine(T)) 63 63 void prime(T & cor); 64 64 … … 72 72 void __cfactx_invoke_coroutine(void (*main)(void *), void * this); 73 73 74 forall( dtype T)74 forall(T &) 75 75 void __cfactx_start(void (*main)(T &), struct $coroutine * cor, T & this, void (*invoke)(void (*main)(void *), void *)); 76 76 … … 129 129 } 130 130 131 forall( dtype T| is_coroutine(T))131 forall(T & | is_coroutine(T)) 132 132 void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ); 133 133 134 134 // Resume implementation inlined for performance 135 forall( dtype T| is_coroutine(T))135 forall(T & | is_coroutine(T)) 136 136 static inline T & resume(T & cor) { 137 137 // optimization : read TLS once and reuse it -
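Aside: the is_coroutine trait above now also carries the CoroutineCancelled resumption assertion. A condensed sketch of a coroutine that satisfies the trait through the coroutine keyword; this is the usual Fibonacci example from the CFA documentation, not code from this diff:

    coroutine Fib { int fn; };                   // the keyword generates get_coroutine, satisfying is_coroutine

    void main( Fib & f ) with( f ) {
        int f1 = 0, f2 = 1;
        for () {                                 // forever: publish the next value, then yield to the resumer
            fn = f1;
            f1 = f2;  f2 = fn + f1;
            suspend;
        }
    }

    int next( Fib & f ) {
        return resume( f ).fn;                   // resume returns the coroutine, so the call chains naturally
    }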
libcfa/src/concurrency/future.hfa
r5869cea r7b91c0e 19 19 #include "monitor.hfa" 20 20 21 forall( otypeT ) {21 forall( T ) { 22 22 struct future { 23 23 inline future_t; … … 58 58 } 59 59 60 forall( otypeT ) {60 forall( T ) { 61 61 monitor multi_future { 62 62 inline future_t; -
libcfa/src/concurrency/io/types.hfa
r5869cea r7b91c0e 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // io/types.hfa -- 7 // io/types.hfa -- PRIVATE 8 // Types used by the I/O subsystem 8 9 // 9 10 // Author : Thierry Delisle … … 21 22 22 23 #include "bits/locks.hfa" 24 #include "kernel/fwd.hfa" 23 25 24 26 #if defined(CFA_HAVE_LINUX_IO_URING_H) -
libcfa/src/concurrency/kernel.cfa
r5869cea r7b91c0e 224 224 } 225 225 226 V( this->terminated );226 post( this->terminated ); 227 227 228 228 if(this == mainProcessor) { … … 624 624 // Unexpected Terminating logic 625 625 //============================================================================================= 626 627 extern "C" { 628 extern void __cfaabi_real_abort(void); 629 } 630 static volatile bool kernel_abort_called = false; 631 632 void * kernel_abort(void) __attribute__ ((__nothrow__)) { 633 // abort cannot be recursively entered by the same or different processors because all signal handlers return when 634 // the globalAbort flag is true. 635 bool first = !__atomic_test_and_set( &kernel_abort_called, __ATOMIC_SEQ_CST); 636 637 // first task to abort ? 638 if ( !first ) { 639 // We aren't the first to abort. 640 // I give up, just let C handle it 641 __cfaabi_real_abort(); 642 } 643 644 // disable interrupts, it no longer makes sense to try to interrupt this processor 645 disable_interrupts(); 646 647 return __cfaabi_tls.this_thread; 648 } 649 650 void kernel_abort_msg( void * kernel_data, char * abort_text, int abort_text_size ) { 651 $thread * thrd = ( $thread * ) kernel_data; 626 void __kernel_abort_msg( char * abort_text, int abort_text_size ) { 627 $thread * thrd = __cfaabi_tls.this_thread; 652 628 653 629 if(thrd) { … … 669 645 } 670 646 671 int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {672 return get_coroutine( kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2;647 int __kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) { 648 return get_coroutine(__cfaabi_tls.this_thread) == get_coroutine(mainThread) ? 4 : 2; 673 649 } 674 650 … … 688 664 // Kernel Utilities 689 665 //============================================================================================= 690 //-----------------------------------------------------------------------------691 // Locks692 void ?{}( semaphore & this, int count = 1 ) {693 (this.lock){};694 this.count = count;695 (this.waiting){};696 }697 void ^?{}(semaphore & this) {}698 699 bool P(semaphore & this) with( this ){700 lock( lock __cfaabi_dbg_ctx2 );701 count -= 1;702 if ( count < 0 ) {703 // queue current task704 append( waiting, active_thread() );705 706 // atomically release spin lock and block707 unlock( lock );708 park();709 return true;710 }711 else {712 unlock( lock );713 return false;714 }715 }716 717 bool V(semaphore & this) with( this ) {718 $thread * thrd = 0p;719 lock( lock __cfaabi_dbg_ctx2 );720 count += 1;721 if ( count <= 0 ) {722 // remove task at head of waiting list723 thrd = pop_head( waiting );724 }725 726 unlock( lock );727 728 // make new owner729 unpark( thrd );730 731 return thrd != 0p;732 }733 734 bool V(semaphore & this, unsigned diff) with( this ) {735 $thread * thrd = 0p;736 lock( lock __cfaabi_dbg_ctx2 );737 int release = max(-count, (int)diff);738 count += diff;739 for(release) {740 unpark( pop_head( waiting ) );741 }742 743 unlock( lock );744 745 return thrd != 0p;746 }747 748 666 //----------------------------------------------------------------------------- 749 667 // Debug -
libcfa/src/concurrency/kernel.hfa
r5869cea r7b91c0e 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // kernel -- 7 // kernel -- Header containing the core of the kernel API 8 8 // 9 9 // Author : Thierry Delisle … … 24 24 extern "C" { 25 25 #include <bits/pthreadtypes.h> 26 #include <pthread.h> 26 27 #include <linux/types.h> 27 28 } 28 29 29 30 //----------------------------------------------------------------------------- 30 // Locks 31 struct semaphore { 32 __spinlock_t lock; 33 int count; 34 __queue_t($thread) waiting; 35 }; 36 37 void ?{}(semaphore & this, int count = 1); 38 void ^?{}(semaphore & this); 39 bool P (semaphore & this); 40 bool V (semaphore & this); 41 bool V (semaphore & this, unsigned count); 31 // Underlying Locks 32 #ifdef __CFA_WITH_VERIFY__ 33 extern bool __cfaabi_dbg_in_kernel(); 34 #endif 35 36 extern "C" { 37 char * strerror(int); 38 } 39 #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); } 40 41 struct __bin_sem_t { 42 pthread_mutex_t lock; 43 pthread_cond_t cond; 44 int val; 45 }; 46 47 static inline void ?{}(__bin_sem_t & this) with( this ) { 48 // Create the mutex with error checking 49 pthread_mutexattr_t mattr; 50 pthread_mutexattr_init( &mattr ); 51 pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP); 52 pthread_mutex_init(&lock, &mattr); 53 54 pthread_cond_init (&cond, (const pthread_condattr_t *)0p); // workaround trac#208: cast should not be required 55 val = 0; 56 } 57 58 static inline void ^?{}(__bin_sem_t & this) with( this ) { 59 CHECKED( pthread_mutex_destroy(&lock) ); 60 CHECKED( pthread_cond_destroy (&cond) ); 61 } 62 63 static inline void wait(__bin_sem_t & this) with( this ) { 64 verify(__cfaabi_dbg_in_kernel()); 65 CHECKED( pthread_mutex_lock(&lock) ); 66 while(val < 1) { 67 pthread_cond_wait(&cond, &lock); 68 } 69 val -= 1; 70 CHECKED( pthread_mutex_unlock(&lock) ); 71 } 72 73 static inline bool post(__bin_sem_t & this) with( this ) { 74 bool needs_signal = false; 75 76 CHECKED( pthread_mutex_lock(&lock) ); 77 if(val < 1) { 78 val += 1; 79 pthread_cond_signal(&cond); 80 needs_signal = true; 81 } 82 CHECKED( pthread_mutex_unlock(&lock) ); 83 84 return needs_signal; 85 } 86 87 #undef CHECKED 42 88 43 89 … … 91 137 92 138 // Termination synchronisation (user semaphore) 93 semaphoreterminated;139 oneshot terminated; 94 140 95 141 // pthread Stack -
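The counting semaphore formerly declared in this header is replaced by __bin_sem_t, a binary semaphore built directly on a pthread mutex and condition variable, and the processor's termination field becomes a oneshot. The sketch below only illustrates the wait/post handshake the new type implements; __bin_sem_t is kernel-internal (its wait asserts __cfaabi_dbg_in_kernel()), so the calling context shown is an assumption rather than user-facing API:

    __bin_sem_t sem;          // val starts at 0, so the next wait blocks

    // sleeping side (e.g. an idle processor):
    wait( sem );              // blocks on the condition variable until val becomes 1, then consumes it

    // waking side:
    post( sem );              // sets val to 1 and signals; returns true only if a sleeper needed waking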
libcfa/src/concurrency/kernel/fwd.hfa
r5869cea r7b91c0e 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // kernel/fwd.hfa -- 7 // kernel/fwd.hfa -- PUBLIC 8 // Fundamental code needed to implement threading M.E.S. algorithms. 8 9 // 9 10 // Author : Thierry Delisle … … 134 135 extern uint64_t thread_rand(); 135 136 137 // Semaphore which only supports a single thread 138 struct single_sem { 139 struct $thread * volatile ptr; 140 }; 141 142 static inline { 143 void ?{}(single_sem & this) { 144 this.ptr = 0p; 145 } 146 147 void ^?{}(single_sem &) {} 148 149 bool wait(single_sem & this) { 150 for() { 151 struct $thread * expected = this.ptr; 152 if(expected == 1p) { 153 if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 154 return false; 155 } 156 } 157 else { 158 /* paranoid */ verify( expected == 0p ); 159 if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 160 park(); 161 return true; 162 } 163 } 164 165 } 166 } 167 168 bool post(single_sem & this) { 169 for() { 170 struct $thread * expected = this.ptr; 171 if(expected == 1p) return false; 172 if(expected == 0p) { 173 if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 174 return false; 175 } 176 } 177 else { 178 if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 179 unpark( expected ); 180 return true; 181 } 182 } 183 } 184 } 185 } 186 187 // Synchronozation primitive which only supports a single thread and one post 188 // Similar to a binary semaphore with a 'one shot' semantic 189 // is expected to be discarded after each party call their side 190 struct oneshot { 191 // Internal state : 192 // 0p : is initial state (wait will block) 193 // 1p : fulfilled (wait won't block) 194 // any thread : a thread is currently waiting 195 struct $thread * volatile ptr; 196 }; 197 198 static inline { 199 void ?{}(oneshot & this) { 200 this.ptr = 0p; 201 } 202 203 void ^?{}(oneshot &) {} 204 205 // Wait for the post, return immidiately if it already happened. 206 // return true if the thread was parked 207 bool wait(oneshot & this) { 208 for() { 209 struct $thread * expected = this.ptr; 210 if(expected == 1p) return false; 211 /* paranoid */ verify( expected == 0p ); 212 if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 213 park(); 214 /* paranoid */ verify( this.ptr == 1p ); 215 return true; 216 } 217 } 218 } 219 220 // Mark as fulfilled, wake thread if needed 221 // return true if a thread was unparked 222 bool post(oneshot & this) { 223 struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST); 224 if( got == 0p ) return false; 225 unpark( got ); 226 return true; 227 } 228 } 229 230 // base types for future to build upon 231 // It is based on the 'oneshot' type to allow multiple futures 232 // to block on the same instance, permitting users to block a single 233 // thread on "any of" [a given set of] futures. 
234 // does not support multiple threads waiting on the same future 235 struct future_t { 236 // Internal state : 237 // 0p : is initial state (wait will block) 238 // 1p : fulfilled (wait won't block) 239 // 2p : in progress () 240 // 3p : abandoned, server should delete 241 // any oneshot : a context has been setup to wait, a thread could wait on it 242 struct oneshot * volatile ptr; 243 }; 244 245 static inline { 246 void ?{}(future_t & this) { 247 this.ptr = 0p; 248 } 249 250 void ^?{}(future_t &) {} 251 252 void reset(future_t & this) { 253 // needs to be in 0p or 1p 254 __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST); 255 } 256 257 // check if the future is available 258 bool available( future_t & this ) { 259 return this.ptr == 1p; 260 } 261 262 // Prepare the future to be waited on 263 // intented to be use by wait, wait_any, waitfor, etc. rather than used directly 264 bool setup( future_t & this, oneshot & wait_ctx ) { 265 /* paranoid */ verify( wait_ctx.ptr == 0p ); 266 // The future needs to set the wait context 267 for() { 268 struct oneshot * expected = this.ptr; 269 // Is the future already fulfilled? 270 if(expected == 1p) return false; // Yes, just return false (didn't block) 271 272 // The future is not fulfilled, try to setup the wait context 273 /* paranoid */ verify( expected == 0p ); 274 if(__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 275 return true; 276 } 277 } 278 } 279 280 // Stop waiting on a future 281 // When multiple futures are waited for together in "any of" pattern 282 // futures that weren't fulfilled before the thread woke up 283 // should retract the wait ctx 284 // intented to be use by wait, wait_any, waitfor, etc. rather than used directly 285 void retract( future_t & this, oneshot & wait_ctx ) { 286 // Remove the wait context 287 struct oneshot * got = __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST); 288 289 // got == 0p: future was never actually setup, just return 290 if( got == 0p ) return; 291 292 // got == wait_ctx: since fulfil does an atomic_swap, 293 // if we got back the original then no one else saw context 294 // It is safe to delete (which could happen after the return) 295 if( got == &wait_ctx ) return; 296 297 // got == 1p: the future is ready and the context was fully consumed 298 // the server won't use the pointer again 299 // It is safe to delete (which could happen after the return) 300 if( got == 1p ) return; 301 302 // got == 2p: the future is ready but the context hasn't fully been consumed 303 // spin until it is safe to move on 304 if( got == 2p ) { 305 while( this.ptr != 1p ) Pause(); 306 return; 307 } 308 309 // got == any thing else, something wen't wrong here, abort 310 abort("Future in unexpected state"); 311 } 312 313 // Mark the future as abandoned, meaning it will be deleted by the server 314 bool abandon( future_t & this ) { 315 /* paranoid */ verify( this.ptr != 3p ); 316 317 // Mark the future as abandonned 318 struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST); 319 320 // If the future isn't already fulfilled, let the server delete it 321 if( got == 0p ) return false; 322 323 // got == 2p: the future is ready but the context hasn't fully been consumed 324 // spin until it is safe to move on 325 if( got == 2p ) { 326 while( this.ptr != 1p ) Pause(); 327 got = 1p; 328 } 329 330 // The future is completed delete it now 331 /* paranoid */ verify( this.ptr != 1p ); 332 free( &this ); 333 return true; 334 } 335 336 // from 
the server side, mark the future as fulfilled 337 // delete it if needed 338 bool fulfil( future_t & this ) { 339 for() { 340 struct oneshot * expected = this.ptr; 341 // was this abandoned? 342 #if defined(__GNUC__) && __GNUC__ >= 7 343 #pragma GCC diagnostic push 344 #pragma GCC diagnostic ignored "-Wfree-nonheap-object" 345 #endif 346 if( expected == 3p ) { free( &this ); return false; } 347 #if defined(__GNUC__) && __GNUC__ >= 7 348 #pragma GCC diagnostic pop 349 #endif 350 351 /* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen 352 /* paranoid */ verify( expected != 2p ); // Future is bein fulfilled by someone else, this is even less supported then the previous case. 353 354 // If there is a wait context, we need to consume it and mark it as consumed after 355 // If there is no context then we can skip the in progress phase 356 struct oneshot * want = expected == 0p ? 1p : 2p; 357 if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 358 if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return false; } 359 bool ret = post( *expected ); 360 __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST); 361 return ret; 362 } 363 } 364 365 } 366 367 // Wait for the future to be fulfilled 368 bool wait( future_t & this ) { 369 oneshot temp; 370 if( !setup(this, temp) ) return false; 371 372 // Wait context is setup, just wait on it 373 bool ret = wait( temp ); 374 375 // Wait for the future to tru 376 while( this.ptr == 2p ) Pause(); 377 // Make sure the state makes sense 378 // Should be fulfilled, could be in progress but it's out of date if so 379 // since if that is the case, the oneshot was fulfilled (unparking this thread) 380 // and the oneshot should not be needed any more 381 __attribute__((unused)) struct oneshot * was = this.ptr; 382 /* paranoid */ verifyf( was == 1p, "Expected this.ptr to be 1p, was %p\n", was ); 383 384 // Mark the future as fulfilled, to be consistent 385 // with potential calls to avail 386 // this.ptr = 1p; 387 return ret; 388 } 389 } 390 136 391 //----------------------------------------------------------------------- 137 392 // Statics call at the end of each thread to register statistics -
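Taken together, the oneshot and future_t types added to this header form a small completion protocol: a client parks on a future through an internally installed oneshot, and a server later fulfils the future to unpark it. A minimal sketch of that round trip, assuming the declarations above are in scope (the variable names and the two "sides" are illustrative):

    future_t f;               // initial state 0p: not yet fulfilled

    // client side:
    bool parked = wait( f );  // installs a oneshot wait context and parks until the future is fulfilled

    // server side, running concurrently:
    fulfil( f );              // marks the future fulfilled and unparks the waiting client, if any

    // once both sides are done, the future can be recycled:
    reset( f );               // back to the initial, unfulfilled state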
libcfa/src/concurrency/kernel/startup.cfa
r5869cea r7b91c0e 199 199 void ?{}(processor & this) with( this ) { 200 200 ( this.idle ){}; 201 ( this.terminated ){ 0};201 ( this.terminated ){}; 202 202 ( this.runner ){}; 203 203 init( this, "Main Processor", *mainCluster ); … … 528 528 void ?{}(processor & this, const char name[], cluster & _cltr) { 529 529 ( this.idle ){}; 530 ( this.terminated ){ 0};530 ( this.terminated ){}; 531 531 ( this.runner ){}; 532 532 … … 549 549 __wake_proc( &this ); 550 550 551 P( terminated );551 wait( terminated ); 552 552 /* paranoid */ verify( active_processor() != &this); 553 553 } -
libcfa/src/concurrency/locks.cfa
r5869cea r7b91c0e 7 7 //----------------------------------------------------------------------------- 8 8 // info_thread 9 forall( dtype L| is_blocking_lock(L)) {9 forall(L & | is_blocking_lock(L)) { 10 10 struct info_thread { 11 11 // used to put info_thread on a dl queue (aka sequence) … … 195 195 //----------------------------------------------------------------------------- 196 196 // alarm node wrapper 197 forall( dtype L| is_blocking_lock(L)) {197 forall(L & | is_blocking_lock(L)) { 198 198 struct alarm_node_wrap { 199 199 alarm_node_t alarm_node; … … 239 239 //----------------------------------------------------------------------------- 240 240 // condition variable 241 forall( dtype L| is_blocking_lock(L)) {241 forall(L & | is_blocking_lock(L)) { 242 242 243 243 void ?{}( condition_variable(L) & this ){ … … 356 356 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ) with(this) { WAIT_TIME( info, &l , time ) } 357 357 } 358 359 //----------------------------------------------------------------------------- 360 // Semaphore 361 void ?{}( semaphore & this, int count = 1 ) { 362 (this.lock){}; 363 this.count = count; 364 (this.waiting){}; 365 } 366 void ^?{}(semaphore & this) {} 367 368 bool P(semaphore & this) with( this ){ 369 lock( lock __cfaabi_dbg_ctx2 ); 370 count -= 1; 371 if ( count < 0 ) { 372 // queue current task 373 append( waiting, active_thread() ); 374 375 // atomically release spin lock and block 376 unlock( lock ); 377 park(); 378 return true; 379 } 380 else { 381 unlock( lock ); 382 return false; 383 } 384 } 385 386 bool V(semaphore & this) with( this ) { 387 $thread * thrd = 0p; 388 lock( lock __cfaabi_dbg_ctx2 ); 389 count += 1; 390 if ( count <= 0 ) { 391 // remove task at head of waiting list 392 thrd = pop_head( waiting ); 393 } 394 395 unlock( lock ); 396 397 // make new owner 398 unpark( thrd ); 399 400 return thrd != 0p; 401 } 402 403 bool V(semaphore & this, unsigned diff) with( this ) { 404 $thread * thrd = 0p; 405 lock( lock __cfaabi_dbg_ctx2 ); 406 int release = max(-count, (int)diff); 407 count += diff; 408 for(release) { 409 unpark( pop_head( waiting ) ); 410 } 411 412 unlock( lock ); 413 414 return thrd != 0p; 415 } -
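The counting semaphore moved here from kernel.cfa is unchanged in behaviour: P blocks while the count goes negative and V unparks one waiter. A short usage sketch; the Waiter thread type is an illustrative assumption and the semaphore declarations above are assumed to be in scope:

    #include <thread.hfa>

    semaphore s = { 0 };               // zero permits: the first P blocks

    thread Waiter {};
    void main( Waiter & ) {
        P( s );                        // parks until some other thread calls V
    }

    int main() {
        Waiter w;                      // thread starts once construction completes
        V( s );                        // release one permit, unparking the waiter
    }                                  // Waiter is joined by its destructor at the end of the block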
libcfa/src/concurrency/locks.hfa
r5869cea r7b91c0e 13 13 //----------------------------------------------------------------------------- 14 14 // is_blocking_lock 15 trait is_blocking_lock( dtype L| sized(L)) {15 trait is_blocking_lock(L & | sized(L)) { 16 16 // For synchronization locks to use when acquiring 17 17 void on_notify( L &, struct $thread * ); … … 31 31 // the info thread is a wrapper around a thread used 32 32 // to store extra data for use in the condition variable 33 forall( dtype L| is_blocking_lock(L)) {33 forall(L & | is_blocking_lock(L)) { 34 34 struct info_thread; 35 35 … … 120 120 //----------------------------------------------------------------------------- 121 121 // Synchronization Locks 122 forall( dtype L| is_blocking_lock(L)) {122 forall(L & | is_blocking_lock(L)) { 123 123 struct condition_variable { 124 124 // Spin lock used for mutual exclusion … … 157 157 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ); 158 158 } 159 160 //----------------------------------------------------------------------------- 161 // Semaphore 162 struct semaphore { 163 __spinlock_t lock; 164 int count; 165 __queue_t($thread) waiting; 166 }; 167 168 void ?{}(semaphore & this, int count = 1); 169 void ^?{}(semaphore & this); 170 bool P (semaphore & this); 171 bool V (semaphore & this); 172 bool V (semaphore & this, unsigned count); -
libcfa/src/concurrency/monitor.cfa
r5869cea r7b91c0e 50 50 static inline [$thread *, int] search_entry_queue( const __waitfor_mask_t &, $monitor * monitors [], __lock_size_t count ); 51 51 52 forall( dtype T| sized( T ))52 forall(T & | sized( T )) 53 53 static inline __lock_size_t insert_unique( T * array [], __lock_size_t & size, T * val ); 54 54 static inline __lock_size_t count_max ( const __waitfor_mask_t & mask ); … … 949 949 } 950 950 951 forall( dtype T| sized( T ))951 forall(T & | sized( T )) 952 952 static inline __lock_size_t insert_unique( T * array [], __lock_size_t & size, T * val ) { 953 953 if( !val ) return size; -
libcfa/src/concurrency/monitor.hfa
r5869cea r7b91c0e 22 22 #include "stdlib.hfa" 23 23 24 trait is_monitor( dtype T) {24 trait is_monitor(T &) { 25 25 $monitor * get_monitor( T & ); 26 26 void ^?{}( T & mutex ); … … 59 59 void ^?{}( monitor_dtor_guard_t & this ); 60 60 61 static inline forall( dtype T| sized(T) | { void ^?{}( T & mutex ); } )61 static inline forall( T & | sized(T) | { void ^?{}( T & mutex ); } ) 62 62 void delete( T * th ) { 63 63 ^(*th){}; -
libcfa/src/concurrency/mutex.cfa
r5869cea r7b91c0e 164 164 } 165 165 166 forall( dtype L| is_lock(L))166 forall(L & | is_lock(L)) 167 167 void wait(condition_variable & this, L & l) { 168 168 lock( this.lock __cfaabi_dbg_ctx2 ); … … 176 176 //----------------------------------------------------------------------------- 177 177 // Scopes 178 forall( dtype L| is_lock(L))178 forall(L & | is_lock(L)) 179 179 void lock_all ( L * locks[], size_t count) { 180 180 // Sort locks based on addresses … … 188 188 } 189 189 190 forall( dtype L| is_lock(L))190 forall(L & | is_lock(L)) 191 191 void unlock_all( L * locks[], size_t count) { 192 192 // Lock all -
libcfa/src/concurrency/mutex.hfa
r5869cea r7b91c0e 70 70 void unlock(recursive_mutex_lock & this) __attribute__((deprecated("use concurrency/locks.hfa instead"))); 71 71 72 trait is_lock( dtype L| sized(L)) {72 trait is_lock(L & | sized(L)) { 73 73 void lock (L &); 74 74 void unlock(L &); … … 94 94 void wait(condition_variable & this) __attribute__((deprecated("use concurrency/locks.hfa instead"))); 95 95 96 forall( dtype L| is_lock(L))96 forall(L & | is_lock(L)) 97 97 void wait(condition_variable & this, L & l) __attribute__((deprecated("use concurrency/locks.hfa instead"))); 98 98 99 99 //----------------------------------------------------------------------------- 100 100 // Scopes 101 forall( dtype L| is_lock(L)) {101 forall(L & | is_lock(L)) { 102 102 #if !defined( __TUPLE_ARRAYS_EXIST__ ) 103 103 void lock ( L * locks [], size_t count); -
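The is_lock trait now uses the new polymorphic syntax: any sized type providing lock and unlock satisfies it and can be passed to lock_all/unlock_all, which acquire a group of locks in address order to avoid deadlock (much of this header carries deprecation attributes pointing at concurrency/locks.hfa). A sketch with a deliberately trivial lock type, assuming the declarations above are in scope; noop_lock is an illustrative assumption:

    struct noop_lock {};               // illustration only: satisfies is_lock but provides no exclusion
    void lock  ( noop_lock & ) {}
    void unlock( noop_lock & ) {}

    int main() {
        noop_lock a, b, c;
        noop_lock * group[] = { &a, &b, &c };
        lock_all  ( group, 3 );        // sorts by address, then locks each in turn
        // ... operate on all three protected resources ...
        unlock_all( group, 3 );
    }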
libcfa/src/concurrency/preemption.cfa
r5869cea r7b91c0e 616 616 } 617 617 618 // Prevent preemption since we are about to start terminating things 619 void __kernel_abort_lock(void) { 620 signal_block( SIGUSR1 ); 621 } 622 618 623 // Raii ctor/dtor for the preemption_scope 619 624 // Used by thread to control when they want to receive preemption signals -
libcfa/src/concurrency/thread.cfa
r5869cea r7b91c0e 62 62 } 63 63 64 FORALL_DATA_INSTANCE(ThreadCancelled, ( dtype thread_t), (thread_t))64 FORALL_DATA_INSTANCE(ThreadCancelled, (thread_t &), (thread_t)) 65 65 66 forall( dtype T)66 forall(T &) 67 67 void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src) { 68 68 dst->virtual_table = src->virtual_table; … … 71 71 } 72 72 73 forall( dtype T)73 forall(T &) 74 74 const char * msg(ThreadCancelled(T) *) { 75 75 return "ThreadCancelled"; 76 76 } 77 77 78 forall( dtype T)78 forall(T &) 79 79 static void default_thread_cancel_handler(ThreadCancelled(T) & ) { 80 80 abort( "Unhandled thread cancellation.\n" ); 81 81 } 82 82 83 forall( dtype T| is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)))83 forall(T & | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T))) 84 84 void ?{}( thread_dtor_guard_t & this, 85 T & thrd, void(* defaultResumptionHandler)(ThreadCancelled(T) &)) {86 $monitor * m = get_monitor(thrd);85 T & thrd, void(*cancelHandler)(ThreadCancelled(T) &)) { 86 $monitor * m = get_monitor(thrd); 87 87 $thread * desc = get_thread(thrd); 88 88 89 89 // Setup the monitor guard 90 90 void (*dtor)(T& mutex this) = ^?{}; 91 bool join = defaultResumptionHandler != (void(*)(ThreadCancelled(T)&))0;91 bool join = cancelHandler != (void(*)(ThreadCancelled(T)&))0; 92 92 (this.mg){&m, (void(*)())dtor, join}; 93 93 … … 103 103 } 104 104 desc->state = Cancelled; 105 if (!join) { 106 defaultResumptionHandler = default_thread_cancel_handler; 107 } 105 void(*defaultResumptionHandler)(ThreadCancelled(T) &) = 106 join ? cancelHandler : default_thread_cancel_handler; 108 107 109 108 ThreadCancelled(T) except; … … 125 124 //----------------------------------------------------------------------------- 126 125 // Starting and stopping threads 127 forall( dtype T| is_thread(T) )126 forall( T & | is_thread(T) ) 128 127 void __thrd_start( T & this, void (*main_p)(T &) ) { 129 128 $thread * this_thrd = get_thread(this); … … 141 140 //----------------------------------------------------------------------------- 142 141 // Support for threads that don't ues the thread keyword 143 forall( dtype T| sized(T) | is_thread(T) | { void ?{}(T&); } )142 forall( T & | sized(T) | is_thread(T) | { void ?{}(T&); } ) 144 143 void ?{}( scoped(T)& this ) with( this ) { 145 144 handle{}; … … 147 146 } 148 147 149 forall( dtype T, ttype P| sized(T) | is_thread(T) | { void ?{}(T&, P); } )148 forall( T &, P... | sized(T) | is_thread(T) | { void ?{}(T&, P); } ) 150 149 void ?{}( scoped(T)& this, P params ) with( this ) { 151 150 handle{ params }; … … 153 152 } 154 153 155 forall( dtype T| sized(T) | is_thread(T) )154 forall( T & | sized(T) | is_thread(T) ) 156 155 void ^?{}( scoped(T)& this ) with( this ) { 157 156 ^handle{}; … … 159 158 160 159 //----------------------------------------------------------------------------- 161 forall( dtype T| is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)))160 forall(T & | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T))) 162 161 T & join( T & this ) { 163 162 thread_dtor_guard_t guard = { this, defaultResumptionHandler }; -
libcfa/src/concurrency/thread.hfa
r5869cea r7b91c0e 26 26 //----------------------------------------------------------------------------- 27 27 // thread trait 28 trait is_thread( dtype T) {28 trait is_thread(T &) { 29 29 void ^?{}(T& mutex this); 30 30 void main(T& this); … … 32 32 }; 33 33 34 FORALL_DATA_EXCEPTION(ThreadCancelled, ( dtype thread_t), (thread_t)) (34 FORALL_DATA_EXCEPTION(ThreadCancelled, (thread_t &), (thread_t)) ( 35 35 thread_t * the_thread; 36 36 exception_t * the_exception; 37 37 ); 38 38 39 forall( dtype T)39 forall(T &) 40 40 void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src); 41 41 42 forall( dtype T)42 forall(T &) 43 43 const char * msg(ThreadCancelled(T) *); 44 44 … … 47 47 48 48 // Inline getters for threads/coroutines/monitors 49 forall( dtype T| is_thread(T) )49 forall( T & | is_thread(T) ) 50 50 static inline $coroutine* get_coroutine(T & this) __attribute__((const)) { return &get_thread(this)->self_cor; } 51 51 52 forall( dtype T| is_thread(T) )52 forall( T & | is_thread(T) ) 53 53 static inline $monitor * get_monitor (T & this) __attribute__((const)) { return &get_thread(this)->self_mon; } 54 54 … … 60 60 extern struct cluster * mainCluster; 61 61 62 forall( dtype T| is_thread(T) )62 forall( T & | is_thread(T) ) 63 63 void __thrd_start( T & this, void (*)(T &) ); 64 64 … … 82 82 }; 83 83 84 forall( dtype T| is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) )84 forall( T & | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) ) 85 85 void ?{}( thread_dtor_guard_t & this, T & thrd, void(*)(ThreadCancelled(T) &) ); 86 86 void ^?{}( thread_dtor_guard_t & this ); … … 89 89 // thread runner 90 90 // Structure that actually start and stop threads 91 forall( dtype T| sized(T) | is_thread(T) )91 forall( T & | sized(T) | is_thread(T) ) 92 92 struct scoped { 93 93 T handle; 94 94 }; 95 95 96 forall( dtype T| sized(T) | is_thread(T) | { void ?{}(T&); } )96 forall( T & | sized(T) | is_thread(T) | { void ?{}(T&); } ) 97 97 void ?{}( scoped(T)& this ); 98 98 99 forall( dtype T, ttype P| sized(T) | is_thread(T) | { void ?{}(T&, P); } )99 forall( T &, P... | sized(T) | is_thread(T) | { void ?{}(T&, P); } ) 100 100 void ?{}( scoped(T)& this, P params ); 101 101 102 forall( dtype T| sized(T) | is_thread(T) )102 forall( T & | sized(T) | is_thread(T) ) 103 103 void ^?{}( scoped(T)& this ); 104 104 … … 115 115 void unpark( $thread * this ); 116 116 117 forall( dtype T| is_thread(T) )117 forall( T & | is_thread(T) ) 118 118 static inline void unpark( T & this ) { if(!&this) return; unpark( get_thread( this ) );} 119 119 … … 128 128 //---------- 129 129 // join 130 forall( dtype T| is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) )130 forall( T & | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) ) 131 131 T & join( T & this ); 132 132 -
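The is_thread trait and join are now declared with the new syntax; in practice the trait is satisfied through the thread keyword, which generates get_thread and leaves only main to be defined. A minimal sketch (Worker is an illustrative type name):

    #include <thread.hfa>

    thread Worker {};                  // generates the is_thread plumbing for Worker

    void main( Worker & ) {            // the thread body
        // ... do some work ...
    }

    int main() {
        Worker w;                      // the thread starts running once construction completes
        // join( w );                  // optional explicit join; per the declarations above,
                                       // cancellation is reported through ThreadCancelled
    }                                  // otherwise the destructor joins the thread at block exit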
libcfa/src/containers/list.hfa
r5869cea r7b91c0e 66 66 #define __DLISTED_MGD_JUSTIMPL(STRUCT) 67 67 68 forall( dtype tE) {68 forall( tE & ) { 69 69 struct $mgd_link { 70 70 tE *elem; … … 83 83 (this.is_terminator){ 1 }; 84 84 } 85 forall ( otypetInit | { void ?{}( $mgd_link(tE) &, tInit); } )85 forall ( tInit | { void ?{}( $mgd_link(tE) &, tInit); } ) 86 86 static inline void ?=?( $mgd_link(tE) &this, tInit i ) { 87 87 ^?{}( this ); … … 115 115 __DLISTED_MGD_COMMON(STRUCT, STRUCT, $links) 116 116 117 trait $dlistable( dtype Tnode, dtype Telem) {117 trait $dlistable(Tnode &, Telem &) { 118 118 $mgd_link(Telem) & $prev_link(Tnode &); 119 119 $mgd_link(Telem) & $next_link(Tnode &); … … 125 125 }; 126 126 127 forall ( dtype Tnode, dtype Telem| $dlistable(Tnode, Telem)) {127 forall (Tnode &, Telem & | $dlistable(Tnode, Telem)) { 128 128 129 129 // implemented as a sentinel item in an underlying cicrular list -
libcfa/src/containers/maybe.cfa
r5869cea r7b91c0e 18 18 19 19 20 forall( otypeT)20 forall(T) 21 21 void ?{}(maybe(T) & this) { 22 22 this.has_value = false; 23 23 } 24 24 25 forall( otypeT)25 forall(T) 26 26 void ?{}(maybe(T) & this, T value) { 27 27 this.has_value = true; … … 29 29 } 30 30 31 forall( otypeT)31 forall(T) 32 32 void ?{}(maybe(T) & this, maybe(T) other) { 33 33 this.has_value = other.has_value; … … 37 37 } 38 38 39 forall( otypeT)39 forall(T) 40 40 maybe(T) ?=?(maybe(T) & this, maybe(T) that) { 41 41 if (this.has_value && that.has_value) { … … 51 51 } 52 52 53 forall( otypeT)53 forall(T) 54 54 void ^?{}(maybe(T) & this) { 55 55 if (this.has_value) { … … 58 58 } 59 59 60 forall( otypeT)60 forall(T) 61 61 bool ?!=?(maybe(T) this, zero_t) { 62 62 return this.has_value; 63 63 } 64 64 65 forall( otypeT)65 forall(T) 66 66 maybe(T) maybe_value(T value) { 67 67 return (maybe(T)){value}; 68 68 } 69 69 70 forall( otypeT)70 forall(T) 71 71 maybe(T) maybe_none() { 72 72 return (maybe(T)){}; 73 73 } 74 74 75 forall( otypeT)75 forall(T) 76 76 bool has_value(maybe(T) * this) { 77 77 return this->has_value; 78 78 } 79 79 80 forall( otypeT)80 forall(T) 81 81 T get(maybe(T) * this) { 82 82 assertf(this->has_value, "attempt to get from maybe without value"); … … 84 84 } 85 85 86 forall( otypeT)86 forall(T) 87 87 void set(maybe(T) * this, T value) { 88 88 if (this->has_value) { … … 94 94 } 95 95 96 forall( otypeT)96 forall(T) 97 97 void set_none(maybe(T) * this) { 98 98 if (this->has_value) { -
libcfa/src/containers/maybe.hfa
r5869cea r7b91c0e 19 19 20 20 // DO NOT USE DIRECTLY! 21 forall( otypeT)21 forall(T) 22 22 struct maybe { 23 23 bool has_value; … … 26 26 27 27 28 forall( otypeT)28 forall(T) 29 29 void ?{}(maybe(T) & this); 30 30 31 forall( otypeT)31 forall(T) 32 32 void ?{}(maybe(T) & this, T value); 33 33 34 forall( otypeT)34 forall(T) 35 35 void ?{}(maybe(T) & this, maybe(T) other); 36 36 37 forall( otypeT)37 forall(T) 38 38 void ^?{}(maybe(T) & this); 39 39 40 forall( otypeT)40 forall(T) 41 41 maybe(T) ?=?(maybe(T) & this, maybe(T) other); 42 42 43 forall( otypeT)43 forall(T) 44 44 bool ?!=?(maybe(T) this, zero_t); 45 45 46 46 /* Waiting for bug#11 to be fixed. 47 forall( otypeT)47 forall(T) 48 48 maybe(T) maybe_value(T value); 49 49 50 forall( otypeT)50 forall(T) 51 51 maybe(T) maybe_none(); 52 52 */ 53 53 54 forall( otypeT)54 forall(T) 55 55 bool has_value(maybe(T) * this); 56 56 57 forall( otypeT)57 forall(T) 58 58 T get(maybe(T) * this); 59 59 60 forall( otypeT)60 forall(T) 61 61 void set(maybe(T) * this, T value); 62 62 63 forall( otypeT)63 forall(T) 64 64 void set_none(maybe(T) * this); 65 65 -
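Only the forall syntax changes in the maybe container; the API itself is untouched. A short usage sketch of the declarations above:

    #include <containers/maybe.hfa>

    int main() {
        maybe(int) m = { 42 };         // construct holding a value
        if ( has_value( &m ) ) {
            int x = get( &m );         // x == 42
            set( &m, x + 1 );          // replace the stored value
        }
        set_none( &m );                // drop the value; has_value( &m ) is now false
    }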
libcfa/src/containers/pair.cfa
r5869cea r7b91c0e 13 13 #include <containers/pair.hfa> 14 14 15 forall( otype R, otypeS15 forall(R, S 16 16 | { int ?==?(R, R); int ?<?(R, R); int ?<?(S, S); }) 17 17 int ?<?(pair(R, S) p, pair(R, S) q) { … … 19 19 } 20 20 21 forall( otype R, otypeS21 forall(R, S 22 22 | { int ?==?(R, R); int ?<?(R, R); int ?<=?(S, S); }) 23 23 int ?<=?(pair(R, S) p, pair(R, S) q) { … … 25 25 } 26 26 27 forall( otype R, otypeS | { int ?==?(R, R); int ?==?(S, S); })27 forall(R, S | { int ?==?(R, R); int ?==?(S, S); }) 28 28 int ?==?(pair(R, S) p, pair(R, S) q) { 29 29 return p.first == q.first && p.second == q.second; 30 30 } 31 31 32 forall( otype R, otypeS | { int ?!=?(R, R); int ?!=?(S, S); })32 forall(R, S | { int ?!=?(R, R); int ?!=?(S, S); }) 33 33 int ?!=?(pair(R, S) p, pair(R, S) q) { 34 34 return p.first != q.first || p.second != q.second; 35 35 } 36 36 37 forall( otype R, otypeS37 forall(R, S 38 38 | { int ?==?(R, R); int ?>?(R, R); int ?>?(S, S); }) 39 39 int ?>?(pair(R, S) p, pair(R, S) q) { … … 41 41 } 42 42 43 forall( otype R, otypeS43 forall(R, S 44 44 | { int ?==?(R, R); int ?>?(R, R); int ?>=?(S, S); }) 45 45 int ?>=?(pair(R, S) p, pair(R, S) q) { -
libcfa/src/containers/pair.hfa
r5869cea r7b91c0e 16 16 #pragma once 17 17 18 forall( otype R, otypeS) struct pair {18 forall(R, S) struct pair { 19 19 R first; 20 20 S second; 21 21 }; 22 22 23 forall( otype R, otypeS23 forall(R, S 24 24 | { int ?==?(R, R); int ?<?(R, R); int ?<?(S, S); }) 25 25 int ?<?(pair(R, S) p, pair(R, S) q); 26 26 27 forall( otype R, otypeS27 forall(R, S 28 28 | { int ?==?(R, R); int ?<?(R, R); int ?<=?(S, S); }) 29 29 int ?<=?(pair(R, S) p, pair(R, S) q); 30 30 31 forall( otype R, otypeS | { int ?==?(R, R); int ?==?(S, S); })31 forall(R, S | { int ?==?(R, R); int ?==?(S, S); }) 32 32 int ?==?(pair(R, S) p, pair(R, S) q); 33 33 34 forall( otype R, otypeS | { int ?!=?(R, R); int ?!=?(S, S); })34 forall(R, S | { int ?!=?(R, R); int ?!=?(S, S); }) 35 35 int ?!=?(pair(R, S) p, pair(R, S) q); 36 36 37 forall( otype R, otypeS37 forall(R, S 38 38 | { int ?==?(R, R); int ?>?(R, R); int ?>?(S, S); }) 39 39 int ?>?(pair(R, S) p, pair(R, S) q); 40 40 41 forall( otype R, otypeS41 forall(R, S 42 42 | { int ?==?(R, R); int ?>?(R, R); int ?>=?(S, S); }) 43 43 int ?>=?(pair(R, S) p, pair(R, S) q); -
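As above, only the polymorphic syntax changes here; the relational operators still require the corresponding operators on both element types and compare lexicographically. A small sketch:

    #include <fstream.hfa>
    #include <containers/pair.hfa>

    int main() {
        pair(int, char) p = { 1, 'a' };
        pair(int, char) q = { 1, 'b' };
        sout | (p < q) | (p == q);     // first components tie, 'a' < 'b': prints 1 0
    }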
libcfa/src/containers/result.cfa
r5869cea r7b91c0e 18 18 19 19 20 forall( otype T, otypeE)20 forall(T, E) 21 21 void ?{}(result(T, E) & this) { 22 22 this.has_value = false; … … 24 24 } 25 25 26 forall( otype T, otypeE)26 forall(T, E) 27 27 void ?{}(result(T, E) & this, one_t, T value) { 28 28 this.has_value = true; … … 30 30 } 31 31 32 forall( otype T, otypeE)32 forall(T, E) 33 33 void ?{}(result(T, E) & this, zero_t, E error) { 34 34 this.has_value = false; … … 36 36 } 37 37 38 forall( otype T, otypeE)38 forall(T, E) 39 39 void ?{}(result(T, E) & this, result(T, E) other) { 40 40 this.has_value = other.has_value; … … 46 46 } 47 47 48 forall( otype T, otypeE)48 forall(T, E) 49 49 result(T, E) ?=?(result(T, E) & this, result(T, E) that) { 50 50 if (this.has_value && that.has_value) { … … 63 63 } 64 64 65 forall( otype T, otypeE)65 forall(T, E) 66 66 void ^?{}(result(T, E) & this) { 67 67 if (this.has_value) { … … 72 72 } 73 73 74 forall( otype T, otypeE)74 forall(T, E) 75 75 bool ?!=?(result(T, E) this, zero_t) { 76 76 return this.has_value; 77 77 } 78 78 79 forall( otype T, otypeE)79 forall(T, E) 80 80 result(T, E) result_value(T value) { 81 81 return (result(T, E)){1, value}; 82 82 } 83 83 84 forall( otype T, otypeE)84 forall(T, E) 85 85 result(T, E) result_error(E error) { 86 86 return (result(T, E)){0, error}; 87 87 } 88 88 89 forall( otype T, otypeE)89 forall(T, E) 90 90 bool has_value(result(T, E) * this) { 91 91 return this->has_value; 92 92 } 93 93 94 forall( otype T, otypeE)94 forall(T, E) 95 95 T get(result(T, E) * this) { 96 96 assertf(this->has_value, "attempt to get from result without value"); … … 98 98 } 99 99 100 forall( otype T, otypeE)100 forall(T, E) 101 101 E get_error(result(T, E) * this) { 102 102 assertf(!this->has_value, "attempt to get from result without error"); … … 104 104 } 105 105 106 forall( otype T, otypeE)106 forall(T, E) 107 107 void set(result(T, E) * this, T value) { 108 108 if (this->has_value) { … … 115 115 } 116 116 117 forall( otype T, otypeE)117 forall(T, E) 118 118 void set_error(result(T, E) * this, E error) { 119 119 if (this->has_value) { -
libcfa/src/containers/result.hfa
r5869cea r7b91c0e 19 19 20 20 // DO NOT USE DIRECTLY! 21 forall( otype T, otypeE)21 forall(T, E) 22 22 union inner_result{ 23 23 T value; … … 25 25 }; 26 26 27 forall( otype T, otypeE)27 forall(T, E) 28 28 struct result { 29 29 bool has_value; … … 32 32 33 33 34 forall( otype T, otypeE)34 forall(T, E) 35 35 void ?{}(result(T, E) & this); 36 36 37 forall( otype T, otypeE)37 forall(T, E) 38 38 void ?{}(result(T, E) & this, one_t, T value); 39 39 40 forall( otype T, otypeE)40 forall(T, E) 41 41 void ?{}(result(T, E) & this, zero_t, E error); 42 42 43 forall( otype T, otypeE)43 forall(T, E) 44 44 void ?{}(result(T, E) & this, result(T, E) other); 45 45 46 forall( otype T, otypeE)46 forall(T, E) 47 47 void ^?{}(result(T, E) & this); 48 48 49 forall( otype T, otypeE)49 forall(T, E) 50 50 result(T, E) ?=?(result(T, E) & this, result(T, E) other); 51 51 52 forall( otype T, otypeE)52 forall(T, E) 53 53 bool ?!=?(result(T, E) this, zero_t); 54 54 55 55 /* Wating for bug#11 to be fixed. 56 forall( otype T, otypeE)56 forall(T, E) 57 57 result(T, E) result_value(T value); 58 58 59 forall( otype T, otypeE)59 forall(T, E) 60 60 result(T, E) result_error(E error); 61 61 */ 62 62 63 forall( otype T, otypeE)63 forall(T, E) 64 64 bool has_value(result(T, E) * this); 65 65 66 forall( otype T, otypeE)66 forall(T, E) 67 67 T get(result(T, E) * this); 68 68 69 forall( otype T, otypeE)69 forall(T, E) 70 70 E get_error(result(T, E) * this); 71 71 72 forall( otype T, otypeE)72 forall(T, E) 73 73 void set(result(T, E) * this, T value); 74 74 75 forall( otype T, otypeE)75 forall(T, E) 76 76 void set_error(result(T, E) * this, E error); 77 77 -
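The result container couples a success value with an error alternative. Because the named helpers are still commented out pending bug#11, construction goes through the one_t/zero_t constructors declared above, where the literals 1 and 0 select the value and error cases respectively. A sketch; the concrete element types are illustrative:

    #include <containers/result.hfa>

    int main() {
        result(int, const char *) ok  = { 1, 42 };              // value case
        result(int, const char *) bad = { 0, "parse error" };   // error case

        if ( has_value( &ok ) ) {
            int v = get( &ok );                  // v == 42
        }
        if ( ! has_value( &bad ) ) {
            const char * e = get_error( &bad );  // e == "parse error"
        }
    }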
libcfa/src/containers/stackLockFree.hfa
r5869cea r7b91c0e 9 9 // Created On : Wed May 13 20:58:58 2020 10 10 // Last Modified By : Peter A. Buhr 11 // Last Modified On : Sun Jun 14 13:25:09 202012 // Update Count : 6 411 // Last Modified On : Wed Jan 20 20:40:03 2021 12 // Update Count : 67 13 13 // 14 14 … … 17 17 #include <stdint.h> 18 18 19 forall( dtype T)19 forall( T & ) 20 20 union Link { 21 21 struct { // 32/64-bit x 2 … … 31 31 }; // Link 32 32 33 forall( otypeT | sized(T) | { Link(T) * ?`next( T * ); } ) {33 forall( T | sized(T) | { Link(T) * ?`next( T * ); } ) { 34 34 struct StackLF { 35 35 Link(T) stack; … … 42 42 43 43 void push( StackLF(T) & this, T & n ) with(this) { 44 *( &n )`next = stack; // atomic assignment unnecessary, or use CAA44 *( &n )`next = stack; // atomic assignment unnecessary, or use CAA 45 45 for () { // busy wait 46 46 if ( __atomic_compare_exchange_n( &stack.atom, &( &n )`next->atom, (Link(T))@{ {&n, ( &n )`next->count + 1} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) break; // attempt to update top node … … 65 65 } 66 66 if( next == 0p ) return false; 67 link = ( next)`next;67 link = ( next )`next; 68 68 } 69 69 } -
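StackLF is an intrusive, lock-free stack: an element type embeds a Link and exposes it through the postfix ?`next routine required by the forall assertion above. A sketch of satisfying that requirement, assuming the declarations above are in scope; the Node type is an illustrative assumption, and pop is assumed to exist alongside the push shown in the diff:

    struct Node {
        int value;
        Link(Node) next;                         // intrusive link used by StackLF
    };
    // satisfies the assertion  Link(T) * ?`next( T * )
    static inline Link(Node) * ?`next( Node * this ) { return &this->next; }

    int main() {
        StackLF(Node) stack;
        Node n;
        n.value = 7;
        push( stack, n );                        // CAS loop installs n as the new top
        // Node * top = pop( stack );            // assumed counterpart; would return &n here
    }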
libcfa/src/containers/vector.cfa