Changeset 8d66610
- Timestamp:
- May 21, 2021, 4:48:10 PM (3 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- f1bce515
- Parents:
- 5407cdc (diff), 7404cdc (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - Files:
-
- 21 added
- 69 edited
- 3 moved
Legend:
- Unmodified
- Added
- Removed
-
doc/LaTeXmacros/common.tex
r5407cdc r8d66610 11 11 %% Created On : Sat Apr 9 10:06:17 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : S un Feb 14 15:52:46202114 %% Update Count : 5 2413 %% Last Modified On : Sat May 8 08:48:37 2021 14 %% Update Count : 540 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 38 38 \usepackage{xspace} 39 39 \newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}} % Cforall icon 40 \newcommand{\CFA}{\protect\CFAIcon\xspace} % CFA symbolic name41 \newcommand{\CFL}{\textrm{Cforall}\xspace} % Cforall non-icon name42 \newcommand{\Celeven}{\textrm{C11}\xspace} % C11 symbolic name40 \newcommand{\CFA}{\protect\CFAIcon\xspace} % CFA symbolic name 41 \newcommand{\CFL}{\textrm{Cforall}\xspace} % Cforall non-icon name 42 \newcommand{\Celeven}{\textrm{C11}\xspace} % C11 symbolic name 43 43 \newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon 44 \newcommand{\CC} {\protect\CCIcon\xspace}% C++ symbolic name44 \newcommand{\CC}[1][]{\protect\CCIcon{#1}\xspace} % C++ symbolic name 45 45 % numbers disallowed in latex variables names => use number names 46 \newcommand{\CCeleven}{\protect\CCIcon{11}\xspace} % C++11 symbolic name47 \newcommand{\CCfourteen}{\protect\CCIcon{14}\xspace} 48 \newcommand{\CCseventeen}{\protect\CCIcon{17}\xspace} 49 \newcommand{\CCtwenty}{\protect\CCIcon{20}\xspace} % C++20 symbolic name46 \newcommand{\CCeleven}{\protect\CCIcon{11}\xspace} % C++11 symbolic name 47 \newcommand{\CCfourteen}{\protect\CCIcon{14}\xspace} % C++14 symbolic name 48 \newcommand{\CCseventeen}{\protect\CCIcon{17}\xspace} % C++17 symbolic name 49 \newcommand{\CCtwenty}{\protect\CCIcon{20}\xspace} % C++20 symbolic name 50 50 \newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name 51 51 … … 102 102 \renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex \@plus -1ex \@minus -.2ex}{1.0ex \@plus .2ex}{\normalfont\normalsize\bfseries}} 103 103 \renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-2.0ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries}} 104 \renewcommand\subparagraph{\@startsection{subparagraph}{4}{\z@}{-1.5ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries\itshape}} 104 105 105 106 % index macros … … 152 153 % Latin abbreviation 153 154 \newcommand{\abbrevFont}{\textit} % set empty for no italics 155 % If not followed by a comma or colon, add a comma. 156 \newcommand{\CheckCommaColon}{\@ifnextchar{,}{}{\@ifnextchar{:}{}{,\xspace}}} 157 % If not followed by a period, add a period. 158 \newcommand{\CheckPeriod}{\@ifnextchar{.}{}{.\xspace}} 159 154 160 \@ifundefined{eg}{ 155 161 \newcommand{\EG}{\abbrevFont{e}.\abbrevFont{g}.} 156 \newcommand*{\eg}{% 157 \@ifnextchar{,}{\EG}% 158 {\@ifnextchar{:}{\EG}% 159 {\EG,\xspace}}% 160 }}{}% 162 \newcommand{\eg}{\EG\CheckCommaColon} 163 }{}% 161 164 \@ifundefined{ie}{ 162 165 \newcommand{\IE}{\abbrevFont{i}.\abbrevFont{e}.} 163 \newcommand*{\ie}{% 164 \@ifnextchar{,}{\IE}% 165 {\@ifnextchar{:}{\IE}% 166 {\IE,\xspace}}% 167 }}{}% 166 \newcommand{\ie}{\IE\CheckCommaColon} 167 }{}% 168 168 \@ifundefined{etc}{ 169 169 \newcommand{\ETC}{\abbrevFont{etc}} 170 \newcommand*{\etc}{% 171 \@ifnextchar{.}{\ETC}% 172 {\ETC.\xspace}% 173 }}{}% 170 \newcommand{\etc}{\ETC\CheckPeriod} 171 }{}% 174 172 \@ifundefined{etal}{ 175 173 \newcommand{\ETAL}{\abbrevFont{et}~\abbrevFont{al}} 176 \newcommand*{\etal}{% 177 \@ifnextchar{.}{\protect\ETAL}% 178 {\protect\ETAL.\xspace}% 179 }}{}% 174 \newcommand{\etal}{\ETAL\CheckPeriod} 175 }{}% 180 176 \@ifundefined{viz}{ 181 177 \newcommand{\VIZ}{\abbrevFont{viz}} 182 \newcommand*{\viz}{% 183 \@ifnextchar{.}{\VIZ}% 184 {\VIZ.\xspace}% 185 }}{}% 178 \newcommand{\viz}{\VIZ\CheckPeriod} 179 }{}% 186 180 \makeatother 187 181 … … 284 278 showlines=true, % show blank lines at end of code 285 279 aboveskip=4pt, % spacing above/below code block 286 belowskip= 0pt,280 belowskip=2pt, 287 281 numberstyle=\footnotesize\sf, % numbering style 288 282 % replace/adjust listing characters that look bad in sanserif … … 297 291 \lstset{ 298 292 language=CFA, 299 moredelim=**[is][\color{red}]{@}{@}, % red highlighting @...@300 %moredelim=**[is][\color{red}]{®}{®}, % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.293 %moredelim=**[is][\color{red}]{@}{@}, % red highlighting @...@ 294 moredelim=**[is][\color{red}]{®}{®}, % red highlighting ®...® (registered trademark symbol) emacs: C-q M-. 301 295 %moredelim=**[is][\color{blue}]{ß}{ß}, % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_ 302 296 %moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-" -
doc/theses/andrew_beach_MMath/Makefile
r5407cdc r8d66610 34 34 ${LATEX} ${BASE} 35 35 ${BIBTEX} ${BUILD}/${BASE} 36 ${LATEX} ${BASE}37 36 ${GLOSSARY} ${BUILD}/${BASE} 38 37 ${LATEX} ${BASE} -
doc/theses/andrew_beach_MMath/cfalab.sty
r5407cdc r8d66610 1 1 % Package for CFA Research Lab. 2 % (Now more a personal collection and testing grounds for common.sty.) 2 3 % 3 % Made by combining and updating various macro files people had made. 4 % This is a collection of commands everyone working on CFA related documents 5 % should find useful. So mostly programming language related tools. 4 6 % 5 7 % Internal commands are prefixed with "\cfalab@". … … 10 12 11 13 % Other packages required. 14 % 15 % Access to new basic LaTeX tools and other low level commands. 12 16 \RequirePackage{etoolbox} 17 % Code formatting tools and environments. 13 18 \RequirePackage{listings} 19 % Automatically adds spaces. 14 20 \RequirePackage{xspace} 15 21 16 % Symbols: All symbols are zero argument robust commands with special rules 17 % about the space following the c.s. token. Normally the space might be 18 % re-added according to the rules of the xspace package. They may be followed 19 % by a star (which the command will consume) to disable this behaviour. 22 % Tip for commands that end with \xspace: if the default is not correct then 23 % follow the command with {} to disable \xspace, use '{} ' to force add a 24 % space and '{}<whatever-follows>' to force remove one. 25 % 26 % \CFA 27 % Cforall with the forall symbol. 28 \newrobustcmd\CFA{\textsf{C\raisebox{\depth}{\rotatebox{180}{A}}}\xspace} 29 % \Cpp[<std>] 30 % C++ symbol name. You may optionally provide <std> to specify a standard. 31 \newrobustcmd\Cpp[1][\xspace]{C++#1} 20 32 21 % \newsymbolcmd{<command>}{<replacement text>} 22 % Defines <command> to be a symbol that has the given <replacement text>. 23 \newrobustcmd*\newsymbolcmd[2]{\newrobustcmd{#1}{\cfalab@symbol{#2}}} 24 \def\cfalab@symbol#1{\@ifnextchar*{#1\cfalab@eatstar}{#1\xspace}} 25 \def\cfalab@eatstar*{} 26 27 % Cforall with the forall symbol. 28 \newsymbolcmd\CFA{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}} 29 % C++ with kerning. (No standard number support.) 30 \newsymbolcmd\CPP{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} 31 32 % This is executed very early in the \begin{document} code. 33 % This is executed very early in the \begin{document} code, before the 34 % document's contents but after packages are loaded. 33 35 \AtEndPreamble{ 34 36 \@ifpackageloaded{hyperref}{ … … 36 38 \pdfstringdefDisableCommands{ 37 39 \def\CFA{CFA} 38 \def\CPP{C++} 40 \def\Cpp{C++} 41 \def\lstinline{} 42 \def\code#1#2{#2} 39 43 } 40 44 }{} 41 45 } 46 47 % \colour{<colour>}{<text>} 48 % Just \color but using the LaTeX style instead of TeX style command. 49 \newcommand*\colour[2]{{\color{#1}#2}} 50 51 % \code{<language>}{<code>} 52 % Use the listings package to format the snipit of <code> in <language>. 53 \newrobustcmd*\code[2]{\lstinline[language=#1]{#2}} 54 55 % \begin{cfa}[<options>] 56 % \end{cfa} 57 % Use the listings package to format a block of CFA code. 58 % Extra listings options can be passed in as an optional argument. 59 \lstnewenvironment{cfa}[1][]{\lstset{language=CFA}\lstset{#1}}{} 60 61 % \settextunderscore{(new|old)} 62 % Redefines the underscore either as a new repersentation or the old one. 63 % Not that some other packages (ex. hyperref) can override this. Set it up 64 % after loading them. 65 \let\cfalab@textunderscore@old=\textunderscore 66 \newcommand\cfalab@textunderscore@new{% 67 \leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 68 \newcommand\settextunderscore[1]{% 69 \renewcommand\textunderscore{\csuse{cfalab@textunderscore@#1}}} 42 70 43 71 % The CFA listings language. Based off of ANCI C and including GCC extensions. … … 61 89 \lstset{defaultdialect={[UW]CFA}} 62 90 63 % The cfalab style defines some common settings useful in different languages. 64 \lstdefinestyle{cfalab}{% 65 columns=fullflexible, 66 basicstyle=\linespread{0.9}\tt, 67 stringstyle=\tt, 91 % Create an internal paragraph indent amount. This is used internally to 92 % mimic the standard indent even when it has been overriden in the document. 93 \newlength\cfalab@parindent 94 \deflength\cfalab@parindent{\parindent} 95 96 % The cfacommon style has many useful defaults for CFA and other types of 97 % code. Use the listings option "style=cfacommon" to load them. 98 \lstdefinestyle{cfacommon}{ 99 columns=fullflexible, 100 basicstyle=\linespread{0.9}\sf, 101 stringstyle=\tt, 102 tabsize=5, 103 % Indent code to paragraph indentation. 104 xleftmargin=\cfalab@parindent, 105 % Allow ASCII characters in the range 128-255. 106 extendedchars=true, 107 % This allows you to use "math mode" to insert LaTeX into the code. 108 % Use \( and \) if you need to insert math mode inside that code. 109 escapechar=\$, 110 % Disable LaTeX math escape in CFA code $...$ 111 mathescape=false, 112 keepspaces=true, 113 % Do not show spaces with cup. 114 showstringspaces=false, 115 % Show blank lines at end of code. 116 showlines=true, 117 % Spacing above/below code block. 118 aboveskip=4pt,belowskip=0pt, 119 numberstyle=\footnotesize\sf, 120 % Replace/adjust listing characters that look bad in sanserif. 121 literate={-}{\makebox[1ex][c]{\raisebox{0.7ex}{\rule{0.75ex}{0.1ex}}}}1 122 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1 123 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1 124 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 125 {->}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex\textgreater}2, 68 126 } 69 127 70 % \code*[<escape character>]{<code>} 71 % Use the listings package to format a snipit of <code>. 72 % The <escape character> must be a character that does not appear in 73 % <code> and defaults to a backtick. 74 \newcommand*\codeC[2][\`]{\lstinline[language=C]#1#2#1} 75 \newcommand*\codeCFA[2][\`]{\lstinline[language=CFA]#1#2#1} 128 % common.tex Compatablity =================================================== 129 % Below this line is for compatability with the old common.tex file. 76 130 77 % \settextunderscore{(new|old)} 78 % Redefines the underscore either as a new repersentation or the old one. 79 % Not that some other packages (ex. hyperref) can override this. Set it 80 % up after loading them. 81 \let\cfalab@textunderscore@old=\textunderscore 82 \newcommand\cfalab@textunderscore@new{% 83 \leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}} 84 \newcommand\settextunderscore[1]{% 85 \renewcommand\textunderscore{\csuse{cfalab@textunderscore@#1}}} 131 % Backwards compatable way to activate the cfacommon style. 132 \newcommand{\CFAStyle}{\lstset{style=cfacommon}} 133 134 % A couple of abbreviations are provided. Just ones someone liked. 135 % 136 % Abbreviation formatting commands (renew to customize): 137 \newcommand{\abbrevFont}{\textit} 138 % 139 % Abbreviations that, if not followed by a comma or colon, add a comma. 140 \newrobustcmd*\cfalab@abbrev@comma{% 141 \@ifnextchar{,}{}{\@ifnextchar{:}{}{,\xspace}}} 142 \providerobustcmd*\eg{\abbrevFont{e}.\abbrevFont{g}.\cfalab@abbrev@comma} 143 \providerobustcmd*\ie{\abbrevFont{i}.\abbrevFont{e}.\cfalab@abbrev@comma} 144 % 145 % Abbreviations that, if not followed by a period, add a period. 146 \newrobustcmd*\cfalab@abbrev@period{\@ifnextchar{.}{}{.\xspace}} 147 \providerobustcmd*\etc{\abbrevFont{etc}\cfalab@abbrev@period} 148 \providerobustcmd*\etal{\abbrevFont{et}~\abbrevFont{al}\cfalab@abbrev@period} 149 \providerobustcmd*\viz{\abbrevFont{viz}\cfalab@abbrev@period} 86 150 87 151 \endinput -
doc/theses/andrew_beach_MMath/existing.tex
r5407cdc r8d66610 16 16 to be defined~\cite{Moss18}. 17 17 \begin{cfa} 18 char i; int i; double i; $\C[3.75in]{// variable overload}$19 int f(); double f(); $\C{// return overload}$20 void g( int ); void g( double ); $\C{// parameter overload}\CRT$18 char i; int i; double i; 19 int f(); double f(); 20 void g( int ); void g( double ); 21 21 \end{cfa} 22 22 This feature requires name mangling so the assembly symbols are unique for … … 26 26 mangling is: 27 27 \begin{cfa} 28 // name mangling 28 // name mangling on by default 29 29 int i; // _X1ii_1 30 @extern "C"@ { // noname mangling30 extern "C" { // disables name mangling 31 31 int j; // j 32 @extern "Cforall"@ { //name mangling32 extern "Cforall" { // enables name mangling 33 33 int k; // _X1ki_1 34 34 } 35 // no name mangling36 } 37 // name mangling35 // revert to no name mangling 36 } 37 // revert to name mangling 38 38 \end{cfa} 39 39 Both forms of @extern@ affect all the declarations within their nested lexical … … 50 50 \begin{cfa} 51 51 int i, j; 52 int @&@ ri = i, @&&@rri = ri;52 int & ri = i, && rri = ri; 53 53 rri = 3; // auto-dereference assign to i 54 @&@ri = @&@j; // rebindable54 &ri = &j; // rebindable 55 55 ri = 5; // assign to j 56 56 \end{cfa} … … 64 64 65 65 In general, operator names in \CFA are constructed by bracketing an operator 66 token with @?@, which indicates the position of the arguments. For example, infixed67 multiplication is @?*?@ while prefix dereference is @*?@. This syntax make it 68 easy to tell the difference between prefix operations (such as @++?@) and 69 post-fix operations (@?++@).66 token with @?@, which indicates the position of the arguments. For example, 67 infixed multiplication is @?*?@ while prefix dereference is @*?@. 68 This syntax make it easy to tell the difference between prefix operations 69 (such as @++?@) and post-fix operations (@?++@). 70 70 71 71 The special name for a constructor is @?{}@, which comes from the 72 initialization syntax in C. The special name for a destructor is @^{}@, where 73 the @^@ has no special meaning. 72 initialization syntax in C. That initialation syntax is also the operator 73 form. \CFA will generate a constructor call each time a variable is declared, 74 passing the initialization arguments to the constructort. 75 \begin{cfa} 76 struct Example { ... }; 77 void ?{}(Example & this) { ... } 78 { 79 Example a; 80 Example b = {}; 81 } 82 void ?{}(Example & this, char first, int num) { ... } 83 { 84 Example c = {'a', 2}; 85 } 86 \end{cfa} 87 Both @a@ and @b@ will be initalized with the first constructor (there is no 88 general way to skip initialation) while @c@ will be initalized with the 89 second. 90 74 91 % I don't like the \^{} symbol but $^\wedge$ isn't better. 75 \begin{cfa} 76 struct T { ... }; 77 void ?@{}@(@T &@ this, ...) { ... } // constructor 78 void ?@^{}@(@T &@ this, ...) { ... } // destructor 92 Similarly destructors use the special name @^?{}@ (the @^@ has no special 93 meaning). They can be called explicatly as well but normally they are 94 implicitly called on a variable when it goes out of scope. 95 \begin{cfa} 96 void ^?{}(Example & this) { ... } 79 97 { 80 T s = @{@ ... @}@; // same constructor/initialization braces 81 } // destructor call automatically generated82 \end{cfa} 83 The first parameter is a reference parameter to the type for the 84 constructor/destructor. Destructors may have multiple parameters. The compiler 85 implicitly matches an overloaded constructor @void ^?{}(T &, ...);@ to an 86 object declaration with associated initialization, and generates a construction 87 call after the object is allocated. When an object goes out of scope, the 88 matching overloaded destructor @void ^?{}(T &);@ is called. Without explicit 89 definition, \CFA creates a default and copy constructor, destructor and 90 assignment (like \Cpp). It is possible to define constructors/destructors for 91 basic and existing types (unlike \Cpp).98 Example d; 99 } // <- implicit destructor call 100 \end{cfa} 101 No operator name is restricted in what function signatures they may be bound 102 to although most of the forms cannot be called in operator form. Some 103 ``near-misses" will generate warnings. 104 105 Whenever a type is defined, \CFA will create a default zero-argument 106 constructor, a copy constructor, a series of argument-per-field constructors 107 and a destructor. All user constructors are defined after this. 108 Because operators are never part of the type definition they may be added 109 at any time, including on built-in types. 92 110 93 111 \section{Polymorphism} … … 105 123 works on any type @T@: 106 124 \begin{cfa} 107 @forall( T )@ @T@ identity( @T@ val ) { return val; } 108 int forty_two = identity( 42 ); // T bound to int, forty_two == 42 109 \end{cfa} 125 forall( T ) T identity( T val ) { return val; } 126 int forty_two = identity( 42 ); 127 char capital_a = identity( 'A' ); 128 \end{cfa} 129 Each use of a polymorphic declaration will resolve its polymorphic parameters 130 (in this case, just @T@) to concrete types (@int@ in the first use and @char@ 131 in the second). 110 132 111 133 To allow a polymorphic function to be separately compiled, the type @T@ must be … … 115 137 types used in a function, \eg: 116 138 \begin{cfa} 117 forall( T @| { void do_once(T); }@) // assertion139 forall( T | { void do_once(T); }) 118 140 void do_twice(T value) { 119 141 do_once(value); 120 142 do_once(value); 121 143 } 122 void do_once(@int@ i) { ... } // provide assertion 123 @int@ i; 124 do_twice(i); // implicitly pass assertion do_once to do_twice 125 \end{cfa} 126 Any object with a type fulfilling the assertion may be passed as an argument to 127 a @do_twice@ call. 144 \end{cfa} 128 145 129 146 A polymorphic function can be used in the same way as a normal function. The … … 132 149 all the variables replaced with the concrete types from the arguments) is 133 150 defined at a call site. 151 \begin{cfa} 152 void do_once(int i) { ... } 153 int i; 154 do_twice(i); 155 \end{cfa} 156 Any object with a type fulfilling the assertion may be passed as an argument to 157 a @do_twice@ call. 134 158 135 159 Note, a function named @do_once@ is not required in the scope of @do_twice@ to … … 138 162 call. 139 163 \begin{cfa} 140 void do_once(double y) { ... } // global164 void do_once(double y) { ... } 141 165 int quadruple(int x) { 142 void do_once(int y) { y = y * 2; } // local143 do_twice(x); // using local "do_once"166 void do_once(int y) { y = y * 2; } 167 do_twice(x); 144 168 return x; 145 169 } … … 150 174 function. The matched assertion function is then passed as a function pointer 151 175 to @do_twice@ and called within it. 176 The global definition of @do_once@ is ignored. 152 177 153 178 To avoid typing long lists of assertions, constraints can be collect into … … 161 186 and the @forall@ list in the previous example is replaced with the trait. 162 187 \begin{cfa} 163 forall(dtype T | @done_once(T)@)188 forall(dtype T | done_once(T)) 164 189 \end{cfa} 165 190 In general, a trait can contain an arbitrary number of assertions, both … … 172 197 declarations instead of parameters, returns, and local variable declarations. 173 198 \begin{cfa} 174 forall(dtype @T@)199 forall(dtype T) 175 200 struct node { 176 node( @T@) * next; // generic linked node177 @T@* data;178 } 179 node( @int@) inode;180 \end{cfa} 181 The generic type @node(T)@ is an example of a polymorphic -type usage. Like \Cpp182 template usage, a polymorphic -type usage must specify a type parameter.201 node(T) * next; // generic linked node 202 T * data; 203 } 204 node(int) inode; 205 \end{cfa} 206 The generic type @node(T)@ is an example of a polymorphic type usage. Like \Cpp 207 template usage, a polymorphic type usage must specify a type parameter. 183 208 184 209 There are many other polymorphism features in \CFA but these are the ones used … … 219 244 Each coroutine has a @main@ function, which takes a reference to a coroutine 220 245 object and returns @void@. 221 \begin{cfa}[numbers=left] 222 void main(@CountUp & this@) { // argument matches trait is_coroutine 223 unsigned int up = 0; // retained between calls 224 while (true) { 225 next = up; // make "up" available outside function 226 @suspend;@$\label{suspend}$ 227 up += 1; 246 \begin{cfa} 247 void main(CountUp & this) { 248 for (unsigned int next = 0 ; true ; ++next) { 249 next = up; 250 suspend;$\label{suspend}$ 228 251 } 229 252 } … … 254 277 @mutex@. 255 278 \begin{cfa} 256 void example(MonitorA & @mutex@ argA, MonitorB & @mutex@argB);279 void example(MonitorA & mutex argA, MonitorB & mutex argB); 257 280 \end{cfa} 258 281 When the function is called, it implicitly acquires the monitor lock for all of -
doc/theses/andrew_beach_MMath/features.tex
r5407cdc r8d66610 20 20 \subparagraph{Raise} 21 21 The raise is the starting point for exception handling. It marks the beginning 22 of exception handling by \newterm{raising}an excepion, which passes it to22 of exception handling by raising an excepion, which passes it to 23 23 the EHM. 24 24 25 25 Some well known examples include the @throw@ statements of \Cpp and Java and 26 the \code Py{raise} statement from Python. In real systems a raise may preform27 some other work (such as memory management) but for the purposes of this 28 overview that can be ignored.26 the \code{Python}{raise} statement from Python. In real systems a raise may 27 preform some other work (such as memory management) but for the 28 purposes of this overview that can be ignored. 29 29 30 30 \subparagraph{Handle} … … 93 93 A handler labelled with any given exception can handle exceptions of that 94 94 type or any child type of that exception. The root of the exception hierarchy 95 (here \code C{exception}) acts as a catch-all, leaf types catch single types95 (here \code{C}{exception}) acts as a catch-all, leaf types catch single types 96 96 and the exceptions in the middle can be used to catch different groups of 97 97 related exceptions. … … 101 101 between different sub-hierarchies. 102 102 This design is used in \CFA even though it is not a object-orientated 103 language using different toolsto create the hierarchy.103 language; so different tools are used to create the hierarchy. 104 104 105 105 % Could I cite the rational for the Python IO exception rework? … … 123 123 \section{Virtuals} 124 124 Virtual types and casts are not part of \CFA's EHM nor are they required for 125 any EHM. But \CFA uses a hierarchial system of exceptions and this feature 126 is leveraged to create that. 127 128 % Maybe talk about why the virtual system is so minimal. 129 % Created for but not a part of the exception system. 125 any EHM. 126 However the \CFA uses a hierarchy built with the virtual system as the basis 127 for exceptions and exception matching. 128 129 The virtual system would have ideally been part of \CFA before the work 130 on exception handling began, but unfortunately it was not. 131 Because of this only the features and framework needed for the EHM were 132 designed and implemented. Other features were considered to ensure that 133 the structure could accomidate other desirable features but they were not 134 implemented. 135 The rest of this section will only discuss the finalized portion of the 136 virtual system. 130 137 131 138 The virtual system supports multiple ``trees" of types. Each tree is … … 143 150 It is important to note that these are virtual members, not virtual methods 144 151 of object-orientated programming, and can be of any type. 145 However, since \CFA has function pointers and they are allowed, virtual 146 members can be used to mimic virtual methods. 152 \CFA still supports virtual methods as a special case of virtual members. 153 Function pointers that take a pointer to the virtual type will be modified 154 with each level of inheritance so that refers to the new type. 155 This means an object can always be passed to a function in its virtual table 156 as if it were a method. 147 157 148 158 Each virtual type has a unique id. … … 175 185 While much of the virtual infrastructure is created, it is currently only used 176 186 internally for exception handling. The only user-level feature is the virtual 177 cast, which is the same as the \Cpp \ lstinline[language=C++]|dynamic_cast|.187 cast, which is the same as the \Cpp \code{C++}{dynamic_cast}. 178 188 \label{p:VirtualCast} 179 189 \begin{cfa} … … 197 207 \begin{cfa} 198 208 trait is_exception(exceptT &, virtualT &) { 199 virtualT const & get_exception_vtable(exceptT *);209 // Numerous imaginary assertions. 200 210 }; 201 211 \end{cfa} 202 212 The trait is defined over two types, the exception type and the virtual table 203 type. This should be one-to-one: each exception type has only one virtual 204 table type and vice versa. The only assertion in the trait is 205 @get_exception_vtable@, which takes a pointer of the exception type and 206 returns a reference to the virtual table type instance. 207 208 % TODO: This section, and all references to get_exception_vtable, are 209 % out-of-data. Perhaps wait until the update is finished before rewriting it. 210 The function @get_exception_vtable@ is actually a constant function. 211 Regardless of the value passed in (including the null pointer) it should 212 return a reference to the virtual table instance for that type. 213 The reason it is a function instead of a constant is that it make type 214 annotations easier to write as you can use the exception type instead of the 215 virtual table type; which usually has a mangled name. 216 % Also \CFA's trait system handles functions better than constants and doing 217 % it this way reduce the amount of boiler plate we need. 213 type. Each exception type should have but a single virtual table type. 214 Now there are no actual assertions in this trait because the trait system 215 actually can't express them (adding such assertions would be part of 216 completing the virtual system). The imaginary assertions would probably come 217 from a trait defined by the virtual system, and state that the exception type 218 is a virtual type, is a decendent of @exception_t@ (the base exception type) 219 and note its virtual table type. 218 220 219 221 % I did have a note about how it is the programmer's responsibility to make … … 235 237 Both traits ensure a pair of types are an exception type and its virtual table 236 238 and defines one of the two default handlers. The default handlers are used 237 as fallbacks and are discussed in detail in \ VRef{s:ExceptionHandling}.239 as fallbacks and are discussed in detail in \vref{s:ExceptionHandling}. 238 240 239 241 However, all three of these traits can be tricky to use directly. … … 351 353 for particular exception type. 352 354 The global default termination handler performs a cancellation 353 \see{\VRef{s:Cancellation}}on the current stack with the copied exception.355 (see \vref{s:Cancellation}) on the current stack with the copied exception. 354 356 355 357 \subsection{Resumption} … … 426 428 427 429 \subsubsection{Resumption Marking} 430 \label{s:ResumptionMarking} 428 431 A key difference between resumption and termination is that resumption does 429 432 not unwind the stack. A side effect that is that when a handler is matched … … 472 475 The symmetry between resumption termination is why this pattern was picked. 473 476 Other patterns, such as marking just the handlers that caught, also work but 474 lack the symmetry means there are lessrules to remember.477 lack the symmetry means there are more rules to remember. 475 478 476 479 \section{Conditional Catch} … … 557 560 \end{cfa} 558 561 If there are further handlers after this handler only the first version will 559 check them. If multiple handlers on a single try block could handle the same560 exception the translations get more complex but they are equivilantly562 check them. If multiple handlers on a single try block that could handle the 563 same exception the translations get more complex but they are equivilantly 561 564 powerful. 562 565 … … 633 636 and the current stack is 634 637 unwound. After that it depends one which stack is being cancelled. 635 \begin{description} 636 \ item[Main Stack:]638 639 \paragraph{Main Stack} 637 640 The main stack is the one used by the program main at the start of execution, 638 641 and is the only stack in a sequential program. … … 645 648 to, so it would have be explicitly managed. 646 649 647 \ item[Thread Stack:]650 \paragraph{Thread Stack} 648 651 A thread stack is created for a \CFA @thread@ object or object that satisfies 649 652 the @is_thread@ trait. … … 671 674 Also you can always add an explicit join if that is the desired behaviour. 672 675 673 \ item[Coroutine Stack:]676 \paragraph{Coroutine Stack} 674 677 A coroutine stack is created for a @coroutine@ object or object that 675 678 satisfies the @is_coroutine@ trait. … … 685 688 (in terms of coroutine state) called resume on this coroutine, so the message 686 689 is passed to the latter. 687 \end{description} -
doc/theses/andrew_beach_MMath/future.tex
r5407cdc r8d66610 110 110 \section{Zero-Cost Try} 111 111 \CFA does not have zero-cost try-statements because the compiler generates C 112 code rather than assembler code \see{\VPageref{p:zero-cost}}. When the compiler112 code rather than assembler code (see \vpageref{p:zero-cost}). When the compiler 113 113 does create its own assembly (or LLVM byte-code), then zero-cost try-statements 114 114 are possible. The downside of zero-cost try-statements is the LSDA complexity, -
doc/theses/andrew_beach_MMath/implement.tex
r5407cdc r8d66610 9 9 % Virtual table rules. Virtual tables, the pointer to them and the cast. 10 10 While the \CFA virtual system currently has only one public feature, virtual 11 cast \see{\VPageref{p:VirtualCast}}, substantial structure is required to12 su pport it, and provide features for exception handling and the standard13 library.11 cast (see the virtual cast feature \vpageref{p:VirtualCast}), 12 substantial structure is required to support it, 13 and provide features for exception handling and the standard library. 14 14 15 15 \subsection{Virtual Type} 16 Virtual types only have one change to their structure, the addition of a 17 pointer to the virtual table. This is always the first field so that 18 if it is cast to a supertype the field's location is still known. 19 20 This field is set as part of all new generated constructors. 21 \todo{They only come as part exceptions and don't work.} 22 After the object is created the field is constant. 23 24 However it can be read from, internally it is just a regular field called 25 @virtual_table@. Dereferencing it gives the virtual table and access to the 26 type's virtual members. 16 Virtual types only have one change to their structure: the addition of a 17 pointer to the virtual table, which is called the \emph{virtual-table pointer}. 18 Internally, the field is called @virtual_table@. 19 The field is fixed after construction. It is always the first field in the 20 structure so that its location is always known. 21 \todo{Talk about constructors for virtual types (after they are working).} 22 23 This is what binds an instance of a virtual type to a virtual table. This 24 pointer can be used as an identity check. It can also be used to access the 25 virtual table and the virtual members there. 26 27 \subsection{Type Id} 28 Every virtual type has a unique id. 29 Type ids can be compared for equality (the types reperented are the same) 30 or used to access the type's type information. 31 The type information currently is only the parent's type id or, if the 32 type has no parent, zero. 33 34 The id's are implemented as pointers to the type's type information instance. 35 Derefencing the pointer gets the type information. 36 By going back-and-forth between the type id and 37 the type info one can find every ancestor of a virtual type. 38 It also pushes the issue of creating a unique value (for 39 the type id) to the problem of creating a unique instance (for type 40 information) which the linker can solve. 41 42 Advanced linker support is required because there is no place that appears 43 only once to attach the type information to. There should be one structure 44 definition but it is included in multiple translation units. Each virtual 45 table definition should be unique but there are an arbitrary number of thoses. 46 So the special section prefix \texttt{.gnu.linkonce} is used. 47 With a unique suffix (making the entire section name unique) the linker will 48 remove multiple definition making sure only one version exists after linking. 49 Then it is just a matter of making sure there is a unique name for each type. 50 51 This is done in three phases. 52 The first phase is to generate a new structure definition to store the type 53 information. The layout is the same in each case, just the parent's type id, 54 but the types are changed. 55 The structure's name is change, it is based off the virtual type's name, and 56 the type of the parent's type id. 57 If the virtual type is polymorphic then the type information structure is 58 polymorphic as well, with the same polymorphic arguments. 59 60 The second phase is to generate an instance of the type information with a 61 almost unique name, generated by mangling the virtual type name. 62 63 The third phase is implicit with \CFA's overloading scheme. \CFA mangles 64 names with type information so that all of the symbols exported to the linker 65 are unique even if in \CFA code they are the same. Having two declarations 66 with the same name and same type is forbidden because it is impossible for 67 overload resolution to pick between them. This is why a unique type is 68 generated for each virtual type. 69 Polymorphic information is included in this mangling so polymorphic 70 types will have seperate instances for each set of polymorphic arguments. 71 72 \begin{cfa} 73 struct TYPE_ID_TYPE { 74 PARENT_ID_TYPE const * parent; 75 }; 76 77 __attribute__((cfa_linkonce)) 78 TYPE_ID_TYPE const TYPE_ID_NAME = { 79 &PARENT_ID_NAME, 80 }; 81 \end{cfa} 82 83 \subsubsection{cfa\_linkonce Attribute} 84 Another feature added to \CFA is a new attribute: \texttt{cfa\_linkonce}. 85 This attribute can be put on an object or function definition 86 (any global declaration with a name and a type). 87 This allows you to define that object or function multiple times. 88 All definitions should have the link-once attribute on them and all should 89 be identical. 90 91 The simplist way to use it is to put a definition in a header where the 92 forward declaration would usually go. 93 This is how it is used for type-id instances. There was is no unique location 94 associated with a type except for the type definition which is in a header. 95 This allows the unique type-id object to be generated there. 96 97 Internally @cfa_linkonce@ removes all @section@ attributes 98 from the declaration (as well as itself) and replaces them with 99 @section(".gnu.linkonce.NAME")@ where \texttt{NAME} is replaced by the 100 mangled name of the object. 101 The prefix \texttt{.gnu.linkonce} in section names is recognized by the 102 linker. If two of these sections with the same name, including everything 103 that comes after the special prefix, then only one will be used and the other 104 will be discarded. 27 105 28 106 \subsection{Virtual Table} 29 Every time a virtual type is defined the new virtual table type must also be 30 defined. 31 32 The unique instance is important because the address of the virtual table 33 instance is used as the identifier for the virtual type. So a pointer to the 34 virtual table and the ID for the virtual type are interchangable. 35 \todo{Unique instances might be going so we will have to talk about the new 36 system instead.} 37 38 The first step in putting it all together is to create the virtual table type. 39 The virtual table type is just a structure and can be described in terms of 40 its fields. The first field is always the parent type ID (or a pointer to 41 the parent virtual table) or 0 (the null pointer). 42 Next are other fields on the parent virtual table are repeated. 43 Finally are the fields used to store any new virtual members of the new 44 The virtual type 45 46 The virtual system is accessed through a private constant field inserted at the 47 beginning of every virtual type, called the virtual-table pointer. This field 48 points at a type's virtual table and is assigned during the object's 49 construction. The address of a virtual table acts as the unique identifier for 50 the virtual type, and the first field of a virtual table is a pointer to the 51 parent virtual-table or @0p@. The remaining fields are duplicated from the 52 parent tables in this type's inheritance chain, followed by any fields this type 53 introduces. Parent fields are duplicated so they can be changed (all virtual 54 members are overridable), so that references to the dispatched type 55 are replaced with the current virtual type. 56 % These are always taken by pointer or reference. 57 58 % Simple ascii diragram: 59 \begin{verbatim} 60 parent_pointer \ 61 parent_field0 | 62 ... | Same layout as parent. 63 parent_fieldN / 107 Each virtual type has a virtual table type that stores its type id and 108 virtual members. 109 Each virtual type instance is bound to a table instance that is filled with 110 the values of virtual members. 111 Both the layout of the fields and their value are decided by the rules given 112 below. 113 114 The layout always comes in three parts. 115 The first section is just the type id at the head of the table. It is always 116 there to ensure that 117 The second section are all the virtual members of the parent, in the same 118 order as they appear in the parent's virtual table. Note that the type may 119 change slightly as references to the ``this" will change. This is limited to 120 inside pointers/references and via function pointers so that the size (and 121 hence the offsets) are the same. 122 The third section is similar to the second except that it is the new virtual 123 members introduced at this level in the hierarchy. 124 125 \begin{figure} 126 \begin{cfa} 127 type_id 128 parent_field0 129 ... 130 parent_fieldN 64 131 child_field0 65 132 ... 66 133 child_fieldN 67 \end{verbatim} 68 \todo{Refine the diagram} 69 70 % For each virtual type, a virtual table is constructed. This is both a new type 71 % and an instance of that type. Other instances of the type could be created 72 % but the system doesn't use them. So this section will go over the creation of 73 % the type and the instance. 74 75 A virtual table is created when the virtual type is created. The name of the 76 type is created by mangling the name of the base type. The name of the instance 77 is also generated by name mangling. The fields are initialized automatically. 78 The parent field is initialized by getting the type of the parent field and 79 using that to calculate the mangled name of the parent's virtual table type. 80 There are two special fields that are included like normal fields but have 81 special initialization rules: the @size@ field is the type's size and is 82 initialized with a @sizeof@ expression, the @align@ field is the type's 83 alignment and uses an @alignof@ expression. The remaining fields are resolved 84 to a name matching the field's name and type using the normal visibility and 85 overload resolution rules of the type system. 86 87 These operations are split up into several groups depending on where they take 88 place which varies for monomorphic and polymorphic types. The first devision is 89 between the declarations and the definitions. Declarations, such as a function 90 signature or a aggregate's name, must always be visible but may be repeated in 91 the form of forward declarations in headers. Definitions, such as function 92 bodies and a aggregate's layout, can be separately compiled but must occur 93 exactly once in a source file. 94 95 \begin{sloppypar} 96 The declarations include the virtual type definition and forward declarations 97 of the virtual table instance, constructor, message function and 98 @get_exception_vtable@. The definition includes the storage and initialization 99 of the virtual table instance and the bodies of the three functions. 100 \end{sloppypar} 101 102 Monomorphic instances put all of these two groups in one place each. 103 Polymorphic instances also split out the core declarations and definitions from 104 the per-instance information. The virtual table type and most of the functions 105 are polymorphic so they are all part of the core. The virtual table instance 106 and the @get_exception_vtable@ function. 107 108 \begin{sloppypar} 134 \end{cfa} 135 \caption{Virtual Table Layout} 136 \label{f:VirtualTableLayout} 137 \todo*{Improve the Virtual Table Layout diagram.} 138 \end{figure} 139 140 The first and second sections together mean that every virtual table has a 141 prefix that has the same layout and types as its parent virtual table. 142 This, combined with the fixed offset to the virtual table pointer, means that 143 for any virtual type it doesn't matter if we have it or any of its 144 descendants, it is still always safe to access the virtual table through 145 the virtual table pointer. 146 From there it is safe to check the type id to identify the exact type of the 147 underlying object, access any of the virtual members and pass the object to 148 any of the method-like virtual members. 149 150 When a virtual table is declared the user decides where to declare it and its 151 name. The initialization of the virtual table is entirely automatic based on 152 the context of the declaration. 153 154 The type id is always fixed, each virtual table type will always have one 155 exactly one possible type id. 156 The virtual members are usually filled in by resolution. The best match for 157 a given name and type at the declaration site is filled in. 158 There are two exceptions to that rule: the @size@ field is the type's size 159 and is set to the result of a @sizeof@ expression, the @align@ field is the 160 type's alignment and similarly uses an @alignof@ expression. 161 162 \subsubsection{Concurrency Integration} 109 163 Coroutines and threads need instances of @CoroutineCancelled@ and 110 164 @ThreadCancelled@ respectively to use all of their functionality. When a new … … 112 166 the instance is created as well. The definition of the virtual table is created 113 167 at the definition of the main function. 114 \end{sloppypar} 168 169 \begin{figure} 170 \begin{cfa} 171 coroutine Example { 172 // fields 173 } 174 \end{cfa} 175 176 \begin{cfa} 177 __attribute__((cfa_linkonce)) 178 struct __cfatid_struct_CoroutineCancelled(Example) 179 __cfatid_CoroutineCancelled = { 180 &EXCEPTION_TYPE_ID, 181 }; 182 extern CoroutineCancelled_vtable _default_vtable_object_declaration; 183 extern CoroutineCancelled_vtable & _default_vtable; 184 \end{cfa} 185 186 \begin{cfa} 187 void main(Example & this) { 188 // body 189 } 190 \end{cfa} 191 192 \begin{cfa} 193 CoroutineCancelled_vtable _default_vtable_object_declaration = { 194 __cfatid_CoroutineCancelled, 195 // Virtual member initialization. 196 }; 197 198 CoroutineCancelled_vtable & _default_vtable = 199 &_default_vtable_object_declaration; 200 \end{cfa} 201 \caption{Concurrency Transformations} 202 \label{f:ConcurrencyTransformations} 203 \end{figure} 204 \todo{Improve Concurrency Transformations figure.} 115 205 116 206 \subsection{Virtual Cast} … … 119 209 % The C-cast is just to make sure the generated code is correct so the rest of 120 210 % the section is about that function. 121 The function is 211 The function is implemented in the standard library and has the following 212 signature: 122 213 \begin{cfa} 123 214 void * __cfa__virtual_cast( 124 struct __cfa__parent_vtable const * parent, 125 struct __cfa__parent_vtable const * const * child ); 126 \end{cfa} 127 and it is implemented in the standard library. The structure reperents the 128 head of a vtable which is the pointer to the parent virtual table. The 129 @parent@ points directly at the parent type virtual table while the @child@ 130 points at the object of the (possibe) child type. 131 132 In terms of the virtual cast expression, @parent@ comes from looking up the 133 type being cast to and @child@ is the result of the expression being cast. 134 Because the complier outputs C code, some type C type casts are also used. 135 The last bit of glue is an map that saves every virtual type the compiler 136 sees. This is used to check the type used in a virtual cast is a virtual 137 type and to get its virtual table. 138 (It also checks for conflicting definitions.) 139 140 Inside the function it is a simple conditional. If the type repersented by 141 @parent@ is or is an ancestor of the type repersented by @*child@ (it 142 requires one more level of derefence to pass through the object) then @child@ 143 is returned, otherwise the null pointer is returned. 144 145 The check itself is preformed is a simple linear search. If the child 146 virtual table or any of its ancestors (which are retreved through the first 147 field of every virtual table) are the same as the parent virtual table then 148 the cast succeeds. 215 struct __cfavir_type_td parent, 216 struct __cfavir_type_id const * child ); 217 \end{cfa} 218 The type id of target type of the virtual cast is passed in as @parent@ and 219 the cast target is passed in as @child@. 220 221 For C generation both arguments and the result are wrapped with type casts. 222 There is also an internal store inside the compiler to make sure that the 223 target type is a virtual type. 224 % It also checks for conflicting definitions. 225 226 The virtual cast either returns the original pointer as a new type or null. 227 So the function just does the parent check and returns the approprate value. 228 The parent check is a simple linear search of child's ancestors using the 229 type information. 149 230 150 231 \section{Exceptions} … … 161 242 162 243 Stack unwinding is the process of removing stack frames (activations) from the 163 stack. On function entry and return, unwinding is handled directly by the code 164 embedded in the function. Usually, the stack-frame size is known statically 165 based on parameter and local variable declarations. For dynamically-sized 166 local variables, a runtime computation is necessary to know the frame 167 size. Finally, a function's frame-size may change during execution as local 168 variables (static or dynamic sized) go in and out of scope. 244 stack. On function entry and return, unwinding is handled directly by the 245 call/return code embedded in the function. 246 In many cases the position of the instruction pointer (relative to parameter 247 and local declarations) is enough to know the current size of the stack 248 frame. 249 250 Usually, the stack-frame size is known statically based on parameter and 251 local variable declarations. Even with dynamic stack-size the information 252 to determain how much of the stack has to be removed is still contained 253 within the function. 169 254 Allocating/deallocating stack space is usually an $O(1)$ operation achieved by 170 255 bumping the hardware stack-pointer up or down as needed. 171 172 Unwinding across multiple stack frames is more complex because individual stack 173 management code associated with each frame is bypassed. That is, the location 174 of a function's frame-management code is largely unknown and dispersed 175 throughout the function, hence the current frame size managed by that code is 176 also unknown. Hence, code unwinding across frames does not have direct 177 knowledge about what is on the stack, and hence, how much of the stack needs to 178 be removed. 179 180 % At a very basic level this can be done with @setjmp@ \& @longjmp@ which simply 181 % move the top of the stack, discarding everything on the stack above a certain 182 % point. However this ignores all the cleanup code that should be run when 183 % certain sections of the stack are removed (for \CFA these are from destructors 184 % and finally clauses) and also requires that the point to which the stack is 185 % being unwound is known ahead of time. libunwind is used to address both of 186 % these problems. 256 Constructing/destructing values on the stack takes longer put in terms of 257 figuring out what needs to be done is of similar complexity. 258 259 Unwinding across multiple stack frames is more complex because that 260 information is no longer contained within the current function. 261 With seperate compilation a function has no way of knowing what its callers 262 are so it can't know how large those frames are. 263 Without altering the main code path it is also hard to pass that work off 264 to the caller. 187 265 188 266 The traditional unwinding mechanism for C is implemented by saving a snap-shot … … 191 269 reseting to a snap-shot of an arbitrary but existing function frame on the 192 270 stack. It is up to the programmer to ensure the snap-shot is valid when it is 193 reset, making this unwinding approach fragile with potential errors that are 194 difficult to debug because the stack becomes corrupted. 195 196 However, many languages define cleanup actions that must be taken when objects 197 are deallocated from the stack or blocks end, such as running a variable's 198 destructor or a @try@ statement's @finally@ clause. Handling these mechanisms 199 requires walking the stack and checking each stack frame for these potential 200 actions. 201 202 For exceptions, it must be possible to walk the stack frames in search of @try@ 203 statements to match and execute a handler. For termination exceptions, it must 204 also be possible to unwind all stack frames from the throw to the matching 205 catch, and each of these frames must be checked for cleanup actions. Stack 206 walking is where most of the complexity and expense of exception handling 207 appears. 271 reset and that all required clean-up from the unwound stacks is preformed. 272 This approach is fragile and forces a work onto the surounding code. 273 274 With respect to that work forced onto the surounding code, 275 many languages define clean-up actions that must be taken when certain 276 sections of the stack are removed. Such as when the storage for a variable 277 is removed from the stack or when a try statement with a finally clause is 278 (conceptually) popped from the stack. 279 None of these should be handled by the user, that would contradict the 280 intention of these features, so they need to be handled automatically. 281 282 To safely remove sections of the stack the language must be able to find and 283 run these clean-up actions even when removing multiple functions unknown at 284 the beginning of the unwinding. 208 285 209 286 One of the most popular tools for stack management is libunwind, a low-level … … 215 292 \subsection{libunwind Usage} 216 293 Libunwind, accessed through @unwind.h@ on most platforms, is a C library that 217 provides \C C-style stack-unwinding. Its operation is divided into two phases:294 provides \Cpp-style stack-unwinding. Its operation is divided into two phases: 218 295 search and cleanup. The dynamic target search -- phase 1 -- is used to scan the 219 296 stack and decide where unwinding should stop (but no unwinding occurs). The … … 226 303 LSDA can contain any information but conventionally it is a table with entries 227 304 representing regions of the function and what has to be done there during 228 unwinding. These regions are bracketed by the instruction pointer. If the305 unwinding. These regions are bracketed by instruction addresses. If the 229 306 instruction pointer is within a region's start/end, then execution is currently 230 307 executing in that region. Regions are used to mark out the scopes of objects … … 238 315 239 316 The GCC compilation flag @-fexceptions@ causes the generation of an LSDA and 240 attaches its personality function. However, this 317 attaches a personality function to each function. 318 In plain C (which \CFA currently compiles down to) this 241 319 flag only handles the cleanup attribute: 242 \todo{Peter: What is attached? Andrew: It uses the .cfi\_personality directive243 and that's all I know.}244 320 \begin{cfa} 245 321 void clean_up( int * var ) { ... } 246 322 int avar __attribute__(( cleanup(clean_up) )); 247 323 \end{cfa} 248 which is used on a variable and specifies a function, in this case @clean_up@, 249 run when the variable goes out of scope. 250 The function is passed a pointer to the object being removed from the stack 251 so it can be used to mimic destructors. 252 However, this feature cannot be used to mimic @try@ statements as it cannot 253 control the unwinding. 324 The attribue is used on a variable and specifies a function, 325 in this case @clean_up@, run when the variable goes out of scope. 326 This is enough to mimic destructors, but not try statements which can effect 327 the unwinding. 328 329 To get full unwinding support all of this has to be done directly with 330 assembly and assembler directives. Partiularly the cfi directives 331 \texttt{.cfi\_lsda} and \texttt{.cfi\_personality}. 254 332 255 333 \subsection{Personality Functions} … … 268 346 \end{lstlisting} 269 347 The @action@ argument is a bitmask of possible actions: 270 \begin{enumerate} 348 \begin{enumerate}[topsep=5pt] 271 349 \item 272 350 @_UA_SEARCH_PHASE@ specifies a search phase and tells the personality function … … 291 369 @_UA_FORCE_UNWIND@ specifies a forced unwind call. Forced unwind only performs 292 370 the cleanup phase and uses a different means to decide when to stop 293 \see{\VRef{s:ForcedUnwind}}.371 (see \vref{s:ForcedUnwind}). 294 372 \end{enumerate} 295 373 296 374 The @exception_class@ argument is a copy of the 297 \lstinline[language=C]|exception|'s @exception_class@ field. 298 299 The \lstinline[language=C]|exception| argument is a pointer to the user 300 provided storage object. It has two public fields, the exception class, which 301 is actually just a number, identifying the exception handling mechanism that 302 created it, and the cleanup function. The cleanup function is called if 303 required by the exception. 375 \code{C}{exception}'s @exception_class@ field. 376 This a number that identifies the exception handling mechanism that created 377 the 378 379 The \code{C}{exception} argument is a pointer to the user 380 provided storage object. It has two public fields: the @exception_class@, 381 which is described above, and the @exception_cleanup@ function. 382 The clean-up function is used by the EHM to clean-up the exception if it 383 should need to be freed at an unusual time, it takes an argument that says 384 why it had to be cleaned up. 304 385 305 386 The @context@ argument is a pointer to an opaque type passed to helper … … 309 390 that can be passed several places in libunwind. It includes a number of 310 391 messages for special cases (some of which should never be used by the 311 personality function) and error codes but unless otherwise notedthe392 personality function) and error codes. However, unless otherwise noted, the 312 393 personality function should always return @_URC_CONTINUE_UNWIND@. 313 394 … … 324 405 @_URC_END_OF_STACK@. 325 406 326 Second, when a handler is matched, raise exception continues onto the cleanup327 phase .407 Second, when a handler is matched, raise exception moves to the clean-up 408 phase and walks the stack a second time. 328 409 Once again, it calls the personality functions of each stack frame from newest 329 410 to oldest. This pass stops at the stack frame containing the matching handler. … … 338 419 Forced Unwind is the other central function in libunwind. 339 420 \begin{cfa} 340 _Unwind_Reason_Code _Unwind_ForcedUnwind( 421 _Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, 341 422 _Unwind_Stop_Fn, void *); 342 423 \end{cfa} … … 380 461 Each stack must have its own exception context. In a sequential \CFA program, 381 462 there is only one stack with a single global exception-context. However, when 382 the library @libcfathread@ is linked, there are multiple stacks whereeach463 the library @libcfathread@ is linked, there are multiple stacks and each 383 464 needs its own exception context. 384 465 385 General access to the exception context is provided byfunction466 The exception context should be retrieved by calling the function 386 467 @this_exception_context@. For sequential execution, this function is defined as 387 468 a weak symbol in the \CFA system-library, @libcfa@. When a \CFA program is … … 390 471 391 472 The sequential @this_exception_context@ returns a hard-coded pointer to the 392 global ex ecption context.473 global exception context. 393 474 The concurrent version adds the exception context to the data stored at the 394 base of each stack. When @this_exception_context@ is called it retrieves the475 base of each stack. When @this_exception_context@ is called, it retrieves the 395 476 active stack and returns the address of the context saved there. 396 477 … … 399 480 % catches. Talk about GCC nested functions. 400 481 401 Termination exceptions use libunwind heavily because it matches the intended 402 use from \CCexceptions closely. The main complication for \CFA is that the482 \CFA termination exceptions use libunwind heavily because they match \Cpp 483 \Cpp exceptions closely. The main complication for \CFA is that the 403 484 compiler generates C code, making it very difficult to generate the assembly to 404 485 form the LSDA for try blocks or destructors. … … 411 492 per-exception storage. 412 493 413 [Quick ASCII diagram to get started.] 494 \begin{figure} 414 495 \begin{verbatim} 415 496 Fixed Header | _Unwind_Exception <- pointer target … … 420 501 V ... 421 502 \end{verbatim} 422 423 Exceptions are stored in variable-sized blocks. 424 The first component is a fixed sized data structure that contains the 503 \caption{Exception Layout} 504 \label{f:ExceptionLayout} 505 \end{figure} 506 \todo*{Convert the exception layout to an actual diagram.} 507 508 Exceptions are stored in variable-sized blocks (see \vref{f:ExceptionLayout}). 509 The first component is a fixed-sized data structure that contains the 425 510 information for libunwind and the exception system. The second component is an 426 511 area of memory big enough to store the exception. Macros with pointer arthritic … … 428 513 @_Unwind_Exception@ to the entire node. 429 514 430 All of these nodes are linked together in a list, one list per stack, with the 515 Multipe exceptions can exist at the same time because exceptions can be 516 raised inside handlers, destructors and finally blocks. 517 Figure~\vref{f:MultipleExceptions} shows a program that has multiple 518 exceptions active at one time. 519 Each time an exception is thrown and caught the stack unwinds and the finally 520 clause runs. This will throw another exception (until @num_exceptions@ gets 521 high enough) which must be allocated. The previous exceptions may not be 522 freed because the handler/catch clause has not been run. 523 So the EHM must keep them alive while it allocates exceptions for new throws. 524 525 \begin{figure} 526 \centering 527 % Andrew: Figure out what these do and give them better names. 528 \newsavebox{\myboxA} 529 \newsavebox{\myboxB} 530 \begin{lrbox}{\myboxA} 531 \begin{lstlisting}[language=CFA,{moredelim=**[is][\color{red}]{@}{@}}] 532 unsigned num_exceptions = 0; 533 void throws() { 534 try { 535 try { 536 ++num_exceptions; 537 throw (Example){table}; 538 } finally { 539 if (num_exceptions < 3) { 540 throws(); 541 } 542 } 543 } catch (exception_t *) { 544 --num_exceptions; 545 } 546 } 547 int main() { 548 throws(); 549 } 550 \end{lstlisting} 551 \end{lrbox} 552 553 \begin{lrbox}{\myboxB} 554 \begin{lstlisting} 555 \end{lstlisting} 556 \end{lrbox} 557 558 {\usebox\myboxA} 559 \hspace{25pt} 560 {\usebox\myboxB} 561 562 \caption{Multiple Exceptions} 563 \label{f:MultipleExceptions} 564 \end{figure} 565 \todo*{Work on multiple exceptions code sample.} 566 567 All exceptions are stored in nodes which are then linked together in lists, 568 one list per stack, with the 431 569 list head stored in the exception context. Within each linked list, the most 432 570 recently thrown exception is at the head followed by older thrown … … 439 577 exception, the copy function, and the free function, so they are specific to an 440 578 exception type. The size and copy function are used immediately to copy an 441 exception into managed memory. After the exception is handled the free function442 is used to clean up the exception and then the entire node is passed to free 443 so the memory can be given back to the heap.579 exception into managed memory. After the exception is handled, the free 580 function is used to clean up the exception and then the entire node is 581 passed to free so the memory can be given back to the heap. 444 582 445 583 \subsection{Try Statements and Catch Clauses} … … 454 592 calls them. 455 593 Because this function is known and fixed (and not an arbitrary function that 456 happens to contain a try statement) this meansthe LSDA can be generated ahead594 happens to contain a try statement), the LSDA can be generated ahead 457 595 of time. 458 596 459 597 Both the LSDA and the personality function are set ahead of time using 460 embedded assembly. This is handcrafted using C @asm@ statements and contains 598 embedded assembly. This assembly code is handcrafted using C @asm@ statements 599 and contains 461 600 enough information for the single try statement the function repersents. 462 601 … … 487 626 nested functions and all other functions besides @__cfaehm_try_terminate@ in 488 627 \CFA use the GCC personality function and the @-fexceptions@ flag to generate 489 the LSDA. This allows destructors to be implemented with the cleanup attribute. 628 the LSDA. 629 Using this pattern, \CFA implements destructors with the cleanup attribute. 630 631 \begin{figure} 632 \begin{cfa} 633 try { 634 // TRY BLOCK 635 } catch (Exception1 * name1 ; check(name1)) { 636 // CATCH BLOCK 1 637 } catch (Exception2 * name2) { 638 // CATCH BLOCK 2 639 } 640 \end{cfa} 641 642 \begin{cfa} 643 void try(void) { 644 // TRY BLOCK 645 } 646 int match(exception_t * __exception_inst) { 647 { 648 Exception1 * name1; 649 if (name1 = (virtual Exception1 *)__exception_inst && check(name1)) { 650 return 1; 651 } 652 } 653 { 654 Exception2 * name2; 655 if (name2 = (virtual Exception2 *)__exception_inst) { 656 return 2; 657 } 658 } 659 return 0; 660 } 661 void catch(exception_t * __exception_inst, int __handler_index) { 662 switch (__handler_index) { 663 case 1: 664 { 665 Exception1 * name1 = (virtual Exception1 *)__exception_inst; 666 // CATCH BLOCK 1 667 } 668 return; 669 case 2: 670 { 671 Exception2 * name2 = (virtual Exception2 *)__exception_inst; 672 // CATCH BLOCK 2 673 } 674 return; 675 } 676 } 677 { 678 __cfaehm_try_terminate(try, catch, match); 679 } 680 \end{cfa} 681 682 \caption{Termination Transformation} 683 \label{f:TerminationTransformation} 684 \todo*{Improve (compress?) Termination Transformations.} 685 \end{figure} 490 686 491 687 \section{Resumption} 492 688 % The stack-local data, the linked list of nodes. 493 689 494 Resumption simple to implement because there is no stack unwinding. The 495 resumption raise uses a list of nodes for its stack traversal. The head of the 496 list is stored in the exception context. The nodes in the list have a pointer 497 to the next node and a pointer to the handler function. 498 499 A resumption raise traverses this list. At each node the handler function is 500 called, passing the exception by pointer. It returns true if the exception is 501 handled and false otherwise. 502 503 The handler function does both the matching and handling. It computes the 504 condition of each @catchResume@ in top-to-bottom order, until it finds a 505 handler that matches. If no handler matches then the function returns 506 false. Otherwise the matching handler is run; if it completes successfully, the 507 function returns true. Rethrowing, through the @throwResume;@ statement, 508 causes the function to return true. 690 Resumption simpler to implement than termination 691 because there is no stack unwinding. 692 Instead of storing the data in a special area using assembly, 693 there is just a linked list of possible handlers for each stack, 694 with each node on the list reperenting a try statement on the stack. 695 696 The head of the list is stored in the exception context. 697 The nodes are stored in order, with the more recent try statements closer 698 to the head of the list. 699 Instead of traversing the stack resumption handling traverses the list. 700 At each node the EHM checks to see if the try statement the node repersents 701 can handle the exception. If it can, then the exception is handled and 702 the operation finishes, otherwise the search continues to the next node. 703 If the search reaches the end of the list without finding a try statement 704 that can handle the exception the default handler is executed and the 705 operation finishes. 706 707 In each node is a handler function which does most of the work there. 708 The handler function is passed the raised the exception and returns true 709 if the exception is handled and false if it cannot be handled here. 710 711 For each @catchResume@ clause the handler function will: 712 check to see if the raised exception is a descendant type of the declared 713 exception type, if it is and there is a conditional expression then it will 714 run the test, if both checks pass the handling code for the clause is run 715 and the function returns true, otherwise it moves onto the next clause. 716 If this is the last @catchResume@ clause then instead of moving onto 717 the next clause the function returns false as no handler could be found. 718 719 \begin{figure} 720 \begin{cfa} 721 try { 722 // TRY BLOCK 723 } catchResume (Exception1 * name1 ; check(name1)) { 724 // CATCH BLOCK 1 725 } catchResume (Exception2 * name2) { 726 // CATCH BLOCK 2 727 } 728 \end{cfa} 729 730 \begin{cfa} 731 bool handle(exception_t * __exception_inst) { 732 { 733 Exception1 * name1; 734 if (name1 = (virtual Exception1 *)__exception_inst && check(name1)) { 735 // CATCH BLOCK 1 736 return 1; 737 } 738 } 739 { 740 Exception2 * name2; 741 if (name2 = (virtual Exception2 *)__exception_inst) { 742 // CATCH BLOCK 2 743 return 2; 744 } 745 } 746 return false; 747 } 748 struct __try_resume_node __resume_node 749 __attribute__((cleanup( __cfaehm_try_resume_cleanup ))); 750 __cfaehm_try_resume_setup( &__resume_node, handler ); 751 \end{cfa} 752 753 \caption{Resumption Transformation} 754 \label{f:ResumptionTransformation} 755 \todo*{Improve (compress?) Resumption Transformations.} 756 \end{figure} 509 757 510 758 % Recursive Resumption Stuff: 511 Search skipping \see{\VPageref{p:searchskip}}, which ignores parts of the stack 759 Search skipping (see \vpageref{s:ResumptionMarking}), which ignores parts of 760 the stack 512 761 already examined, is accomplished by updating the front of the list as the 513 search continues. Before the handler at a node is called the head of the list762 search continues. Before the handler at a node is called, the head of the list 514 763 is updated to the next node of the current node. After the search is complete, 515 764 successful or not, the head of the list is reset. … … 524 773 stack -- the first one points over all the checked handlers -- and the ordering 525 774 is maintained. 775 776 \begin{figure} 777 \begin{minipage}[l][][b]{0,2\textwidth} 778 \begin{verbatim} 779 780 781 X <- 782 | 783 V 784 X 785 | 786 V 787 X 788 \end{verbatim} 789 Initial State 790 \end{minipage} 791 \begin{minipage}[l][][b]{0,2\textwidth} 792 \begin{verbatim} 793 794 795 X 796 | 797 V 798 X <- 799 | 800 V 801 X 802 \end{verbatim} 803 Handler Found 804 \end{minipage} 805 \begin{minipage}[l][][b]{0,2\textwidth} 806 \begin{verbatim} 807 X <- 808 / 809 / X 810 | | 811 \ V 812 X 813 | 814 V 815 X 816 \end{verbatim} 817 Try Block Added 818 \end{minipage} 819 \begin{minipage}[l][][b]{0,2\textwidth} 820 \begin{verbatim} 821 822 823 X <- 824 | 825 V 826 X 827 | 828 V 829 X 830 \end{verbatim} 831 Handler Done 832 \end{minipage} 833 \caption{Resumption Marking} 834 \label{f:ResumptionMarking} 835 \todo*{Convert Resumption Marking into a line figure.} 836 \end{figure} 526 837 527 838 \label{p:zero-cost} … … 540 851 \section{Finally} 541 852 % Uses destructors and GCC nested functions. 542 Finally clauses is placed into a GCC nested-function with a unique name, and no 543 arguments or return values. This nested function is then set as the cleanup 853 A finally clause is placed into a GCC nested-function with a unique name, 854 and no arguments or return values. 855 This nested function is then set as the cleanup 544 856 function of an empty object that is declared at the beginning of a block placed 545 857 around the context of the associated @try@ statement. 546 858 547 The rest is handled by GCC. The try block and all handlers are inside th e859 The rest is handled by GCC. The try block and all handlers are inside this 548 860 block. At completion, control exits the block and the empty object is cleaned 549 861 up, which runs the function that contains the finally code. … … 553 865 554 866 Cancellation also uses libunwind to do its stack traversal and unwinding, 555 however it uses a different primary function @_Unwind_ForcedUnwind@. Details556 of its interface can be found in the \VRef{s:ForcedUnwind}.867 however it uses a different primary function: @_Unwind_ForcedUnwind@. Details 868 of its interface can be found in the Section~\vref{s:ForcedUnwind}. 557 869 558 870 The first step of cancellation is to find the cancelled stack and its type: … … 560 872 pointer and the current thread pointer, and every thread stores a pointer to 561 873 its main coroutine and the coroutine it is currently executing. 562 563 So if the active thread's main and current coroutine are the same. If they 564 are then the current stack is a thread stack, otherwise it is a coroutine565 stack. If it is a thread stack then an equality check with the stored main 566 thread pointer and current thread pointer is enough to tell if the current 567 thread is the main thread or not.874 \todo*{Consider adding a description of how threads are coroutines.} 875 876 If a the current thread's main and current coroutines are the same then the 877 current stack is a thread stack. Furthermore it is easy to compare the 878 current thread to the main thread to see if they are the same. And if this 879 is not a thread stack then it must be a coroutine stack. 568 880 569 881 However, if the threading library is not linked, the sequential execution is on … … 574 886 Regardless of how the stack is chosen, the stop function and parameter are 575 887 passed to the forced-unwind function. The general pattern of all three stop 576 functions is the same: they continue unwinding until the end of stack when they577 do there primary work.888 functions is the same: they continue unwinding until the end of stack and 889 then preform their transfer. 578 890 579 891 For main stack cancellation, the transfer is just a program abort. -
doc/theses/andrew_beach_MMath/uw-ethesis.tex
r5407cdc r8d66610 66 66 % Tip: Photographs should be cropped and compressed so as not to be too large. 67 67 68 % To create a PDF output that is optimized for double-sided printing:69 % 1) comment-out the \documentclass statement in the preamble below, and70 % un-comment the second \documentclass line.71 % 2) change the value assigned below to the boolean variable "PrintVersion"72 % from "false" to "true".73 74 68 % ====================================================================== 75 69 % D O C U M E N T P R E A M B L E … … 87 81 } 88 82 89 % Some LaTeX commands I define for my own nomenclature. 90 % If you have to, it's easier to make changes to nomenclature once here than 91 % in a million places throughout your thesis! 92 \newcommand{\package}[1]{\textbf{#1}} % package names in bold text 93 \newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font 94 \newcommand{\href}[1]{#1} % does nothing, but defines the command so the 95 % print-optimized version will ignore \href tags (redefined by hyperref pkg). 96 % Anything defined here may be redefined by packages added below... 83 % Does nothing, ignores \href tags (redefined by hyperref package). 84 \newcommand{\href}[1]{#1} 97 85 98 86 % For a nomenclature (optional; available from ctan.org) … … 105 93 % Removes large sections of the document. 106 94 \usepackage{comment} 107 % Adds todos (Must be included after comment.) 108 \usepackage{todonotes} 95 % Adds todo commands. 96 \usepackage{todo} 97 % cfa macros used in the document 98 \usepackage{cfalab} 99 % allow global and individual modification of spacing 100 \usepackage{enumitem} 101 % Improved reference tools. 102 \usepackage[nospace]{varioref} 109 103 110 104 % Hyperlinks make it very easy to navigate an electronic document. … … 151 145 152 146 % Exception to the rule of hyperref being the last add-on package 153 \usepackage[ automake,toc,abbreviations]{glossaries-extra}147 \usepackage[toc,abbreviations]{glossaries-extra} 154 148 % If glossaries-extra is not in your LaTeX distribution, get it from CTAN 155 149 % (http://ctan.org/pkg/glossaries-extra), although it's supposed to be in … … 208 202 \makeglossaries 209 203 210 % cfa macros used in the document 211 %\usepackage{cfalab} 212 % I'm going to bring back eventually. 213 \makeatletter 214 % Combines all \CC* commands: 215 \newrobustcmd*\Cpp[1][\xspace]{\cfalab@Cpp#1} 216 \newcommand\cfalab@Cpp{C\kern-.1em\hbox{+\kern-.25em+}} 217 % Optional arguments do not work with pdf string. (Some fix-up required.) 218 \pdfstringdefDisableCommands{\def\Cpp{C++}} 219 220 % Wrappers for inline code snippits. 221 \newrobustcmd*\codeCFA[1]{\lstinline[language=CFA]{#1}} 222 \newrobustcmd*\codeC[1]{\lstinline[language=C]{#1}} 223 \newrobustcmd*\codeCpp[1]{\lstinline[language=C++]{#1}} 224 \newrobustcmd*\codePy[1]{\lstinline[language=Python]{#1}} 225 226 % Colour text, formatted in LaTeX style instead of TeX style. 227 \newcommand*\colour[2]{{\color{#1}#2}} 228 \makeatother 229 230 \input{common} 231 % CFA code-style for all languages 232 \CFAStyle 233 % CFA default lnaguage 234 \lstset{language=CFA,basicstyle=\linespread{0.9}\tt} 204 % listings package configuation: 205 \lstMakeShortInline@ 206 \lstset{language=CFA,style=cfacommon,basicstyle=\linespread{0.9}\tt} 207 \lstset{moredelim=**[is][\protect\color{red}]{@}{@}} 235 208 % Annotations from Peter: 236 209 \newcommand{\PAB}[1]{{\color{blue}PAB: #1}} … … 262 235 % Tip: Putting each sentence on a new line is a way to simplify later editing. 263 236 %---------------------------------------------------------------------- 237 \input{intro} 264 238 \input{existing} 265 239 \input{features} 266 240 \input{implement} 267 %\input{unwinding}268 241 \input{future} 269 242 … … 325 298 \phantomsection % allows hyperref to link to the correct page 326 299 300 \todos 301 327 302 %---------------------------------------------------------------------- 328 303 \end{document} % end of logical document -
doc/theses/mubeen_zulfiqar_MMath/AllocDS1.fig
r5407cdc r8d66610 8 8 -2 9 9 1200 2 10 6 4 950 1275 5250 142511 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5025 1350 20 20 5025 1350 5045 135012 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5100 1350 20 20 5100 1350 5120 135013 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5175 1350 20 20 5175 1350 5195 135010 6 4200 1575 4500 1725 11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4275 1650 20 20 4275 1650 4295 1650 12 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4350 1650 20 20 4350 1650 4370 1650 13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4425 1650 20 20 4425 1650 4445 1650 14 14 -6 15 6 5700 1950 6000 2100 16 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 2025 20 20 5775 2025 5795 2025 17 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 2025 20 20 5850 2025 5870 2025 18 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5925 2025 20 20 5925 2025 5945 2025 19 -6 20 6 3600 2100 3900 2475 15 6 2850 2475 3150 2850 21 16 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 22 17 1 1 1.00 45.00 90.00 23 3675 2100 3675 232518 2925 2475 2925 2700 24 19 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 25 3600 2325 3900 2325 3900 2475 3600 2475 3600 232520 2850 2700 3150 2700 3150 2850 2850 2850 2850 2700 26 21 -6 27 6 5100 2100 5400 247522 6 4350 2475 4650 2850 28 23 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 29 24 1 1 1.00 45.00 90.00 30 5175 2100 5175 232525 4425 2475 4425 2700 31 26 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 32 5100 2325 5400 2325 5400 2475 5100 2475 5100 232527 4350 2700 4650 2700 4650 2850 4350 2850 4350 2700 33 28 -6 34 6 4350 2100 4575 277529 6 3600 2475 3825 3150 35 30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 36 31 1 1 1.00 45.00 90.00 37 4425 2100 4425 232532 3675 2475 3675 2700 38 33 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 39 4350 2325 4575 2325 4575 2475 4350 2475 4350 232534 3600 2700 3825 2700 3825 2850 3600 2850 3600 2700 40 35 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 41 4350 2625 4575 2625 4575 2775 4350 2775 4350 262536 3600 3000 3825 3000 3825 3150 3600 3150 3600 3000 42 37 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 43 38 1 1 1.00 45.00 90.00 44 4425 2400 4425 262539 3675 2775 3675 3000 45 40 -6 46 6 5700 3225 6000 3375 47 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 3300 20 20 5775 3300 5795 3300 48 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 3300 20 20 5850 3300 5870 3300 49 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5925 3300 20 20 5925 3300 5945 3300 41 6 4875 3600 5175 3750 42 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4950 3675 20 20 4950 3675 4970 3675 43 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5025 3675 20 20 5025 3675 5045 3675 44 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5100 3675 20 20 5100 3675 5120 3675 45 -6 46 6 4875 2325 5175 2475 47 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4950 2400 20 20 4950 2400 4970 2400 48 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5025 2400 20 20 5025 2400 5045 2400 49 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5100 2400 20 20 5100 2400 5120 2400 50 -6 51 6 5625 2325 5925 2475 52 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5700 2400 20 20 5700 2400 5720 2400 53 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 2400 20 20 5775 2400 5795 2400 54 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 2400 20 20 5850 2400 5870 2400 55 -6 56 6 5625 3600 5925 3750 57 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5700 3675 20 20 5700 3675 5720 3675 58 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 3675 20 20 5775 3675 5795 3675 59 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 3675 20 20 5850 3675 5870 3675 50 60 -6 51 61 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 52 2 700 1950 3900 195062 2400 2100 2400 2550 53 63 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 54 3000 1800 3000 217564 2550 2100 2550 2550 55 65 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 56 3150 1800 3150 217566 2700 2100 2700 2550 57 67 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 58 3300 1800 3300 217568 2850 2100 2850 2550 59 69 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 60 3 450 1800 3450 217570 3000 2100 3000 2550 61 71 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 62 3600 1800 3600 217572 3600 2100 3600 2550 63 73 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 64 3 750 1800 3750 217574 3900 2100 3900 2550 65 75 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 66 4 200 1950 5400 195076 4050 2100 4050 2550 67 77 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 68 4 350 1800 4350 217578 4200 2100 4200 2550 69 79 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 70 4 500 1800 4500 217580 4350 2100 4350 2550 71 81 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 72 4 650 1800 4650 217582 4500 2100 4500 2550 73 83 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 74 4800 1800 4800 217584 3300 1500 3300 1800 75 85 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 76 4950 1800 4950 217586 3600 1500 3600 1800 77 87 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 78 5100 1800 5100 2175 79 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 80 5250 1800 5250 2175 81 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 82 4050 1200 4050 1500 83 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 84 4350 1200 4350 1500 85 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 86 4650 1200 4650 1500 88 3900 1500 3900 1800 87 89 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 88 3 750 1200 5550 1200 5550 1500 3750 1500 3750 120090 3000 1500 4800 1500 4800 1800 3000 1800 3000 1500 89 91 2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2 90 92 1 1 1.00 45.00 90.00 91 3 975 1350 3375 180093 3225 1650 2625 2100 92 94 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 93 95 1 1 1.00 45.00 90.00 94 3 900 1350 3300 180096 3150 1650 2550 2100 95 97 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 96 98 1 1 1.00 45.00 90.00 97 4200 1350 4800 180099 3450 1650 4050 2100 98 100 2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2 99 101 1 1 1.00 45.00 90.00 100 4125 1350 4725 1800102 3375 1650 3975 2100 101 103 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 102 2850 1800 2850 2175 104 2100 2100 2100 2550 105 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 106 1950 2250 3150 2250 107 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 108 3450 2250 4650 2250 103 109 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 104 2700 1800 3900 1800 3900 2175 2700 2175 2700 1800110 1950 2100 3150 2100 3150 2550 1950 2550 1950 2100 105 111 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 106 4200 1800 5400 1800 5400 2175 4200 2175 4200 1800 112 3450 2100 4650 2100 4650 2550 3450 2550 3450 2100 113 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 114 2250 2100 2250 2550 115 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 116 3750 2100 3750 2550 107 117 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 108 118 1 1 1.00 45.00 90.00 109 2 775 2100 2775 2325119 2025 2475 2025 2700 110 120 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 111 121 1 1 1.00 45.00 90.00 112 2 775 2400 2775 2625122 2025 2775 2025 3000 113 123 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 114 2700 2625 2850 2625 2850 2775 2700 2775 2700 2625124 1950 3000 2100 3000 2100 3150 1950 3150 1950 3000 115 125 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 116 2700 2325 2850 2325 2850 2475 2700 2475 2700 2325126 1950 2700 2100 2700 2100 2850 1950 2850 1950 2700 117 127 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3 118 128 1 1 1.00 45.00 90.00 119 2700 3375 3450 3375 3450 3150129 1950 3750 2700 3750 2700 3525 120 130 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 121 2700 3150 3900 3150 3900 3525 2700 3525 2700 3150131 1950 3525 3150 3525 3150 3900 1950 3900 1950 3525 122 132 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3 123 133 1 1 1.00 45.00 90.00 124 4200 3375 4950 3375 4950 3150134 3450 3750 4200 3750 4200 3525 125 135 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 126 4200 3150 5400 3150 5400 3525 4200 3525 4200 3150136 3450 3525 4650 3525 4650 3900 3450 3900 3450 3525 127 137 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3 128 138 1 1 1.00 45.00 90.00 129 3 900 4350 4950 4350 4950 3900139 3150 4650 4200 4650 4200 4275 130 140 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 131 3900 3900 5400 3900 5400 4575 3900 4575 3900 3900 132 4 2 0 50 -1 0 12 0.0000 2 135 975 2625 2175 free buckets\001 133 4 2 0 50 -1 0 12 0.0000 2 135 435 2625 1950 locks\001 134 4 1 0 50 -1 0 12 0.0000 2 135 1365 4650 1125 N thread buckets\001 135 4 1 0 50 -1 0 12 0.0000 2 180 390 5175 1725 heap\001 136 4 1 0 50 -1 0 12 0.0000 2 180 390 2925 1725 heap\001 137 4 1 0 50 -1 0 12 0.0000 2 180 915 3300 3075 bump alloc\001 138 4 0 0 50 -1 0 12 0.0000 2 135 360 4275 3325 lock\001 139 4 1 0 50 -1 0 12 0.0000 2 180 915 4800 3075 bump alloc\001 140 4 0 0 50 -1 0 12 0.0000 2 135 360 3975 4075 lock\001 141 4 1 0 50 -1 0 12 0.0000 2 135 345 4725 3825 sbrk\001 142 4 0 0 50 -1 0 12 0.0000 2 135 360 2775 3325 lock\001 143 4 2 0 50 -1 0 12 0.0000 2 135 675 2625 2625 free lists\001 141 3150 4275 4650 4275 4650 4875 3150 4875 3150 4275 142 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 143 1950 2400 3150 2400 144 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 145 3450 2400 4650 2400 146 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 147 5400 2100 5400 3900 148 4 2 0 50 -1 0 11 0.0000 2 120 300 1875 2250 lock\001 149 4 1 0 50 -1 0 12 0.0000 2 135 1935 3900 1425 N kernel-thread buckets\001 150 4 1 0 50 -1 0 12 0.0000 2 195 810 4425 2025 heap$_2$\001 151 4 1 0 50 -1 0 12 0.0000 2 195 810 2175 2025 heap$_1$\001 152 4 2 0 50 -1 0 11 0.0000 2 120 270 1875 2400 size\001 153 4 2 0 50 -1 0 11 0.0000 2 120 270 1875 2550 free\001 154 4 1 0 50 -1 0 12 0.0000 2 180 825 2550 3450 local pool\001 155 4 0 0 50 -1 0 12 0.0000 2 135 360 3525 3700 lock\001 156 4 0 0 50 -1 0 12 0.0000 2 135 360 3225 4450 lock\001 157 4 2 0 50 -1 0 12 0.0000 2 135 600 1875 3000 free list\001 158 4 1 0 50 -1 0 12 0.0000 2 180 825 4050 3450 local pool\001 159 4 1 0 50 -1 0 12 0.0000 2 180 1455 3900 4200 global pool (sbrk)\001 160 4 0 0 50 -1 0 12 0.0000 2 135 360 2025 3700 lock\001 161 4 1 0 50 -1 0 12 0.0000 2 180 720 6450 3150 free pool\001 162 4 1 0 50 -1 0 12 0.0000 2 180 390 6450 2925 heap\001 -
doc/theses/mubeen_zulfiqar_MMath/AllocDS2.fig
r5407cdc r8d66610 8 8 -2 9 9 1200 2 10 6 2850 2 025 3150 217511 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 2925 21 00 20 20 2925 2100 2945 210012 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3000 21 00 20 20 3000 2100 3020 210013 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3075 21 00 20 20 3075 2100 3095 210010 6 2850 2100 3150 2250 11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 2925 2175 20 20 2925 2175 2945 2175 12 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3000 2175 20 20 3000 2175 3020 2175 13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3075 2175 20 20 3075 2175 3095 2175 14 14 -6 15 6 4050 2 025 4350 217516 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4125 21 00 20 20 4125 2100 4145 210017 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4200 21 00 20 20 4200 2100 4220 210018 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4275 21 00 20 20 4275 2100 4295 210015 6 4050 2100 4350 2250 16 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4125 2175 20 20 4125 2175 4145 2175 17 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4200 2175 20 20 4200 2175 4220 2175 18 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4275 2175 20 20 4275 2175 4295 2175 19 19 -6 20 6 4650 2 025 4950 217521 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4725 21 00 20 20 4725 2100 4745 210022 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4800 21 00 20 20 4800 2100 4820 210023 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4875 21 00 20 20 4875 2100 4895 210020 6 4650 2100 4950 2250 21 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4725 2175 20 20 4725 2175 4745 2175 22 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4800 2175 20 20 4800 2175 4820 2175 23 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4875 2175 20 20 4875 2175 4895 2175 24 24 -6 25 6 3450 2 025 3750 217526 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3525 21 00 20 20 3525 2100 3545 210027 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3600 21 00 20 20 3600 2100 3620 210028 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3675 21 00 20 20 3675 2100 3695 210025 6 3450 2100 3750 2250 26 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3525 2175 20 20 3525 2175 3545 2175 27 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3600 2175 20 20 3600 2175 3620 2175 28 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3675 2175 20 20 3675 2175 3695 2175 29 29 -6 30 6 3300 21 00 3600 247530 6 3300 2175 3600 2550 31 31 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 32 32 1 1 1.00 45.00 90.00 33 3375 21 00 3375 232533 3375 2175 3375 2400 34 34 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 35 3300 2 325 3600 2325 3600 2475 3300 2475 3300 232535 3300 2400 3600 2400 3600 2550 3300 2550 3300 2400 36 36 -6 37 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 38 3150 1800 3150 2250 39 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 40 2850 1800 2850 2250 41 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 42 4650 1800 4650 2250 43 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 44 4950 1800 4950 2250 45 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 46 4500 1725 4500 2250 47 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 48 5100 1725 5100 2250 49 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 50 3450 1800 3450 2250 51 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 52 3750 1800 3750 2250 53 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 54 3300 1725 3300 2250 55 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 56 3900 1725 3900 2250 57 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 58 5250 1800 5250 2250 59 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 60 5400 1800 5400 2250 61 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 62 5550 1800 5550 2250 63 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 64 5700 1800 5700 2250 65 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 66 5850 1800 5850 2250 67 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 68 2700 1725 2700 2250 69 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 70 1 1 1.00 45.00 90.00 71 3375 1275 3375 1575 72 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 73 1 1 1.00 45.00 90.00 74 2700 1275 2700 1575 75 2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2 76 1 1 1.00 45.00 90.00 77 2775 1275 2775 1575 78 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 79 1 1 1.00 45.00 90.00 80 5175 1275 5175 1575 81 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 82 1 1 1.00 45.00 90.00 83 5625 1275 5625 1575 84 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 85 1 1 1.00 45.00 90.00 86 3750 1275 3750 1575 87 2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2 88 1 1 1.00 45.00 90.00 89 3825 1275 3825 1575 37 90 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 38 91 2700 1950 6000 1950 39 92 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 40 3150 1800 3150 2175 41 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 42 2850 1800 2850 2175 43 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 44 4650 1800 4650 2175 45 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 46 4950 1800 4950 2175 47 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 48 4500 1725 4500 2175 49 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 50 5100 1725 5100 2175 51 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 52 3450 1800 3450 2175 53 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 54 3750 1800 3750 2175 55 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 56 3300 1725 3300 2175 57 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 58 3900 1725 3900 2175 59 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 60 5250 1800 5250 2175 61 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 62 5400 1800 5400 2175 63 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 64 5550 1800 5550 2175 65 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 66 5700 1800 5700 2175 67 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 68 5850 1800 5850 2175 69 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 70 2700 1725 2700 2175 93 2700 2100 6000 2100 71 94 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 72 2700 1800 6000 1800 6000 2 175 2700 21752700 180095 2700 1800 6000 1800 6000 2250 2700 2250 2700 1800 73 96 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 74 97 1 1 1.00 45.00 90.00 75 2775 21 00 2775 232598 2775 2175 2775 2400 76 99 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 77 100 1 1 1.00 45.00 90.00 78 2775 24 00 2775 2625101 2775 2475 2775 2700 79 102 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 80 2700 2 625 2850 2625 2850 2775 2700 2775 2700 2625103 2700 2700 2850 2700 2850 2850 2700 2850 2700 2700 81 104 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 82 2700 2 325 2850 2325 2850 2475 2700 2475 2700 2325105 2700 2400 2850 2400 2850 2550 2700 2550 2700 2400 83 106 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 84 107 1 1 1.00 45.00 90.00 85 4575 21 00 4575 2325108 4575 2175 4575 2400 86 109 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 87 4500 2 325 5025 2325 5025 2475 4500 2475 4500 2325110 4500 2400 5025 2400 5025 2550 4500 2550 4500 2400 88 111 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3 89 112 1 1 1.00 45.00 90.00 90 3600 3525 4650 3525 4650 3 075113 3600 3525 4650 3525 4650 3150 91 114 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 92 3600 3075 5100 3075 5100 3750 3600 3750 3600 3075 93 4 2 0 50 -1 0 12 0.0000 2 135 975 2625 2175 free buckets\001 94 4 2 0 50 -1 0 12 0.0000 2 135 435 2625 1950 locks\001 115 3600 3150 5100 3150 5100 3750 3600 3750 3600 3150 116 4 2 0 50 -1 0 11 0.0000 2 120 300 2625 1950 lock\001 95 117 4 1 0 50 -1 0 10 0.0000 2 150 1155 3000 1725 N$\\times$S$_1$\001 96 118 4 1 0 50 -1 0 10 0.0000 2 150 1155 3600 1725 N$\\times$S$_2$\001 119 4 1 0 50 -1 0 12 0.0000 2 180 390 4425 1500 heap\001 120 4 2 0 50 -1 0 12 0.0000 2 135 1140 2550 1425 kernel threads\001 121 4 2 0 50 -1 0 11 0.0000 2 120 270 2625 2100 size\001 122 4 2 0 50 -1 0 11 0.0000 2 120 270 2625 2250 free\001 123 4 2 0 50 -1 0 12 0.0000 2 135 600 2625 2700 free list\001 124 4 0 0 50 -1 0 12 0.0000 2 135 360 3675 3325 lock\001 125 4 1 0 50 -1 0 12 0.0000 2 180 1455 4350 3075 global pool (sbrk)\001 97 126 4 1 0 50 -1 0 10 0.0000 2 150 1110 4800 1725 N$\\times$S$_t$\001 98 4 2 0 50 -1 0 12 0.0000 2 135 675 2625 2625 free lists\00199 4 0 0 50 -1 0 12 0.0000 2 135 360 3675 3250 lock\001100 4 1 0 50 -1 0 12 0.0000 2 135 345 4425 3000 sbrk\001101 4 1 0 50 -1 0 12 0.0000 2 180 390 4425 1500 heap\001 -
doc/theses/mubeen_zulfiqar_MMath/Makefile
r5407cdc r8d66610 15 15 16 16 .PHONY: all clean 17 .PRECIOUS: %.dvi %.ps # do not delete intermediate files 17 18 18 19 ### Commands: 19 20 LATEX = TEXINPUTS=${TEXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${BUILD} 20 21 BIBTEX = BIBINPUTS=${BIBLIB} bibtex 21 #GLOSSARY =INDEXSTYLE=${BUILD} makeglossaries-lite22 #GLOSSARY = INDEXSTYLE=${BUILD} makeglossaries-lite 22 23 23 24 ### Rules and Recipes: … … 25 26 all: ${DOC} 26 27 27 ${BUILD}/%.dvi: ${TEXSRC} ${FIGSRC: .fig=.tex} ${BIBSRC} Makefile | ${BUILD}28 ${BUILD}/%.dvi: ${TEXSRC} ${FIGSRC:%.fig=%.tex} ${BIBSRC} Makefile | ${BUILD} 28 29 ${LATEX} ${BASE} 29 30 ${BIBTEX} ${BUILD}/${BASE} -
doc/theses/mubeen_zulfiqar_MMath/allocator.tex
r5407cdc r8d66610 1 \chapter{Allocator} 1 2 2 \chapter{Allocator} 3 \noindent 4 ==================== 5 6 Writing Points: 7 \begin{itemize} 8 \item 9 Objective of @uHeapLmmm@. 10 \item 11 Design philosophy. 12 \item 13 Background and previous design of @uHeapLmmm@. 14 \item 15 Distributed design of @uHeapLmmm@. 16 17 ----- SHOULD WE GIVE IMPLEMENTATION DETAILS HERE? ----- 18 19 \PAB{Maybe. There might be an Implementation chapter.} 20 \item 21 figure. 22 \item 23 Advantages of distributed design. 24 \end{itemize} 25 26 The new features added to @uHeapLmmm@ (incl. @malloc_size@ routine) 27 \CFA alloc interface with examples. 28 \begin{itemize} 29 \item 30 Why did we need it? 31 \item 32 The added benefits. 33 \end{itemize} 34 35 ----- SHOULD WE GIVE PERFORMANCE AND USABILITY COMPARISON OF DIFFERENT INTERFACES THAT WE TRIED? ----- 36 37 \PAB{Often Performance is its own chapter. I added one for now.} 38 39 Performance evaluation using u-benchmark suite. 40 41 \noindent 42 ==================== 3 43 4 44 \newpage 5 45 \paragraph{Design 1: Decentralized} 6 Fixed number of heaps: shard the heap into N heaps each with a bump-area allocated from the sbrkarea.46 Fixed number of heaps: shard the heap into N heaps each with a bump-area allocated from the @sbrk@ area. 7 47 Kernel threads (KT) are assigned to the N heaps. 8 48 When KTs $\le$ N, the heaps are uncontented. -
doc/theses/mubeen_zulfiqar_MMath/background.tex
r5407cdc r8d66610 1 1 \chapter{Background} 2 2 3 \noindent 4 ==================== 5 6 Writing Points: 7 \begin{itemize} 8 \item 9 Classification of benchmarks. 10 \item 11 Literature review of current benchmarks. 12 \item 13 Features and limitations. 14 \item 15 Literature review of current memory allocators. 16 \item 17 Breakdown of memory allocation techniques. 18 \item 19 Features and limitations. 20 \end{itemize} 21 22 \noindent 23 ==================== 24 3 25 \cite{Wasik08} -
doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex
r5407cdc r8d66610 1 1 \chapter{Benchmarks} 2 3 \noindent 4 ==================== 5 6 Writing Points: 7 \begin{itemize} 8 \item 9 Performance matrices of memory allocation. 10 \item 11 Aim of micro benchmark suite. 12 13 ----- SHOULD WE GIVE IMPLEMENTATION DETAILS HERE? ----- 14 15 \PAB{For the benchmarks, yes.} 16 \item 17 A complete list of benchmarks in micro benchmark suite. 18 \item 19 One detailed section for each benchmark in micro benchmark suite including: 20 21 \begin{itemize} 22 \item 23 The introduction of the benchmark. 24 \item 25 Figure. 26 \item 27 Results with popular memory allocators. 28 \end{itemize} 29 30 \item 31 Summarize performance of current memory allocators. 32 \end{itemize} 33 34 \noindent 35 ==================== -
doc/theses/mubeen_zulfiqar_MMath/conclusion.tex
r5407cdc r8d66610 1 1 \chapter{Conclusion} 2 3 \noindent 4 ==================== 5 6 Writing Points: 7 \begin{itemize} 8 \item 9 Summarize u-benchmark suite. 10 \item 11 Summarize @uHeapLmmm@. 12 \item 13 Make recommendations on memory allocator design. 14 \end{itemize} 15 16 \noindent 17 ==================== -
doc/theses/mubeen_zulfiqar_MMath/intro.tex
r5407cdc r8d66610 1 1 \chapter{Introduction} 2 3 \noindent 4 ==================== 5 6 Writing Points: 7 \begin{itemize} 8 \item 9 Introduce dynamic memory allocation with brief background. 10 \item 11 Scope of the thesis. 12 \item 13 Importance of memory allocation and micro-benchmark suite. 14 \item 15 Research problem. 16 \item 17 Research objectives. 18 \item 19 The vision behind cfa-malloc. 20 \item 21 An outline of the thesis. 22 \end{itemize} 23 24 \noindent 25 ==================== -
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex
r5407cdc r8d66610 84 84 \usepackage{graphicx} 85 85 \usepackage{comment} % Removes large sections of the document. 86 \usepackage{todonotes} % Adds todos (Must be included after comment.)87 86 88 87 % Hyperlinks make it very easy to navigate an electronic document. … … 107 106 colorlinks=true, % false: boxed links; true: colored links 108 107 linkcolor=blue, % color of internal links 109 citecolor= green, % color of links to bibliography108 citecolor=blue, % color of links to bibliography 110 109 filecolor=magenta, % color of file links 111 urlcolor= cyan% color of external links110 urlcolor=blue % color of external links 112 111 } 113 112 \ifthenelse{\boolean{PrintVersion}}{ % for improved print quality, change some hyperref options … … 168 167 \CFAStyle % CFA code-style for all languages 169 168 \lstset{language=CFA,basicstyle=\linespread{0.9}\tt} % CFA default language 169 \newcommand{\PAB}[1]{{\color{red}PAB: #1}} 170 170 171 171 %====================================================================== … … 194 194 \input{allocator} 195 195 \input{benchmarks} 196 \input{performance} 196 197 \input{conclusion} 197 198 -
doc/user/Makefile
r5407cdc r8d66610 61 61 62 62 ${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \ 63 ${Macros}/common. tex${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib build/version | ${Build}63 ${Macros}/common.sty ${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib build/version | ${Build} 64 64 # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run. 65 65 if [ ! -r ${basename $@}.ind ] ; then touch ${Build}/${basename $@}.ind ; fi … … 68 68 -${BibTeX} ${Build}/${basename $@} 69 69 # Some citations reference others so run again to resolve these citations 70 ${LaTeX} ${basename $@}.tex70 # ${LaTeX} ${basename $@}.tex 71 71 -${BibTeX} ${Build}/${basename $@} 72 72 # Make index from *.aux entries and input index at end of document … … 75 75 ${LaTeX} ${basename $@}.tex 76 76 # Run again to get index title into table of contents 77 ${LaTeX} ${basename $@}.tex77 # ${LaTeX} ${basename $@}.tex 78 78 79 79 ## Define the default recipes. -
doc/user/user.tex
r5407cdc r8d66610 11 11 %% Created On : Wed Apr 6 14:53:29 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : S un Apr 25 19:03:03 202114 %% Update Count : 495113 %% Last Modified On : Sat May 8 08:51:33 2021 14 %% Update Count : 5062 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 65 65 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^ 66 66 % math escape $...$ (dollar symbol) 67 \input{common} % common CFA document macros 67 \usepackage{common} % common CFA document macros 68 %\input{common} % common CFA document macros 68 69 \setlength{\gcolumnposn}{3in} 69 70 \CFAStyle % use default CFA format-style … … 585 586 For example, the octal ©0© or hexadecimal ©0x© prefix may end with an underscore ©0_377© or ©0x_ff©; 586 587 the exponent infix ©E© may start or end with an underscore ©1.0_E10©, ©1.0E_10© or ©1.0_E_10©; 587 the type suffixes ©U©, ©L©, etc.may start with an underscore ©1_U©, ©1_ll© or ©1.0E10_f©.588 the type suffixes ©U©, ©L©, \etc may start with an underscore ©1_U©, ©1_ll© or ©1.0E10_f©. 588 589 \end{enumerate} 589 590 It is significantly easier to read and enter long constants when they are broken up into smaller groupings (many cultures use comma and/or period among digits for the same purpose). … … 1570 1571 \end{cquote} 1571 1572 1572 All type qualifiers, \eg ©const©, ©volatile©, etc., are used in the normal way with the new declarations and also appear left to right, \eg:1573 All type qualifiers, \eg ©const©, ©volatile©, \etc, are used in the normal way with the new declarations and also appear left to right, \eg: 1573 1574 \begin{cquote} 1574 1575 \begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}} … … 1590 1591 \end{tabular} 1591 1592 \end{cquote} 1592 All declaration qualifiers, \eg ©extern©, ©static©, etc., are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}1593 All declaration qualifiers, \eg ©extern©, ©static©, \etc, are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier} 1593 1594 The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature.~\cite[\S~6.11.5(1)]{C11}} \eg: 1594 1595 \begin{cquote} … … 3147 3148 also, it is unnecessary to specify all the fields of a struct in a multiple record-field tuple. 3148 3149 3149 Since tuple-index expressions are a form of member-access expression, it is possible to use tuple-index expressions in conjunction with member-access expressions to restructure a tuple (\eg, rearrange components, drop components, duplicate components, etc.).3150 Since tuple-index expressions are a form of member-access expression, it is possible to use tuple-index expressions in conjunction with member-access expressions to restructure a tuple (\eg, rearrange components, drop components, duplicate components, \etc). 3150 3151 \begin{cfa} 3151 3152 [ int, int, long, double ] x; … … 3312 3313 3313 3314 \section{Tuples} 3315 \label{tuples} 3314 3316 3315 3317 In C and \CFA, lists of elements appear in several contexts, such as the parameter list for a routine call. … … 3420 3422 3421 3423 \subsection{Tuple Coercions} 3424 \label{tuple coercions}\label{coercions!tuple} 3422 3425 3423 3426 There are four coercions that can be performed on tuples and tuple variables: closing, opening, flattening and structuring. … … 3464 3467 3465 3468 \subsection{Mass Assignment} 3469 \label{mass assignment}\label{assignment!mass} 3466 3470 3467 3471 \CFA permits assignment to several variables at once using mass assignment~\cite{CLU}. … … 3504 3508 3505 3509 \subsection{Multiple Assignment} 3510 \label{multiple assignment}\label{assignment!multiple} 3506 3511 3507 3512 \CFA also supports the assignment of several values at once, known as multiple assignment~\cite{CLU,Galletly96}. … … 3545 3550 3546 3551 \subsection{Cascade Assignment} 3552 \index{cascade assignment}\index{assignment!cascade} 3547 3553 3548 3554 As in C, \CFA mass and multiple assignments can be cascaded, producing cascade assignment. … … 3564 3570 \section{Stream I/O Library} 3565 3571 \label{s:StreamIOLibrary} 3566 \index{input /output stream library}3567 \index{stream library} 3572 \index{input}\index{output} 3573 \index{stream library}\index{library!stream} 3568 3574 3569 3575 The goal of \CFA stream input/output (I/O) is to simplify the common cases\index{I/O!common case}, while fully supporting polymorphism and user defined types in a consistent way. 3570 3576 Stream I/O can be implicitly or explicitly formatted. 3571 Implicit formatting means \CFA selects the output or input format for values that match with the type of a variable.3577 Implicit formatting means \CFA selects the output or input format for values that matches the variable's type. 3572 3578 Explicit formatting means additional information is specified to augment how an output or input of value is interpreted. 3573 \CFA formatting i s a cross between C ©printf© and \CC ©cout© manipulators, and Python implicit spacing and newline.3579 \CFA formatting incorporates ideas from C ©printf©, \CC ©stream© manipulators, and Python implicit spacing and newline. 3574 3580 Specifically: 3575 3581 \begin{itemize} … … 3584 3590 Hence, it is common programming practice to toggle manipulators on and then back to the default to prevent downstream side-effects. 3585 3591 Without this programming style, errors occur when moving prints, as manipulator effects incorrectly flow into the new location. 3586 (To guarantee no side-effects, manipulator values must be saved and restored across function calls.) 3587 \item 3588 \CFA has more sophisticated implicit spacing between valuesthan Python, plus implicit newline at the end of a print.3592 Furthermore, to guarantee no side-effects, manipulator values must be saved and restored across function calls. 3593 \item 3594 \CFA has more sophisticated implicit value spacing than Python, plus implicit newline at the end of a print. 3589 3595 \end{itemize} 3596 3597 The standard polymorphic I/Os stream are ©stdin©/©sin© (input), ©stdout©/©sout© and ©stderr©/©serr© (output) (like C++ ©cin©/©cout©/©cerr©). 3598 Polymorphic streams ©exit© and ©abort© provide implicit program termination without and with generating a stack trace and core file. 3599 Stream ©exit© implicitly returns ©EXIT_FAILURE© to the shell. 3600 \begin{cfa} 3601 ®exit® | "x (" | x | ") negative value."; // terminate and return EXIT_FAILURE to shell 3602 ®abort® | "x (" | x | ") negative value."; // terminate and generate stack trace and core file 3603 \end{cfa} 3604 Note, \CFA stream variables ©stdin©, ©stdout©, ©stderr©, ©exit©, and ©abort© overload C variables ©stdin©, ©stdout©, ©stderr©, and functions ©exit© and ©abort©, respectively. 3590 3605 The \CFA header file for the I/O library is \Indexc{fstream.hfa}. 3606 3607 3608 \subsection{Basic I/O} 3591 3609 3592 3610 For implicit formatted output, the common case is printing a series of variables separated by whitespace. … … 3601 3619 \begin{cfa} 3602 3620 3603 cout << x ®<< " "® << y ®<< " "® << z<< endl;3621 cout << x ®<< " "® << y ®<< " "® << z << endl; 3604 3622 \end{cfa} 3605 3623 & … … 3653 3671 \end{tabular} 3654 3672 \end{cquote} 3655 Input and output use a uniform operator, ©|©, rather than separate operators, as in ©>>© and ©<<© for \CC.3673 Input and output use a uniform operator, ©|©, rather than \CC's ©>>© and ©<<© input/output operators. 3656 3674 There is a weak similarity between the \CFA logical-or operator and the \Index{Shell pipe-operator} for moving data, where data flows in the correct direction for input but the opposite direction for output. 3657 3675 … … 3698 3716 \end{cquote} 3699 3717 3718 \VRef[Figure]{f:CFACommand-LineProcessing} shows idiomatic \CFA command-line processing and copying an input file to an output file. 3719 Note, a stream variable may be copied because it is a reference to an underlying stream data-structures. 3720 All I/O errors are handles as exceptions, but end-of-file is not an exception as C programmers are use to explicitly checking for it. 3721 3722 \begin{figure} 3723 \begin{cfa} 3724 #include ®<fstream.hfa>® 3725 3726 int main( int argc, char * argv[] ) { 3727 ®ifstream® in = stdin; $\C{// copy default files}$ 3728 ®ofstream® out = stdout; 3729 3730 try { 3731 choose ( argc ) { 3732 case 2, 3: 3733 ®open®( in, argv[1] ); $\C{// open input file first as output creates file}$ 3734 if ( argc == 3 ) ®open®( out, argv[2] ); $\C{// do not create output unless input opens}$ 3735 case 1: ; $\C{// use default files}$ 3736 default: 3737 ®exit® | "Usage" | argv[0] | "[ input-file (default stdin) " 3738 "[ output-file (default stdout) ] ]"; 3739 } // choose 3740 } catch( ®Open_Failure® * ex; ex->istream == &in ) { 3741 ®exit® | "Unable to open input file" | argv[1]; 3742 } catch( ®Open_Failure® * ex; ex->ostream == &out ) { 3743 ®close®( in ); $\C{// optional}$ 3744 ®exit® | "Unable to open output file" | argv[2]; 3745 } // try 3746 3747 out | nlOff; $\C{// turn off auto newline}$ 3748 in | nlOn; $\C{// turn on reading newline}$ 3749 char ch; 3750 for () { $\C{// read/write characters}$ 3751 in | ch; 3752 if ( eof( in ) ) break; $\C{// eof ?}$ 3753 out | ch; 3754 } // for 3755 } // main 3756 \end{cfa} 3757 \caption{\CFA Command-Line Processing} 3758 \label{f:CFACommand-LineProcessing} 3759 \end{figure} 3760 3761 \VRef[Figure]{f:StreamFunctions} shows the stream operations. 3762 \begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt] 3763 \item 3764 \Indexc{fail} tests the stream error-indicator, returning nonzero if it is set. 3765 \item 3766 \Indexc{clear} resets the stream error-indicator. 3767 \item 3768 \Indexc{flush} (©ofstream© only) causes any unwritten data for a stream to be written to the file. 3769 \item 3770 \Indexc{eof} (©ifstream© only) tests the end-of-file indicator for the stream pointed to by stream. 3771 Returns true if the end-of-file indicator is set, otherwise false. 3772 \item 3773 \Indexc{open} binds the file with ©name© to a stream accessed with ©mode© (see ©fopen©). 3774 \item 3775 \Indexc{close} flushes the stream and closes the file. 3776 \item 3777 \Indexc{write} (©ofstream© only) write ©size© bytes to the stream. 3778 The bytes are written lazily to file when internal buffers fill. 3779 Eager buffer writes are done with ©flush© 3780 \item 3781 \Indexc{read} (©ifstream© only) read ©size© bytes to the stream. 3782 \item 3783 \Indexc{ungetc} (©ifstream© only) pushes the character back to the input stream. 3784 Pushed-back characters returned by subsequent reads in the reverse order of pushing. 3785 \end{itemize} 3786 The constructor functions: 3787 \begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt] 3788 \item 3789 create an unbound stream, which is subsequently bound to a file with ©open©. 3790 \item 3791 create a bound stream to the associated file with given ©mode©. 3792 \end{itemize} 3793 The destructor closes the stream. 3794 3795 \begin{figure} 3796 \begin{cfa} 3797 // *********************************** ofstream *********************************** 3798 3799 bool fail( ofstream & );$\indexc{fail}\index{ofstream@©ofstream©!©fail©}$ 3800 void clear( ofstream & );$\indexc{clear}\index{ofstream@©ofstream©!©clear©}$ 3801 int flush( ofstream & );$\indexc{flush}\index{ofstream@©ofstream©!©flush©}$ 3802 void open( ofstream &, const char name[], const char mode[] = "w" );$\indexc{open}\index{ofstream@©ofstream©!©open©}$ 3803 void close( ofstream & );$\indexc{close}\index{ofstream@©ofstream©!©close©}$ 3804 ofstream & write( ofstream &, const char data[], size_t size );$\indexc{write}\index{ofstream@©ofstream©!©write©}$ 3805 3806 void ?{}( ofstream & );$\index{ofstream@©ofstream©!©?{}©}$ 3807 void ?{}( ofstream &, const char name[], const char mode[] = "w" ); 3808 void ^?{}( ofstream & );$\index{ofstream@©ofstream©!©^?{}©}$ 3809 3810 // *********************************** ifstream *********************************** 3811 3812 bool fail( ifstream & is );$\indexc{fail}\index{ifstream@©ifstream©!©fail©}$ 3813 void clear( ifstream & );$\indexc{clear}\index{ifstream@©ifstream©!©clear©}$ 3814 bool eof( ifstream & is );$\indexc{eof}\index{ifstream@©ifstream©!©eof©}$ 3815 void open( ifstream & is, const char name[], const char mode[] = "r" );$\indexc{open}\index{ifstream@©ifstream©!©open©}$ 3816 void close( ifstream & is );$\indexc{close}\index{ifstream@©ifstream©!©close©}$ 3817 ifstream & read( ifstream & is, char data[], size_t size );$\indexc{read}\index{ifstream@©ifstream©!©read©}$ 3818 ifstream & ungetc( ifstream & is, char c );$\indexc{unget}\index{ifstream@©ifstream©!©unget©}$ 3819 3820 void ?{}( ifstream & is );$\index{ifstream@©ifstream©!©?{}©}$ 3821 void ?{}( ifstream & is, const char name[], const char mode[] = "r" ); 3822 void ^?{}( ifstream & is );$\index{ifstream@©ifstream©!©^?{}©}$ 3823 \end{cfa} 3824 \caption{Stream Functions} 3825 \label{f:StreamFunctions} 3826 \end{figure} 3700 3827 3701 3828 … … 3846 3973 3847 3974 \item 3848 \Indexc{sepOn}\index{manipulator!sepOn@©sepOn©} and \Indexc{sepOff}\index{manipulator!sepOff@©sepOff©} toggle printing the separator with respect to the next printed item, and then return to the global sep erator setting.3975 \Indexc{sepOn}\index{manipulator!sepOn@©sepOn©} and \Indexc{sepOff}\index{manipulator!sepOff@©sepOff©} toggle printing the separator with respect to the next printed item, and then return to the global separator setting. 3849 3976 \begin{cfa}[belowskip=0pt] 3850 3977 sout | 1 | sepOff | 2 | 3; $\C{// turn off implicit separator for the next item}$ … … 4030 4157 sout | wd( 4, "ab" ) | wd( 3, "ab" ) | wd( 2, "ab" ); 4031 4158 \end{cfa} 4032 \begin{cfa}[showspaces=true,aboveskip=0pt ,belowskip=0pt]4159 \begin{cfa}[showspaces=true,aboveskip=0pt] 4033 4160 ® ®34 ® ®34 34 4034 4161 ® ®4.000000 ® ®4.000000 4.000000 … … 4378 4505 \end{cfa} 4379 4506 4380 \Textbf{WARNING:} ©printf©\index{printf@©printf©}, ©scanf©\index{scanf@©scanf©} and their derivatives are unsafe when used with user-level threading, as in \CFA. 4381 These stream routines use kernel-thread locking (©futex©\index{futex@©futex©}), which block kernel threads, to prevent interleaving of I/O. 4382 However, the following simple example illustrates how a deadlock can occur (other complex scenarios are possible). 4383 Assume a single kernel thread and two user-level threads calling ©printf©. 4384 One user-level thread acquires the I/O lock and is time-sliced while performing ©printf©. 4385 The other user-level thread then starts execution, calls ©printf©, and blocks the only kernel thread because it cannot acquire the I/O lock. 4386 It does not help if the kernel lock is multiple acquisition, \ie, the lock owner can acquire it multiple times, because it then results in two user threads in the ©printf© critical section, corrupting the stream. 4507 4508 \section{String Stream} 4509 4510 All the stream formatting capabilities are available to format text to/from a C string rather than to a stream file. 4511 \VRef[Figure]{f:StringStreamProcessing} shows writing (output) and reading (input) from a C string. 4512 \begin{figure} 4513 \begin{cfa} 4514 #include <fstream.hfa> 4515 #include <strstream.hfa> 4516 4517 int main() { 4518 enum { size = 256 }; 4519 char buf[size]; $\C{// output buffer}$ 4520 ®ostrstream osstr = { buf, size };® $\C{// bind output buffer/size}$ 4521 int i = 3, j = 5, k = 7; 4522 double x = 12345678.9, y = 98765.4321e-11; 4523 4524 osstr | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y)); $\C{// same lines of output}$ 4525 write( osstr ); 4526 printf( "%s", buf ); 4527 sout | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y)); 4528 4529 char buf2[] = "12 14 15 3.5 7e4"; $\C{// input buffer}$ 4530 ®istrstream isstr = { buf2 };® 4531 isstr | i | j | k | x | y; 4532 sout | i | j | k | x | y; 4533 } 4534 \end{cfa} 4535 \caption{String Stream Processing} 4536 \label{f:StringStreamProcessing} 4537 \end{figure} 4538 4539 \VRef[Figure]{f:StringStreamFunctions} shows the string stream operations. 4540 \begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt] 4541 \item 4542 \Indexc{write} (©ostrstream© only) writes all the buffered characters to the specified stream (©stdout© default). 4543 \end{itemize} 4544 The constructor functions: 4545 \begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt] 4546 \item 4547 create a bound stream to a write buffer (©ostrstream©) of ©size© or a read buffer (©istrstream©) containing a C string terminated with ©'\0'©. 4548 \end{itemize} 4549 4550 \begin{figure} 4551 \begin{cfa} 4552 // *********************************** ostrstream *********************************** 4553 4554 ostrstream & write( ostrstream & os, FILE * stream = stdout ); 4555 4556 void ?{}( ostrstream &, char buf[], size_t size ); 4557 4558 // *********************************** istrstream *********************************** 4559 4560 void ?{}( istrstream & is, char buf[] ); 4561 \end{cfa} 4562 \caption{String Stream Functions} 4563 \label{f:StringStreamFunctions} 4564 \end{figure} 4387 4565 4388 4566 … … 5482 5660 \item 5483 5661 Package: a container to organize modules for distribution; It has attributes like name, author, 5484 version, dependences, etc.5485 \item 5486 Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.5662 version, dependences, \etc. 5663 \item 5664 Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, \etc. 5487 5665 \end{itemize} 5488 5666 … … 5621 5799 5622 5800 A package is defined by putting a project description file, Do.prj, with one or more modules into a directory. 5623 This project description file contains the package's meta data, including package name, author, version, dependences, etc.5801 This project description file contains the package's meta data, including package name, author, version, dependences, \etc. 5624 5802 It should be in the root of the package directory. 5625 5803 … … 5678 5856 Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside 5679 5857 \item 5680 Package: a container to organize modules for distribution; It has attributes like name, author, version, dependences, etc.5681 \item 5682 Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.5858 Package: a container to organize modules for distribution; It has attributes like name, author, version, dependences, \etc. 5859 \item 5860 Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, \etc. 5683 5861 \end{itemize} 5684 5862 … … 8111 8289 \begin{cquote} 8112 8290 \begin{tabular}{@{}l@{\hspace{\parindentlnth}}|@{\hspace{\parindentlnth}}l@{}} 8113 \multicolumn{1}{@{}c|@{\hspace{\parindentlnth}}}{\textbf{\CFA}} & \multicolumn{1}{@{\hspace{\parindentlnth}}c }{\textbf{C}@{}} \\8291 \multicolumn{1}{@{}c|@{\hspace{\parindentlnth}}}{\textbf{\CFA}} & \multicolumn{1}{@{\hspace{\parindentlnth}}c@{}}{\textbf{C}} \\ 8114 8292 \hline 8115 8293 \begin{cfa} 8116 #include <gmp >$\indexc{gmp}$8294 #include <gmp.hfa>$\indexc{gmp}$ 8117 8295 int main( void ) { 8118 8296 sout | "Factorial Numbers"; -
libcfa/src/common.hfa
r5407cdc r8d66610 10 10 // Created On : Wed Jul 11 17:54:36 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sat Aug 15 08:51:29 202013 // Update Count : 1 412 // Last Modified On : Wed May 5 14:02:04 2021 13 // Update Count : 18 14 14 // 15 15 … … 67 67 68 68 static inline { 69 char min( char t1, char t2 ) { return t1 < t2 ? t1 : t2; } // optimization 70 intptr_t min( intptr_t t1, intptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization 71 uintptr_t min( uintptr_t t1, uintptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization 69 char min( char v1, char v2 ) { return v1 < v2 ? v1 : v2; } // optimization 70 int min( int v1, int v2 ) { return v1 < v2 ? v1 : v2; } 71 unsigned int min( unsigned int v1, unsigned int v2 ) { return v1 < v2 ? v1 : v2; } 72 long int min( long int v1, long int v2 ) { return v1 < v2 ? v1 : v2; } 73 unsigned long int min( unsigned long int v1, unsigned int v2 ) { return v1 < v2 ? v1 : v2; } 74 long long int min( long long int v1, long long int v2 ) { return v1 < v2 ? v1 : v2; } 75 unsigned long long int min( unsigned long long int v1, unsigned int v2 ) { return v1 < v2 ? v1 : v2; } 72 76 forall( T | { int ?<?( T, T ); } ) 73 T min( T t1, T t2 ) { return t1 < t2 ? t1 : t2; }77 T min( T v1, T v2 ) { return v1 < v2 ? v1 : v2; } 74 78 75 char max( char t1, char t2 ) { return t1 > t2 ? t1 : t2; } // optimization 76 intptr_t max( intptr_t t1, intptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization 77 uintptr_t max( uintptr_t t1, uintptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization 79 char max( char v1, char v2 ) { return v1 > v2 ? v1 : v2; } // optimization 80 int max( int v1, int v2 ) { return v1 > v2 ? v1 : v2; } 81 unsigned int max( unsigned int v1, unsigned int v2 ) { return v1 > v2 ? v1 : v2; } 82 long int max( long int v1, long int v2 ) { return v1 > v2 ? v1 : v2; } 83 unsigned long int max( unsigned long int v1, unsigned long int v2 ) { return v1 > v2 ? v1 : v2; } 84 long long int max( long long int v1, long long int v2 ) { return v1 > v2 ? v1 : v2; } 85 unsigned long long int max( unsigned long long int v1, unsigned long long int v2 ) { return v1 > v2 ? v1 : v2; } 78 86 forall( T | { int ?>?( T, T ); } ) 79 T max( T t1, T t2 ) { return t1 > t2 ? t1 : t2; }87 T max( T v1, T v2 ) { return v1 > v2 ? v1 : v2; } 80 88 81 89 forall( T | { T min( T, T ); T max( T, T ); } ) -
libcfa/src/concurrency/alarm.cfa
r5407cdc r8d66610 38 38 39 39 void __kernel_set_timer( Duration alarm ) { 40 verifyf(alarm >= 1`us || alarm == 0, "Setting timer to < 1us (%jins)", alarm`ns); 41 setitimer( ITIMER_REAL, &(itimerval){ alarm }, 0p ); 40 alarm = max(alarm, 1`us); 41 itimerval otv @= { 0 }; 42 getitimer( ITIMER_REAL, &otv ); 43 Duration od = { otv.it_value }; 44 if(od == 0 || od > alarm) { 45 setitimer( ITIMER_REAL, &(itimerval){ alarm }, 0p ); 46 } 42 47 } 43 48 … … 46 51 //============================================================================================= 47 52 48 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period) with( this ) { 53 void ?{}( alarm_node_t & this, $thread * thrd, Duration alarm, Duration period) with( this ) { 54 this.initial = alarm; 55 this.period = period; 49 56 this.thrd = thrd; 50 this.alarm = alarm; 51 this.period = period; 57 this.timeval = __kernel_get_time() + alarm; 52 58 set = false; 53 59 type = User; 54 60 } 55 61 56 void ?{}( alarm_node_t & this, processor * proc, Time alarm, Duration period ) with( this ) { 62 void ?{}( alarm_node_t & this, processor * proc, Duration alarm, Duration period ) with( this ) { 63 this.initial = alarm; 64 this.period = period; 57 65 this.proc = proc; 58 this.alarm = alarm; 59 this.period = period; 66 this.timeval = __kernel_get_time() + alarm; 60 67 set = false; 61 68 type = Kernel; 62 69 } 63 void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period ) with( this ) { 64 this.alarm = alarm; 65 this.period = period; 70 void ?{}( alarm_node_t & this, Alarm_Callback callback, Duration alarm, Duration period ) with( this ) { 66 71 this.callback = callback; 72 this.initial = alarm; 73 this.period = period; 74 this.timeval = __kernel_get_time() + alarm; 67 75 set = false; 68 76 type = Callback; … … 77 85 void insert( alarm_list_t * this, alarm_node_t * n ) { 78 86 alarm_node_t * it = & (*this)`first; 79 while( it && (n-> alarm > it->alarm) ) {87 while( it && (n->timeval > it->timeval) ) { 80 88 it = & (*it)`next; 81 89 } … … 105 113 lock( event_kernel->lock __cfaabi_dbg_ctx2 ); 106 114 { 107 verify( validate( alarms ) ); 108 bool first = ! & alarms`first; 115 /* paranoid */ verify( validate( alarms ) ); 109 116 110 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu).\n", this, this->alarm.tn ); 117 Time curr = __kernel_get_time(); 118 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->timeval.tn ); 111 119 insert( &alarms, this ); 112 if( first ) { 113 __kernel_set_timer( alarms`first.alarm - __kernel_get_time() ); 114 } 120 __kernel_set_timer( this->timeval - curr); 121 this->set = true; 115 122 } 116 123 unlock( event_kernel->lock ); 117 this->set = true;118 124 enable_interrupts(); 119 125 } … … 124 130 { 125 131 verify( validate( event_kernel->alarms ) ); 126 remove( *this ); 132 if (this->set) remove( *this ); 133 this->set = false; 127 134 } 128 135 unlock( event_kernel->lock ); 129 136 enable_interrupts(); 130 this->set = false;131 137 } 132 138 … … 136 142 137 143 void sleep( Duration duration ) { 138 alarm_node_t node = { active_thread(), __kernel_get_time() +duration, 0`s };144 alarm_node_t node = { active_thread(), duration, 0`s }; 139 145 140 146 register_self( &node ); -
libcfa/src/concurrency/alarm.hfa
r5407cdc r8d66610 46 46 47 47 struct alarm_node_t { 48 Time alarm;// time when alarm goes off49 Duration period; 48 Duration initial; // time when alarm goes off 49 Duration period; // if > 0 => period of alarm 50 50 51 DLISTED_MGD_IMPL_IN(alarm_node_t)51 inline dlink(alarm_node_t); 52 52 53 53 union { 54 $thread * thrd; 55 processor * proc; 56 Alarm_Callback callback; 54 $thread * thrd; // thrd who created event 55 processor * proc; // proc who created event 56 Alarm_Callback callback; // callback to handle event 57 57 }; 58 58 59 bool set :1; // whether or not the alarm has be registered 60 enum alarm_type type; // true if this is not a user defined alarm 59 Time timeval; // actual time at which the alarm goes off 60 enum alarm_type type; // true if this is not a user defined alarm 61 bool set :1; // whether or not the alarm has be registered 61 62 }; 62 DLISTED_MGD_IMPL_OUT(alarm_node_t)63 P9_EMBEDDED( alarm_node_t, dlink(alarm_node_t) ) 63 64 64 void ?{}( alarm_node_t & this, $thread * thrd, Timealarm, Duration period );65 void ?{}( alarm_node_t & this, processor * proc, Timealarm, Duration period );66 void ?{}( alarm_node_t & this, Alarm_Callback callback, Timealarm, Duration period );65 void ?{}( alarm_node_t & this, $thread * thrd, Duration alarm, Duration period ); 66 void ?{}( alarm_node_t & this, processor * proc, Duration alarm, Duration period ); 67 void ?{}( alarm_node_t & this, Alarm_Callback callback, Duration alarm, Duration period ); 67 68 void ^?{}( alarm_node_t & this ); 68 69 69 typedef dlist(alarm_node_t , alarm_node_t) alarm_list_t;70 typedef dlist(alarm_node_t) alarm_list_t; 70 71 71 72 void insert( alarm_list_t * this, alarm_node_t * n ); -
libcfa/src/concurrency/clib/cfathread.cfa
r5407cdc r8d66610 27 27 extern void __cfactx_invoke_thread(void (*main)(void *), void * this); 28 28 } 29 30 extern Time __kernel_get_time(); 29 31 30 32 //================================================================================ … … 265 267 int cfathread_cond_timedwait(cfathread_cond_t *restrict cond, cfathread_mutex_t *restrict mut, const struct timespec *restrict abstime) __attribute__((nonnull (1,2,3))) { 266 268 Time t = { *abstime }; 267 if( wait( (*cond)->impl, (*mut)->impl, t ) ) { 269 timespec curr; 270 clock_gettime( CLOCK_REALTIME, &curr ); 271 Time c = { curr }; 272 if( wait( (*cond)->impl, (*mut)->impl, t - c ) ) { 268 273 return 0; 269 274 } -
libcfa/src/concurrency/clib/cfathread.h
r5407cdc r8d66610 80 80 81 81 typedef struct cfathread_cond_attr { 82 // WARNING: adding support for pthread_condattr_setclock would require keeping track of the clock 83 // and reading it in cond_timedwait 82 84 } cfathread_condattr_t; 83 85 typedef struct cfathread_condition * cfathread_cond_t; -
libcfa/src/concurrency/invoke.h
r5407cdc r8d66610 146 146 struct __thread_desc_link { 147 147 struct $thread * next; 148 struct $thread * prev;149 148 volatile unsigned long long ts; 150 unsigned preferred;151 149 }; 152 150 … … 155 153 // context that is switch during a __cfactx_switch 156 154 struct __stack_context_t context; 155 156 // Link lists fields 157 // instrusive link field for threads 158 struct __thread_desc_link link; 157 159 158 160 // current execution status for coroutine … … 170 172 struct cluster * curr_cluster; 171 173 172 // Link lists fields 173 // instrusive link field for threads 174 struct __thread_desc_link link; 174 // preferred ready-queue 175 unsigned preferred; 175 176 176 177 // coroutine body used to store context -
libcfa/src/concurrency/io.cfa
r5407cdc r8d66610 138 138 /* paranoid */ verify( proc->io.ctx ); 139 139 140 __attribute__((unused)) cluster * cltr = proc->cltr; 140 141 $io_context & ctx = *proc->io.ctx; 142 143 // for(i; 2) { 144 // unsigned idx = proc->rdq.id + i; 145 // cltr->ready_queue.lanes.tscs[idx].tv = -1ull; 146 // } 141 147 142 148 __ioarbiter_flush( ctx ); … … 151 157 // Update statistics 152 158 __STATS__( false, io.calls.errors.busy ++; ) 159 // for(i; 2) { 160 // unsigned idx = proc->rdq.id + i; 161 // cltr->ready_queue.lanes.tscs[idx].tv = rdtscl(); 162 // } 153 163 return; 154 164 default: … … 172 182 173 183 ctx.proc->io.pending = false; 184 185 ready_schedule_lock(); 186 __cfa_io_drain( proc ); 187 ready_schedule_unlock(); 188 // for(i; 2) { 189 // unsigned idx = proc->rdq.id + i; 190 // cltr->ready_queue.lanes.tscs[idx].tv = rdtscl(); 191 // } 174 192 } 175 193 -
libcfa/src/concurrency/kernel.cfa
r5407cdc r8d66610 163 163 #if !defined(__CFA_NO_STATISTICS__) 164 164 if( this->print_halts ) { 165 __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this-> id, this->name, (void*)this);165 __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->unique_id, this->name, (void*)this); 166 166 } 167 167 #endif … … 170 170 // Setup preemption data 171 171 preemption_scope scope = { this }; 172 173 __STATS( unsigned long long last_tally = rdtscl(); )174 172 175 173 // if we need to run some special setup, now is the time to do it. … … 184 182 MAIN_LOOP: 185 183 for() { 184 #define OLD_MAIN 1 185 #if OLD_MAIN 186 186 // Check if there is pending io 187 187 __maybe_io_drain( this ); … … 223 223 #if !defined(__CFA_NO_STATISTICS__) 224 224 if(this->print_halts) { 225 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this-> id, rdtscl());225 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); 226 226 } 227 227 #endif … … 236 236 #if !defined(__CFA_NO_STATISTICS__) 237 237 if(this->print_halts) { 238 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this-> id, rdtscl());238 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); 239 239 } 240 240 #endif … … 258 258 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 259 259 260 #if !defined(__CFA_NO_STATISTICS__)261 unsigned long long curr = rdtscl();262 if(curr > (last_tally + 500000000)) {263 __tally_stats(this->cltr->stats, __cfaabi_tls.this_stats);264 last_tally = curr;265 }266 #endif267 268 260 if(this->io.pending && !this->io.dirty) { 269 261 __cfa_io_flush( this ); 270 262 } 271 263 272 // SEARCH: { 273 // /* paranoid */ verify( ! __preemption_enabled() ); 274 // /* paranoid */ verify( kernelTLS().this_proc_id ); 275 276 // // First, lock the scheduler since we are searching for a thread 277 278 // // Try to get the next thread 279 // ready_schedule_lock(); 280 // readyThread = pop_fast( this->cltr ); 281 // ready_schedule_unlock(); 282 // if(readyThread) { break SEARCH; } 283 284 // // If we can't find a thread, might as well flush any outstanding I/O 285 // if(this->io.pending) { __cfa_io_flush( this ); } 286 287 // // Spin a little on I/O, just in case 288 // for(25) { 289 // __maybe_io_drain( this ); 290 // ready_schedule_lock(); 291 // readyThread = pop_fast( this->cltr ); 292 // ready_schedule_unlock(); 293 // if(readyThread) { break SEARCH; } 294 // } 295 296 // // no luck, try stealing a few times 297 // for(25) { 298 // if( __maybe_io_drain( this ) ) { 299 // ready_schedule_lock(); 300 // readyThread = pop_fast( this->cltr ); 301 // } else { 302 // ready_schedule_lock(); 303 // readyThread = pop_slow( this->cltr ); 304 // } 305 // ready_schedule_unlock(); 306 // if(readyThread) { break SEARCH; } 307 // } 308 309 // // still no luck, search for a thread 310 // ready_schedule_lock(); 311 // readyThread = pop_search( this->cltr ); 312 // ready_schedule_unlock(); 313 // if(readyThread) { break SEARCH; } 314 315 // // Don't block if we are done 316 // if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 317 318 // __STATS( __tls_stats()->ready.sleep.halts++; ) 319 320 // // Push self to idle stack 321 // mark_idle(this->cltr->procs, * this); 322 323 // // Confirm the ready-queue is empty 324 // __maybe_io_drain( this ); 325 // ready_schedule_lock(); 326 // readyThread = pop_search( this->cltr ); 327 // ready_schedule_unlock(); 328 329 // if( readyThread ) { 330 // // A thread was found, cancel the halt 331 // mark_awake(this->cltr->procs, * this); 332 333 // __STATS( __tls_stats()->ready.sleep.cancels++; ) 334 335 // // continue the main loop 336 // break SEARCH; 337 // } 338 339 // __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl()); ) 340 // __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle); 341 342 // // __disable_interrupts_hard(); 343 // eventfd_t val; 344 // eventfd_read( this->idle, &val ); 345 // // __enable_interrupts_hard(); 346 347 // __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl()); ) 348 349 // // We were woken up, remove self from idle 350 // mark_awake(this->cltr->procs, * this); 351 352 // // DON'T just proceed, start looking again 353 // continue MAIN_LOOP; 354 // } 355 356 // RUN_THREAD: 357 // /* paranoid */ verify( kernelTLS().this_proc_id ); 358 // /* paranoid */ verify( ! __preemption_enabled() ); 359 // /* paranoid */ verify( readyThread ); 360 361 // // Reset io dirty bit 362 // this->io.dirty = false; 363 364 // // We found a thread run it 365 // __run_thread(this, readyThread); 366 367 // // Are we done? 368 // if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 369 370 // #if !defined(__CFA_NO_STATISTICS__) 371 // unsigned long long curr = rdtscl(); 372 // if(curr > (last_tally + 500000000)) { 373 // __tally_stats(this->cltr->stats, __cfaabi_tls.this_stats); 374 // last_tally = curr; 375 // } 376 // #endif 377 378 // if(this->io.pending && !this->io.dirty) { 379 // __cfa_io_flush( this ); 380 // } 381 382 // // Check if there is pending io 383 // __maybe_io_drain( this ); 264 #else 265 #warning new kernel loop 266 SEARCH: { 267 /* paranoid */ verify( ! __preemption_enabled() ); 268 269 // First, lock the scheduler since we are searching for a thread 270 ready_schedule_lock(); 271 272 // Try to get the next thread 273 readyThread = pop_fast( this->cltr ); 274 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 275 276 // If we can't find a thread, might as well flush any outstanding I/O 277 if(this->io.pending) { __cfa_io_flush( this ); } 278 279 // Spin a little on I/O, just in case 280 for(5) { 281 __maybe_io_drain( this ); 282 readyThread = pop_fast( this->cltr ); 283 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 284 } 285 286 // no luck, try stealing a few times 287 for(5) { 288 if( __maybe_io_drain( this ) ) { 289 readyThread = pop_fast( this->cltr ); 290 } else { 291 readyThread = pop_slow( this->cltr ); 292 } 293 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 294 } 295 296 // still no luck, search for a thread 297 readyThread = pop_search( this->cltr ); 298 if(readyThread) { ready_schedule_unlock(); break SEARCH; } 299 300 // Don't block if we are done 301 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 302 303 __STATS( __tls_stats()->ready.sleep.halts++; ) 304 305 // Push self to idle stack 306 ready_schedule_unlock(); 307 mark_idle(this->cltr->procs, * this); 308 ready_schedule_lock(); 309 310 // Confirm the ready-queue is empty 311 __maybe_io_drain( this ); 312 readyThread = pop_search( this->cltr ); 313 ready_schedule_unlock(); 314 315 if( readyThread ) { 316 // A thread was found, cancel the halt 317 mark_awake(this->cltr->procs, * this); 318 319 __STATS( __tls_stats()->ready.sleep.cancels++; ) 320 321 // continue the main loop 322 break SEARCH; 323 } 324 325 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); ) 326 __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle); 327 328 // __disable_interrupts_hard(); 329 eventfd_t val; 330 eventfd_read( this->idle, &val ); 331 // __enable_interrupts_hard(); 332 333 __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); ) 334 335 // We were woken up, remove self from idle 336 mark_awake(this->cltr->procs, * this); 337 338 // DON'T just proceed, start looking again 339 continue MAIN_LOOP; 340 } 341 342 RUN_THREAD: 343 /* paranoid */ verify( ! __preemption_enabled() ); 344 /* paranoid */ verify( readyThread ); 345 346 // Reset io dirty bit 347 this->io.dirty = false; 348 349 // We found a thread run it 350 __run_thread(this, readyThread); 351 352 // Are we done? 353 if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP; 354 355 if(this->io.pending && !this->io.dirty) { 356 __cfa_io_flush( this ); 357 } 358 359 ready_schedule_lock(); 360 __maybe_io_drain( this ); 361 ready_schedule_unlock(); 362 #endif 384 363 } 385 364 … … 390 369 391 370 post( this->terminated ); 392 393 371 394 372 if(this == mainProcessor) { … … 553 531 static void __schedule_thread( $thread * thrd ) { 554 532 /* paranoid */ verify( ! __preemption_enabled() ); 555 /* paranoid */ verify( kernelTLS().this_proc_id );556 533 /* paranoid */ verify( ready_schedule_islocked()); 557 534 /* paranoid */ verify( thrd ); … … 567 544 /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary ); 568 545 569 546 const bool local = thrd->state != Start; 570 547 if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready; 571 548 … … 575 552 576 553 // push the thread to the cluster ready-queue 577 push( cl, thrd );554 push( cl, thrd, local ); 578 555 579 556 // variable thrd is no longer safe to use … … 611 588 static inline $thread * __next_thread(cluster * this) with( *this ) { 612 589 /* paranoid */ verify( ! __preemption_enabled() ); 613 /* paranoid */ verify( kernelTLS().this_proc_id );614 590 615 591 ready_schedule_lock(); … … 617 593 ready_schedule_unlock(); 618 594 619 /* paranoid */ verify( kernelTLS().this_proc_id );620 595 /* paranoid */ verify( ! __preemption_enabled() ); 621 596 return thrd; … … 625 600 static inline $thread * __next_thread_slow(cluster * this) with( *this ) { 626 601 /* paranoid */ verify( ! __preemption_enabled() ); 627 /* paranoid */ verify( kernelTLS().this_proc_id );628 602 629 603 ready_schedule_lock(); … … 638 612 ready_schedule_unlock(); 639 613 640 /* paranoid */ verify( kernelTLS().this_proc_id );641 614 /* paranoid */ verify( ! __preemption_enabled() ); 642 615 return thrd; … … 895 868 unsigned tail = *ctx->cq.tail; 896 869 if(head == tail) return false; 870 #if OLD_MAIN 897 871 ready_schedule_lock(); 898 872 ret = __cfa_io_drain( proc ); 899 873 ready_schedule_unlock(); 874 #else 875 ret = __cfa_io_drain( proc ); 876 #endif 900 877 #endif 901 878 return ret; … … 926 903 } 927 904 905 static void crawl_list( cluster * cltr, dlist(processor) & list, unsigned count ) { 906 /* paranoid */ verify( cltr->stats ); 907 908 processor * it = &list`first; 909 for(unsigned i = 0; i < count; i++) { 910 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count); 911 /* paranoid */ verify( it->local_data->this_stats ); 912 __tally_stats( cltr->stats, it->local_data->this_stats ); 913 it = &(*it)`next; 914 } 915 } 916 917 void crawl_cluster_stats( cluster & this ) { 918 // Stop the world, otherwise stats could get really messed-up 919 // this doesn't solve all problems but does solve many 920 // so it's probably good enough 921 uint_fast32_t last_size = ready_mutate_lock(); 922 923 crawl_list(&this, this.procs.actives, this.procs.total - this.procs.idle); 924 crawl_list(&this, this.procs.idles , this.procs.idle ); 925 926 // Unlock the RWlock 927 ready_mutate_unlock( last_size ); 928 } 929 930 928 931 void print_stats_now( cluster & this, int flags ) { 932 crawl_cluster_stats( this ); 929 933 __print_stats( this.stats, this.print_stats, "Cluster", this.name, (void*)&this ); 930 934 } -
libcfa/src/concurrency/kernel.hfa
r5407cdc r8d66610 49 49 50 50 // Processor id, required for scheduling threads 51 struct __processor_id_t { 52 unsigned id:24; 53 54 #if !defined(__CFA_NO_STATISTICS__) 55 struct __stats_t * stats; 56 #endif 57 }; 51 58 52 59 53 coroutine processorCtx_t { … … 63 57 // Wrapper around kernel threads 64 58 struct __attribute__((aligned(128))) processor { 65 // Main state66 inline __processor_id_t;67 68 59 // Cluster from which to get threads 69 60 struct cluster * cltr; … … 90 81 pthread_t kernel_thread; 91 82 83 // Unique id for the processor (not per cluster) 84 unsigned unique_id; 85 92 86 struct { 93 87 $io_context * ctx; … … 113 107 114 108 // Link lists fields 115 DLISTED_MGD_IMPL_IN(processor)109 inline dlink(processor); 116 110 117 111 // special init fields … … 123 117 } init; 124 118 119 struct KernelThreadData * local_data; 120 125 121 #if !defined(__CFA_NO_STATISTICS__) 126 122 int print_stats; … … 133 129 #endif 134 130 }; 131 P9_EMBEDDED( processor, dlink(processor) ) 135 132 136 133 void ?{}(processor & this, const char name[], struct cluster & cltr); … … 140 137 static inline void ?{}(processor & this, struct cluster & cltr) { this{ "Anonymous Processor", cltr}; } 141 138 static inline void ?{}(processor & this, const char name[]) { this{name, *mainCluster}; } 142 143 DLISTED_MGD_IMPL_OUT(processor)144 139 145 140 //----------------------------------------------------------------------------- … … 152 147 153 148 // Aligned timestamps which are used by the relaxed ready queue 154 struct __attribute__((aligned(128))) __timestamp_t; 155 void ?{}(__timestamp_t & this); 156 void ^?{}(__timestamp_t & this); 149 struct __attribute__((aligned(128))) __timestamp_t { 150 volatile unsigned long long tv; 151 }; 152 153 static inline void ?{}(__timestamp_t & this) { this.tv = 0; } 154 static inline void ^?{}(__timestamp_t & this) {} 157 155 158 156 //TODO adjust cache size to ARCHITECTURE … … 177 175 void ?{}(__ready_queue_t & this); 178 176 void ^?{}(__ready_queue_t & this); 177 #if !defined(__CFA_NO_STATISTICS__) 178 unsigned cnt(const __ready_queue_t & this, unsigned idx); 179 #endif 179 180 180 181 // Idle Sleep … … 190 191 191 192 // List of idle processors 192 dlist(processor , processor) idles;193 dlist(processor) idles; 193 194 194 195 // List of active processors 195 dlist(processor , processor) actives;196 dlist(processor) actives; 196 197 }; 197 198 -
libcfa/src/concurrency/kernel/fwd.hfa
r5407cdc r8d66610 38 38 struct $thread * volatile this_thread; 39 39 struct processor * volatile this_processor; 40 struct __processor_id_t * volatile this_proc_id; 41 struct __stats_t * volatile this_stats; 40 volatile bool sched_lock; 42 41 43 42 struct { … … 56 55 uint64_t bck_seed; 57 56 } ready_rng; 57 58 struct __stats_t * volatile this_stats; 59 60 61 #ifdef __CFA_WITH_VERIFY__ 62 // Debug, check if the rwlock is owned for reading 63 bool in_sched_lock; 64 unsigned sched_id; 65 #endif 58 66 } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" ))); 59 67 -
libcfa/src/concurrency/kernel/startup.cfa
r5407cdc r8d66610 77 77 static void doregister( struct cluster & cltr ); 78 78 static void unregister( struct cluster & cltr ); 79 static void register_tls( processor * this ); 80 static void unregister_tls( processor * this ); 79 81 static void ?{}( $coroutine & this, current_stack_info_t * info); 80 82 static void ?{}( $thread & this, current_stack_info_t * info); … … 123 125 NULL, // cannot use 0p 124 126 NULL, 127 false, 128 { 1, false, false }, 129 0, 130 { 0, 0 }, 125 131 NULL, 126 NULL, 127 { 1, false, false }, 132 #ifdef __CFA_WITH_VERIFY__ 133 false, 134 0, 135 #endif 128 136 }; 129 137 … … 210 218 (*mainProcessor){}; 211 219 220 register_tls( mainProcessor ); 221 212 222 //initialize the global state variables 213 223 __cfaabi_tls.this_processor = mainProcessor; 214 __cfaabi_tls.this_proc_id = (__processor_id_t*)mainProcessor;215 224 __cfaabi_tls.this_thread = mainThread; 216 225 … … 219 228 __init_stats( __cfaabi_tls.this_stats ); 220 229 #endif 230 mainProcessor->local_data = &__cfaabi_tls; 221 231 222 232 // Enable preemption … … 273 283 #endif 274 284 285 mainProcessor->local_data = 0p; 286 287 unregister_tls( mainProcessor ); 288 275 289 // Destroy the main processor and its context in reverse order of construction 276 290 // These were manually constructed so we need manually destroy them … … 316 330 processor * proc = (processor *) arg; 317 331 __cfaabi_tls.this_processor = proc; 318 __cfaabi_tls.this_proc_id = (__processor_id_t*)proc;319 332 __cfaabi_tls.this_thread = 0p; 320 333 __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1]; 334 proc->local_data = &__cfaabi_tls; 335 336 register_tls( proc ); 337 321 338 // SKULLDUGGERY: We want to create a context for the processor coroutine 322 339 // which is needed for the 2-step context switch. However, there is no reason … … 355 372 #endif 356 373 #endif 374 375 proc->local_data = 0p; 376 377 unregister_tls( proc ); 357 378 358 379 return 0p; … … 446 467 self_mon_p = &self_mon; 447 468 link.next = 0p; 448 link. prev = 0p;449 link.preferred = -1u;469 link.ts = 0; 470 preferred = -1u; 450 471 last_proc = 0p; 451 472 #if defined( __CFA_WITH_VERIFY__ ) … … 475 496 this.rdq.id = -1u; 476 497 this.rdq.target = -1u; 477 this.rdq.cutoff = -1ull;498 this.rdq.cutoff = 0ull; 478 499 do_terminate = false; 479 500 preemption_alarm = 0p; … … 485 506 486 507 this.init.thrd = initT; 508 509 this.local_data = 0p; 487 510 488 511 this.idle = eventfd(0, 0); … … 496 519 #endif 497 520 498 // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue499 uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);500 this.cltr->procs.total += 1u;501 insert_last(this.cltr->procs.actives, this);502 503 // Adjust the ready queue size504 ready_queue_grow( cltr );505 506 // Unlock the RWlock507 ready_mutate_unlock( last_size );508 509 521 __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this); 510 522 } … … 512 524 // Not a ctor, it just preps the destruction but should not destroy members 513 525 static void deinit(processor & this) { 514 // Lock the RWlock so no-one pushes/pops while we are changing the queue515 uint_fast32_t last_size = ready_mutate_lock();516 this.cltr->procs.total -= 1u;517 remove(this);518 519 // Adjust the ready queue size520 ready_queue_shrink( this.cltr );521 522 // Unlock the RWlock and unregister: we don't need the read_lock any more523 ready_mutate_unregister((__processor_id_t*)&this, last_size );524 525 526 close(this.idle); 526 527 } … … 656 657 cltr->nthreads -= 1; 657 658 unlock(cltr->thread_list_lock); 659 } 660 661 static void register_tls( processor * this ) { 662 // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue 663 uint_fast32_t last_size; 664 [this->unique_id, last_size] = ready_mutate_register(); 665 666 this->cltr->procs.total += 1u; 667 insert_last(this->cltr->procs.actives, *this); 668 669 // Adjust the ready queue size 670 ready_queue_grow( this->cltr ); 671 672 // Unlock the RWlock 673 ready_mutate_unlock( last_size ); 674 } 675 676 677 static void unregister_tls( processor * this ) { 678 // Lock the RWlock so no-one pushes/pops while we are changing the queue 679 uint_fast32_t last_size = ready_mutate_lock(); 680 this->cltr->procs.total -= 1u; 681 remove(*this); 682 683 // clear the cluster so nothing gets pushed to local queues 684 cluster * cltr = this->cltr; 685 this->cltr = 0p; 686 687 // Adjust the ready queue size 688 ready_queue_shrink( cltr ); 689 690 // Unlock the RWlock and unregister: we don't need the read_lock any more 691 ready_mutate_unregister( this->unique_id, last_size ); 658 692 } 659 693 -
libcfa/src/concurrency/kernel_private.hfa
r5407cdc r8d66610 25 25 // Scheduler 26 26 27 struct __attribute__((aligned(128))) __scheduler_lock_id_t;28 27 29 28 extern "C" { … … 80 79 // Lock-Free registering/unregistering of threads 81 80 // Register a processor to a given cluster and get its unique id in return 82 void register_proc_id( struct __processor_id_t *);81 unsigned register_proc_id( void ); 83 82 84 83 // Unregister a processor from a given cluster using its id, getting back the original pointer 85 void unregister_proc_id( struct __processor_id_t * proc);84 void unregister_proc_id( unsigned ); 86 85 87 86 //======================================================================= … … 112 111 } 113 112 114 // Cells use by the reader writer lock 115 // while not generic it only relies on a opaque pointer 116 struct __attribute__((aligned(128))) __scheduler_lock_id_t { 117 // Spin lock used as the underlying lock 118 volatile bool lock; 119 120 // Handle pointing to the proc owning this cell 121 // Used for allocating cells and debugging 122 __processor_id_t * volatile handle; 123 124 #ifdef __CFA_WITH_VERIFY__ 125 // Debug, check if this is owned for reading 126 bool owned; 127 #endif 128 }; 129 130 static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t)); 113 114 115 131 116 132 117 //----------------------------------------------------------------------- … … 147 132 148 133 // writer lock 149 volatile bool lock;134 volatile bool write_lock; 150 135 151 136 // data pointer 152 __scheduler_lock_id_t* data;137 volatile bool * volatile * data; 153 138 }; 154 139 … … 163 148 static inline void ready_schedule_lock(void) with(*__scheduler_lock) { 164 149 /* paranoid */ verify( ! __preemption_enabled() ); 165 /* paranoid */ verify( kernelTLS().this_proc_id ); 166 167 unsigned iproc = kernelTLS().this_proc_id->id; 168 /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id); 169 /*paranoid*/ verify(iproc < ready); 150 /* paranoid */ verify( ! kernelTLS().in_sched_lock ); 151 /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock ); 152 /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id ); 170 153 171 154 // Step 1 : make sure no writer are in the middle of the critical section 172 while(__atomic_load_n(& lock, (int)__ATOMIC_RELAXED))155 while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED)) 173 156 Pause(); 174 157 … … 179 162 180 163 // Step 2 : acquire our local lock 181 __atomic_acquire( & data[iproc].lock );182 /*paranoid*/ verify( data[iproc].lock);164 __atomic_acquire( &kernelTLS().sched_lock ); 165 /*paranoid*/ verify(kernelTLS().sched_lock); 183 166 184 167 #ifdef __CFA_WITH_VERIFY__ 185 168 // Debug, check if this is owned for reading 186 data[iproc].owned= true;169 kernelTLS().in_sched_lock = true; 187 170 #endif 188 171 } … … 190 173 static inline void ready_schedule_unlock(void) with(*__scheduler_lock) { 191 174 /* paranoid */ verify( ! __preemption_enabled() ); 192 /* paranoid */ verify( kernelTLS().this_proc_id ); 193 194 unsigned iproc = kernelTLS().this_proc_id->id; 195 /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id); 196 /*paranoid*/ verify(iproc < ready); 197 /*paranoid*/ verify(data[iproc].lock); 198 /*paranoid*/ verify(data[iproc].owned); 175 /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock ); 176 /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id ); 177 /* paranoid */ verify( kernelTLS().sched_lock ); 178 /* paranoid */ verify( kernelTLS().in_sched_lock ); 199 179 #ifdef __CFA_WITH_VERIFY__ 200 180 // Debug, check if this is owned for reading 201 data[iproc].owned= false;181 kernelTLS().in_sched_lock = false; 202 182 #endif 203 __atomic_unlock(& data[iproc].lock);183 __atomic_unlock(&kernelTLS().sched_lock); 204 184 } 205 185 … … 207 187 static inline bool ready_schedule_islocked(void) { 208 188 /* paranoid */ verify( ! __preemption_enabled() ); 209 /*paranoid*/ verify( kernelTLS().this_proc_id ); 210 __processor_id_t * proc = kernelTLS().this_proc_id; 211 return __scheduler_lock->data[proc->id].owned; 189 /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock ); 190 return kernelTLS().sched_lock; 212 191 } 213 192 214 193 static inline bool ready_mutate_islocked() { 215 return __scheduler_lock-> lock;194 return __scheduler_lock->write_lock; 216 195 } 217 196 #endif … … 228 207 // Register a processor to a given cluster and get its unique id in return 229 208 // For convenience, also acquires the lock 230 static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) { 231 register_proc_id( proc ); 232 return ready_mutate_lock(); 209 static inline [unsigned, uint_fast32_t] ready_mutate_register() { 210 unsigned id = register_proc_id(); 211 uint_fast32_t last = ready_mutate_lock(); 212 return [id, last]; 233 213 } 234 214 235 215 // Unregister a processor from a given cluster using its id, getting back the original pointer 236 216 // assumes the lock is acquired 237 static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {217 static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) { 238 218 ready_mutate_unlock( last_s ); 239 unregister_proc_id( proc);219 unregister_proc_id( id ); 240 220 } 241 221 … … 281 261 // push thread onto a ready queue for a cluster 282 262 // returns true if the list was previously empty, false otherwise 283 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd );263 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool local); 284 264 285 265 //----------------------------------------------------------------------- -
libcfa/src/concurrency/locks.cfa
r5407cdc r8d66610 188 188 alarm_node_t alarm_node; 189 189 condition_variable(L) * cond; 190 info_thread(L) * i ;190 info_thread(L) * info_thd; 191 191 }; 192 192 193 void ?{}( alarm_node_wrap(L) & this, Timealarm, Duration period, Alarm_Callback callback, condition_variable(L) * c, info_thread(L) * i ) {193 void ?{}( alarm_node_wrap(L) & this, Duration alarm, Duration period, Alarm_Callback callback, condition_variable(L) * c, info_thread(L) * i ) { 194 194 this.alarm_node{ callback, alarm, period }; 195 195 this.cond = c; 196 this.i = i;196 this.info_thd = i; 197 197 } 198 198 … … 206 206 // may still be called after a thread has been removed from the queue but 207 207 // before the alarm is unregistered 208 if ( listed(i ) ) { // is thread on queue209 i ->signalled = false;208 if ( listed(info_thd) ) { // is thread on queue 209 info_thd->signalled = false; 210 210 // remove this thread O(1) 211 remove( cond->blocked_threads, *i );211 remove( cond->blocked_threads, *info_thd ); 212 212 cond->count--; 213 if( i ->lock ) {213 if( info_thd->lock ) { 214 214 // call lock's on_notify if a lock was passed 215 on_notify(*i ->lock, i->t);215 on_notify(*info_thd->lock, info_thd->t); 216 216 } else { 217 217 // otherwise wake thread 218 unpark( i ->t );218 unpark( info_thd->t ); 219 219 } 220 220 } … … 313 313 314 314 // helper for wait()'s' with a timeout 315 void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Time t) with(this) {315 void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) { 316 316 lock( lock __cfaabi_dbg_ctx2 ); 317 317 size_t recursion_count = queue_and_get_recursion(this, &info); 318 alarm_node_wrap(L) node_wrap = { t, 0`s, alarm_node_wrap_cast, &this, &info };318 alarm_node_wrap(L) node_wrap = { t, 0`s, callback, &this, &info }; 319 319 register_self( &node_wrap.alarm_node ); 320 320 unlock( lock ); … … 332 332 #define WAIT_TIME( u, l, t ) \ 333 333 info_thread( L ) i = { active_thread(), u, l }; \ 334 queue_info_thread_timeout(this, i, t ); \334 queue_info_thread_timeout(this, i, t, alarm_node_wrap_cast ); \ 335 335 return i.signalled; 336 336 … … 340 340 void wait( condition_variable(L) & this, L & l, uintptr_t info ) with(this) { WAIT( info, &l ) } 341 341 342 bool wait( condition_variable(L) & this, Duration duration ) with(this) { WAIT_TIME( 0 , 0p , __kernel_get_time() + duration ) } 343 bool wait( condition_variable(L) & this, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, 0p , __kernel_get_time() + duration ) } 344 bool wait( condition_variable(L) & this, Time time ) with(this) { WAIT_TIME( 0 , 0p , time ) } 345 bool wait( condition_variable(L) & this, uintptr_t info, Time time ) with(this) { WAIT_TIME( info, 0p , time ) } 346 bool wait( condition_variable(L) & this, L & l, Duration duration ) with(this) { WAIT_TIME( 0 , &l , __kernel_get_time() + duration ) } 347 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, &l , __kernel_get_time() + duration ) } 348 bool wait( condition_variable(L) & this, L & l, Time time ) with(this) { WAIT_TIME( 0 , &l , time ) } 349 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ) with(this) { WAIT_TIME( info, &l , time ) } 342 bool wait( condition_variable(L) & this, Duration duration ) with(this) { WAIT_TIME( 0 , 0p , duration ) } 343 bool wait( condition_variable(L) & this, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, 0p , duration ) } 344 bool wait( condition_variable(L) & this, L & l, Duration duration ) with(this) { WAIT_TIME( 0 , &l , duration ) } 345 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, &l , duration ) } 350 346 } 351 347 -
libcfa/src/concurrency/locks.hfa
r5407cdc r8d66610 290 290 bool wait( condition_variable(L) & this, Duration duration ); 291 291 bool wait( condition_variable(L) & this, uintptr_t info, Duration duration ); 292 bool wait( condition_variable(L) & this, Time time );293 bool wait( condition_variable(L) & this, uintptr_t info, Time time );294 292 295 293 void wait( condition_variable(L) & this, L & l ); … … 297 295 bool wait( condition_variable(L) & this, L & l, Duration duration ); 298 296 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ); 299 bool wait( condition_variable(L) & this, L & l, Time time ); 300 bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time ); 301 } 297 } -
libcfa/src/concurrency/preemption.cfa
r5407cdc r8d66610 18 18 19 19 #include "preemption.hfa" 20 20 21 #include <assert.h> 21 22 … … 26 27 #include <limits.h> // PTHREAD_STACK_MIN 27 28 29 #include "bits/debug.hfa" 28 30 #include "bits/signal.hfa" 29 31 #include "kernel_private.hfa" … … 105 107 static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) { 106 108 if( ! & (*alarms)`first ) return 0p; // If no alarms return null 107 if( (*alarms)`first. alarm>= currtime ) return 0p; // If alarms head not expired return null109 if( (*alarms)`first.timeval >= currtime ) return 0p; // If alarms head not expired return null 108 110 return pop(alarms); // Otherwise just pop head 109 111 } … … 141 143 if( period > 0 ) { 142 144 __cfadbg_print_buffer_local( preemption, " KERNEL: alarm period is %lu.\n", period`ns ); 143 node-> alarm = currtime + period;// Alarm is periodic, add currtime to it (used cached current time)145 node->timeval = currtime + period; // Alarm is periodic, add currtime to it (used cached current time) 144 146 insert( alarms, node ); // Reinsert the node for the next time it triggers 145 147 } … … 148 150 // If there are still alarms pending, reset the timer 149 151 if( & (*alarms)`first ) { 150 Duration delta = (*alarms)`first.alarm - currtime; 151 Duration capped = max(delta, 50`us); 152 __kernel_set_timer( capped ); 152 Duration delta = (*alarms)`first.timeval - currtime; 153 __kernel_set_timer( delta ); 153 154 } 154 155 } … … 160 161 // Alarms need to be enabled 161 162 if ( duration > 0 && ! alarm->set ) { 162 alarm-> alarm = __kernel_get_time() +duration;163 alarm->period = duration;163 alarm->initial = duration; 164 alarm->period = duration; 164 165 register_self( alarm ); 165 166 } … … 167 168 else if ( duration == 0 && alarm->set ) { 168 169 unregister_self( alarm ); 169 alarm-> alarm= 0;170 alarm->period = 0;170 alarm->initial = 0; 171 alarm->period = 0; 171 172 } 172 173 // If alarm is different from previous, change it 173 174 else if ( duration > 0 && alarm->period != duration ) { 174 175 unregister_self( alarm ); 175 alarm-> alarm = __kernel_get_time() +duration;176 alarm->period = duration;176 alarm->initial = duration; 177 alarm->period = duration; 177 178 register_self( alarm ); 178 179 } … … 599 600 600 601 // Notify the alarm thread of the shutdown 601 sigval val = { 1 }; 602 sigval val; 603 val.sival_int = 0; 602 604 pthread_sigqueue( alarm_thread, SIGALRM, val ); 603 605 … … 619 621 // Used by thread to control when they want to receive preemption signals 620 622 void ?{}( preemption_scope & this, processor * proc ) { 621 (this.alarm){ proc, (Time){ 0 }, 0`s };623 (this.alarm){ proc, 0`s, 0`s }; 622 624 this.proc = proc; 623 625 this.proc->preemption_alarm = &this.alarm; … … 687 689 // Waits on SIGALRM and send SIGUSR1 to whom ever needs it 688 690 static void * alarm_loop( __attribute__((unused)) void * args ) { 689 __processor_id_t id; 690 register_proc_id(&id); 691 __cfaabi_tls.this_proc_id = &id; 692 691 unsigned id = register_proc_id(); 693 692 694 693 // Block sigalrms to control when they arrive … … 707 706 siginfo_t info; 708 707 int sig = sigwaitinfo( &mask, &info ); 708 709 __cfadbg_print_buffer_decl ( preemption, " KERNEL: sigwaitinfo returned %d, c: %d, v: %d\n", sig, info.si_code, info.si_value.sival_int ); 710 __cfadbg_print_buffer_local( preemption, " KERNEL: SI_QUEUE %d, SI_TIMER %d, SI_KERNEL %d\n", SI_QUEUE, SI_TIMER, SI_KERNEL ); 709 711 710 712 if( sig < 0 ) { … … 714 716 case EAGAIN : 715 717 case EINTR : 716 {__cfa abi_dbg_print_buffer_decl(" KERNEL: Spurious wakeup %d.\n", err );}718 {__cfadbg_print_buffer_local( preemption, " KERNEL: Spurious wakeup %d.\n", err );} 717 719 continue; 718 720 case EINVAL : … … 726 728 assertf(sig == SIGALRM, "Kernel Internal Error, sigwait: Unexpected signal %d (%d : %d)\n", sig, info.si_code, info.si_value.sival_int); 727 729 728 // __cfaabi_dbg_print_safe( "Kernel : Caught alarm from %d with %d\n", info.si_code, info.si_value.sival_int );729 730 // Switch on the code (a.k.a. the sender) to 730 731 switch( info.si_code ) 731 732 { 733 // Signal was not sent by the kernel but by an other thread 734 case SI_QUEUE: 735 // other threads may signal the alarm thread to shut it down 736 // or to manual cause the preemption tick 737 // use info.si_value and handle the case here 738 switch( info.si_value.sival_int ) { 739 case 0: 740 goto EXIT; 741 default: 742 abort( "SI_QUEUE with val %d", info.si_value.sival_int); 743 } 744 // fallthrough 732 745 // Timers can apparently be marked as sent for the kernel 733 746 // In either case, tick preemption … … 739 752 unlock( event_kernel->lock ); 740 753 break; 741 // Signal was not sent by the kernel but by an other thread742 case SI_QUEUE:743 // For now, other thread only signal the alarm thread to shut it down744 // If this needs to change use info.si_value and handle the case here745 goto EXIT;746 754 } 747 755 } … … 749 757 EXIT: 750 758 __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" ); 751 register_proc_id(&id);759 unregister_proc_id(id); 752 760 753 761 return 0p; -
libcfa/src/concurrency/ready_queue.cfa
r5407cdc r8d66610 17 17 // #define __CFA_DEBUG_PRINT_READY_QUEUE__ 18 18 19 // #define USE_MPSC20 19 21 20 #define USE_RELAXED_FIFO … … 93 92 this.alloc = 0; 94 93 this.ready = 0; 95 this.lock = false;96 94 this.data = alloc(this.max); 97 98 /*paranoid*/ verify( 0 == (((uintptr_t)(this.data )) % 64) ); 99 /*paranoid*/ verify( 0 == (((uintptr_t)(this.data + 1)) % 64) ); 95 this.write_lock = false; 96 100 97 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc)); 101 98 /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready)); … … 106 103 } 107 104 108 void ?{}( __scheduler_lock_id_t & this, __processor_id_t * proc ) {109 this.handle = proc;110 this.lock = false;111 #ifdef __CFA_WITH_VERIFY__112 this.owned = false;113 #endif114 }115 105 116 106 //======================================================================= 117 107 // Lock-Free registering/unregistering of threads 118 void register_proc_id( struct __processor_id_t * proc) with(*__scheduler_lock) {108 unsigned register_proc_id( void ) with(*__scheduler_lock) { 119 109 __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc); 110 bool * handle = (bool *)&kernelTLS().sched_lock; 120 111 121 112 // Step - 1 : check if there is already space in the data … … 124 115 // Check among all the ready 125 116 for(uint_fast32_t i = 0; i < s; i++) { 126 __processor_id_t * null = 0p; // Re-write every loop since compare thrashes it 127 if( __atomic_load_n(&data[i].handle, (int)__ATOMIC_RELAXED) == null 128 && __atomic_compare_exchange_n( &data[i].handle, &null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 129 /*paranoid*/ verify(i < ready); 130 /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size)); 131 /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0); 132 proc->id = i; 117 bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems 118 /* paranoid */ verify( handle != *cell ); 119 120 bool * null = 0p; // Re-write every loop since compare thrashes it 121 if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null 122 && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 123 /* paranoid */ verify(i < ready); 124 /* paranoid */ verify( (kernelTLS().sched_id = i, true) ); 125 return i; 133 126 } 134 127 } … … 141 134 142 135 // Step - 3 : Mark space as used and then publish it. 143 __scheduler_lock_id_t * storage = (__scheduler_lock_id_t *)&data[n]; 144 (*storage){ proc }; 136 data[n] = handle; 145 137 while() { 146 138 unsigned copy = n; … … 154 146 155 147 // Return new spot. 156 /*paranoid*/ verify(n < ready); 157 /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size)); 158 /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0); 159 proc->id = n; 160 } 161 162 void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) { 163 unsigned id = proc->id; 164 /*paranoid*/ verify(id < ready); 165 /*paranoid*/ verify(proc == __atomic_load_n(&data[id].handle, __ATOMIC_RELAXED)); 166 __atomic_store_n(&data[id].handle, 0p, __ATOMIC_RELEASE); 148 /* paranoid */ verify(n < ready); 149 /* paranoid */ verify( (kernelTLS().sched_id = n, true) ); 150 return n; 151 } 152 153 void unregister_proc_id( unsigned id ) with(*__scheduler_lock) { 154 /* paranoid */ verify(id < ready); 155 /* paranoid */ verify(id == kernelTLS().sched_id); 156 /* paranoid */ verify(data[id] == &kernelTLS().sched_lock); 157 158 bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems 159 160 __atomic_store_n(cell, 0p, __ATOMIC_RELEASE); 167 161 168 162 __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc); … … 174 168 uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) { 175 169 /* paranoid */ verify( ! __preemption_enabled() ); 170 /* paranoid */ verify( ! kernelTLS().sched_lock ); 176 171 177 172 // Step 1 : lock global lock 178 173 // It is needed to avoid processors that register mid Critical-Section 179 174 // to simply lock their own lock and enter. 180 __atomic_acquire( & lock );175 __atomic_acquire( &write_lock ); 181 176 182 177 // Step 2 : lock per-proc lock … … 186 181 uint_fast32_t s = ready; 187 182 for(uint_fast32_t i = 0; i < s; i++) { 188 __atomic_acquire( &data[i].lock ); 183 volatile bool * llock = data[i]; 184 if(llock) __atomic_acquire( llock ); 189 185 } 190 186 … … 203 199 // Alternative solution : return s in write_lock and pass it to write_unlock 204 200 for(uint_fast32_t i = 0; i < last_s; i++) { 205 v erify(data[i].lock);206 __atomic_store_n(&data[i].lock, (bool)false, __ATOMIC_RELEASE);201 volatile bool * llock = data[i]; 202 if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE); 207 203 } 208 204 209 205 // Step 2 : release global lock 210 /*paranoid*/ assert(true == lock);211 __atomic_store_n(& lock, (bool)false, __ATOMIC_RELEASE);206 /*paranoid*/ assert(true == write_lock); 207 __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE); 212 208 213 209 /* paranoid */ verify( ! __preemption_enabled() ); … … 253 249 } 254 250 255 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd ) with (cltr->ready_queue) {251 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) { 256 252 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 257 253 258 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);254 const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 259 255 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 260 261 // write timestamp262 thrd->link.ts = rdtscl();263 256 264 257 bool local; … … 280 273 #endif 281 274 282 #if defined(USE_MPSC)283 // mpsc always succeeds284 } while( false );285 #else286 275 // If we can't lock it retry 287 276 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 288 #endif289 277 290 278 // Actually push it 291 279 push(lanes.data[i], thrd); 292 280 293 #if !defined(USE_MPSC) 294 // Unlock and return 295 __atomic_unlock( &lanes.data[i].lock ); 296 #endif 281 // Unlock and return 282 __atomic_unlock( &lanes.data[i].lock ); 297 283 298 284 // Mark the current index in the tls rng instance as having an item … … 350 336 #endif 351 337 #if defined(USE_WORK_STEALING) 352 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd ) with (cltr->ready_queue) {338 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) { 353 339 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 354 340 355 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 341 // #define USE_PREFERRED 342 #if !defined(USE_PREFERRED) 343 const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr); 356 344 /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count ); 357 358 // write timestamp 359 thrd->link.ts = rdtscl(); 345 #else 346 unsigned preferred = thrd->preferred; 347 const bool external = push_local || (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr; 348 /* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count ); 349 350 unsigned r = preferred % READYQ_SHARD_FACTOR; 351 const unsigned start = preferred - r; 352 #endif 360 353 361 354 // Try to pick a lane and lock it … … 371 364 } 372 365 else { 373 processor * proc = kernelTLS().this_processor; 374 unsigned r = proc->rdq.its++; 375 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR); 366 #if !defined(USE_PREFERRED) 367 processor * proc = kernelTLS().this_processor; 368 unsigned r = proc->rdq.its++; 369 i = proc->rdq.id + (r % READYQ_SHARD_FACTOR); 370 #else 371 i = start + (r++ % READYQ_SHARD_FACTOR); 372 #endif 376 373 } 377 378 379 #if defined(USE_MPSC)380 // mpsc always succeeds381 } while( false );382 #else383 374 // If we can't lock it retry 384 375 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 385 #endif386 376 387 377 // Actually push it 388 378 push(lanes.data[i], thrd); 389 379 390 #if !defined(USE_MPSC) 391 // Unlock and return 392 __atomic_unlock( &lanes.data[i].lock ); 393 #endif 380 // Unlock and return 381 __atomic_unlock( &lanes.data[i].lock ); 394 382 395 383 #if !defined(__CFA_NO_STATISTICS__) … … 410 398 411 399 if(proc->rdq.target == -1u) { 400 unsigned long long min = ts(lanes.data[proc->rdq.id]); 401 for(int i = 0; i < READYQ_SHARD_FACTOR; i++) { 402 unsigned long long tsc = ts(lanes.data[proc->rdq.id + i]); 403 if(tsc < min) min = tsc; 404 } 405 proc->rdq.cutoff = min; 412 406 proc->rdq.target = __tls_rand() % lanes.count; 413 unsigned it1 = proc->rdq.itr;414 unsigned it2 = proc->rdq.itr + 1;415 unsigned idx1 = proc->rdq.id + (it1 % READYQ_SHARD_FACTOR);416 unsigned idx2 = proc->rdq.id + (it2 % READYQ_SHARD_FACTOR);417 unsigned long long tsc1 = ts(lanes.data[idx1]);418 unsigned long long tsc2 = ts(lanes.data[idx2]);419 proc->rdq.cutoff = min(tsc1, tsc2);420 if(proc->rdq.cutoff == 0) proc->rdq.cutoff = -1ull;421 407 } 422 408 else { 423 409 unsigned target = proc->rdq.target; 424 410 proc->rdq.target = -1u; 425 if(lanes.tscs[target].tv < proc->rdq.cutoff) { 411 const unsigned long long bias = 0; //2_500_000_000; 412 const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff; 413 if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) { 426 414 $thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 427 415 if(t) return t; … … 430 418 431 419 for(READYQ_SHARD_FACTOR) { 432 unsigned i = proc->rdq.id + ( --proc->rdq.itr% READYQ_SHARD_FACTOR);420 unsigned i = proc->rdq.id + (proc->rdq.itr++ % READYQ_SHARD_FACTOR); 433 421 if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t; 434 422 } … … 462 450 // If list looks empty retry 463 451 if( is_empty(lane) ) { 464 __STATS( stats.espec++; )465 452 return 0p; 466 453 } … … 468 455 // If we can't get the lock retry 469 456 if( !__atomic_try_acquire(&lane.lock) ) { 470 __STATS( stats.elock++; )471 457 return 0p; 472 458 } … … 475 461 if( is_empty(lane) ) { 476 462 __atomic_unlock(&lane.lock); 477 __STATS( stats.eempty++; )478 463 return 0p; 479 464 } … … 481 466 // Actually pop the list 482 467 struct $thread * thrd; 483 thrd = pop(lane); 468 unsigned long long tsv; 469 [thrd, tsv] = pop(lane); 484 470 485 471 /* paranoid */ verify(thrd); 472 /* paranoid */ verify(tsv); 486 473 /* paranoid */ verify(lane.lock); 487 474 … … 493 480 494 481 #if defined(USE_WORK_STEALING) 495 lanes.tscs[w].tv = t hrd->link.ts;482 lanes.tscs[w].tv = tsv; 496 483 #endif 484 485 thrd->preferred = w; 497 486 498 487 // return the popped thread … … 522 511 // Check that all the intrusive queues in the data structure are still consistent 523 512 static void check( __ready_queue_t & q ) with (q) { 524 #if defined(__CFA_WITH_VERIFY__) && !defined(USE_MPSC)513 #if defined(__CFA_WITH_VERIFY__) 525 514 { 526 515 for( idx ; lanes.count ) { … … 528 517 assert(!lanes.data[idx].lock); 529 518 530 assert(head(sl)->link.prev == 0p ); 531 assert(head(sl)->link.next->link.prev == head(sl) ); 532 assert(tail(sl)->link.next == 0p ); 533 assert(tail(sl)->link.prev->link.next == tail(sl) ); 534 535 if(is_empty(sl)) { 536 assert(tail(sl)->link.prev == head(sl)); 537 assert(head(sl)->link.next == tail(sl)); 538 } else { 539 assert(tail(sl)->link.prev != head(sl)); 540 assert(head(sl)->link.next != tail(sl)); 541 } 519 if(is_empty(sl)) { 520 assert( sl.anchor.next == 0p ); 521 assert( sl.anchor.ts == 0 ); 522 assert( mock_head(sl) == sl.prev ); 523 } else { 524 assert( sl.anchor.next != 0p ); 525 assert( sl.anchor.ts != 0 ); 526 assert( mock_head(sl) != sl.prev ); 527 } 542 528 } 543 529 } … … 560 546 // fixes the list so that the pointers back to anchors aren't left dangling 561 547 static inline void fix(__intrusive_lane_t & ll) { 562 #if !defined(USE_MPSC) 563 // if the list is not empty then follow he pointer and fix its reverse 564 if(!is_empty(ll)) { 565 head(ll)->link.next->link.prev = head(ll); 566 tail(ll)->link.prev->link.next = tail(ll); 567 } 568 // Otherwise just reset the list 569 else { 570 verify(tail(ll)->link.next == 0p); 571 tail(ll)->link.prev = head(ll); 572 head(ll)->link.next = tail(ll); 573 verify(head(ll)->link.prev == 0p); 574 } 575 #endif 576 } 577 578 static void assign_list(unsigned & value, dlist(processor, processor) & list, unsigned count) { 548 if(is_empty(ll)) { 549 verify(ll.anchor.next == 0p); 550 ll.prev = mock_head(ll); 551 } 552 } 553 554 static void assign_list(unsigned & value, dlist(processor) & list, unsigned count) { 579 555 processor * it = &list`first; 580 556 for(unsigned i = 0; i < count; i++) { … … 597 573 lanes.tscs = alloc(lanes.count, lanes.tscs`realloc); 598 574 for(i; lanes.count) { 599 lanes.tscs[i].tv = ts(lanes.data[i]); 575 unsigned long long tsc = ts(lanes.data[i]); 576 lanes.tscs[i].tv = tsc != 0 ? tsc : rdtscl(); 600 577 } 601 578 #endif … … 686 663 while(!is_empty(lanes.data[idx])) { 687 664 struct $thread * thrd; 688 thrd = pop(lanes.data[idx]); 689 690 push(cltr, thrd); 665 unsigned long long _; 666 [thrd, _] = pop(lanes.data[idx]); 667 668 push(cltr, thrd, true); 691 669 692 670 // for printing count the number of displaced threads … … 725 703 /* paranoid */ verify( ready_mutate_islocked() ); 726 704 } 705 706 #if !defined(__CFA_NO_STATISTICS__) 707 unsigned cnt(const __ready_queue_t & this, unsigned idx) { 708 /* paranoid */ verify(this.lanes.count > idx); 709 return this.lanes.data[idx].cnt; 710 } 711 #endif -
libcfa/src/concurrency/ready_subqueue.hfa
r5407cdc r8d66610 7 7 // Intrusives lanes which are used by the relaxed ready queue 8 8 struct __attribute__((aligned(128))) __intrusive_lane_t { 9 10 #if defined(USE_MPSC) 11 mpsc_queue($thread) queue; 12 __attribute__((aligned(128))) 13 #else 14 // anchor for the head and the tail of the queue 15 __attribute__((aligned(128))) struct __sentinel_t { 16 // Link lists fields 17 // instrusive link field for threads 18 // must be exactly as in $thread 19 __thread_desc_link link; 20 } before, after; 21 #endif 9 struct $thread * prev; 22 10 23 11 // spin lock protecting the queue 24 12 volatile bool lock; 25 13 26 // Optional statistic counters 27 #if !defined(__CFA_NO_SCHED_STATS__) 28 struct __attribute__((aligned(64))) { 29 // difference between number of push and pops 30 ssize_t diff; 14 __thread_desc_link anchor; 31 15 32 // total number of pushes and pops 33 size_t push; 34 size_t pop ; 35 } stat; 16 #if !defined(__CFA_NO_STATISTICS__) 17 unsigned cnt; 36 18 #endif 37 19 }; 38 20 39 void ?{}(__intrusive_lane_t & this);40 void ^?{}(__intrusive_lane_t & this);41 42 21 // Get the head pointer (one before the first element) from the anchor 43 static inline $thread * head(const __intrusive_lane_t & this) { 44 #if defined(USE_MPSC) 45 return this.queue.head; 46 #else 47 $thread * rhead = ($thread *)( 48 (uintptr_t)( &this.before ) - offsetof( $thread, link ) 49 ); 50 /* paranoid */ verify(rhead); 51 return rhead; 52 #endif 53 } 54 55 // Get the tail pointer (one after the last element) from the anchor 56 static inline $thread * tail(const __intrusive_lane_t & this) { 57 #if defined(USE_MPSC) 58 return this.queue.tail; 59 #else 60 $thread * rtail = ($thread *)( 61 (uintptr_t)( &this.after ) - offsetof( $thread, link ) 62 ); 63 /* paranoid */ verify(rtail); 64 return rtail; 65 #endif 22 static inline $thread * mock_head(const __intrusive_lane_t & this) { 23 $thread * rhead = ($thread *)( 24 (uintptr_t)( &this.anchor ) - __builtin_offsetof( $thread, link ) 25 ); 26 return rhead; 66 27 } 67 28 … … 69 30 void ?{}( __intrusive_lane_t & this ) { 70 31 this.lock = false; 32 this.prev = mock_head(this); 33 this.anchor.next = 0p; 34 this.anchor.ts = 0; 35 #if !defined(__CFA_NO_STATISTICS__) 36 this.cnt = 0; 37 #endif 71 38 72 #if !defined(USE_MPSC) 73 this.before.link.prev = 0p; 74 this.before.link.next = tail(this); 75 this.before.link.ts = 0; 76 77 this.after .link.prev = head(this); 78 this.after .link.next = 0p; 79 this.after .link.ts = 0; 80 81 #if !defined(__CFA_NO_SCHED_STATS__) 82 this.stat.diff = 0; 83 this.stat.push = 0; 84 this.stat.pop = 0; 85 #endif 86 87 // We add a boat-load of assertions here because the anchor code is very fragile 88 /* paranoid */ verify(((uintptr_t)( head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.before)); 89 /* paranoid */ verify(((uintptr_t)( tail(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.after )); 90 /* paranoid */ verify(head(this)->link.prev == 0p ); 91 /* paranoid */ verify(head(this)->link.next == tail(this) ); 92 /* paranoid */ verify(tail(this)->link.next == 0p ); 93 /* paranoid */ verify(tail(this)->link.prev == head(this) ); 94 /* paranoid */ verify(&head(this)->link.prev == &this.before.link.prev ); 95 /* paranoid */ verify(&head(this)->link.next == &this.before.link.next ); 96 /* paranoid */ verify(&tail(this)->link.prev == &this.after .link.prev ); 97 /* paranoid */ verify(&tail(this)->link.next == &this.after .link.next ); 98 /* paranoid */ verify(__alignof__(__intrusive_lane_t) == 128); 99 /* paranoid */ verify(__alignof__(this) == 128); 100 /* paranoid */ verifyf(((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128)); 101 #endif 39 // We add a boat-load of assertions here because the anchor code is very fragile 40 /* paranoid */ _Static_assert( offsetof( $thread, link ) == offsetof(__intrusive_lane_t, anchor) ); 41 /* paranoid */ verify( offsetof( $thread, link ) == offsetof(__intrusive_lane_t, anchor) ); 42 /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.anchor) ); 43 /* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next ); 44 /* paranoid */ verify( &mock_head(this)->link.ts == &this.anchor.ts ); 45 /* paranoid */ verify( mock_head(this)->link.next == 0p ); 46 /* paranoid */ verify( mock_head(this)->link.ts == 0 ); 47 /* paranoid */ verify( mock_head(this) == this.prev ); 48 /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 ); 49 /* paranoid */ verify( __alignof__(this) == 128 ); 50 /* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) ); 102 51 } 103 52 104 53 // Dtor is trivial 105 54 void ^?{}( __intrusive_lane_t & this ) { 106 #if !defined(USE_MPSC) 107 // Make sure the list is empty 108 /* paranoid */ verify(head(this)->link.prev == 0p ); 109 /* paranoid */ verify(head(this)->link.next == tail(this) ); 110 /* paranoid */ verify(tail(this)->link.next == 0p ); 111 /* paranoid */ verify(tail(this)->link.prev == head(this) ); 112 #endif 55 // Make sure the list is empty 56 /* paranoid */ verify( this.anchor.next == 0p ); 57 /* paranoid */ verify( this.anchor.ts == 0 ); 58 /* paranoid */ verify( mock_head(this) == this.prev ); 113 59 } 114 60 115 61 // Push a thread onto this lane 116 62 // returns true of lane was empty before push, false otherwise 117 bool push(__intrusive_lane_t & this, $thread * node) { 118 #if defined(USE_MPSC) 119 inline $thread * volatile & ?`next ( $thread * this ) __attribute__((const)) { 120 return this->link.next; 121 } 122 push(this.queue, node); 123 #else 124 #if defined(__CFA_WITH_VERIFY__) 125 /* paranoid */ verify(this.lock); 126 /* paranoid */ verify(node->link.ts != 0); 127 /* paranoid */ verify(node->link.next == 0p); 128 /* paranoid */ verify(node->link.prev == 0p); 129 /* paranoid */ verify(tail(this)->link.next == 0p); 130 /* paranoid */ verify(head(this)->link.prev == 0p); 63 static inline void push( __intrusive_lane_t & this, $thread * node ) { 64 /* paranoid */ verify( this.lock ); 65 /* paranoid */ verify( node->link.next == 0p ); 66 /* paranoid */ verify( node->link.ts == 0 ); 67 /* paranoid */ verify( this.prev->link.next == 0p ); 68 /* paranoid */ verify( this.prev->link.ts == 0 ); 69 if( this.anchor.next == 0p ) { 70 /* paranoid */ verify( this.anchor.next == 0p ); 71 /* paranoid */ verify( this.anchor.ts == 0 ); 72 /* paranoid */ verify( this.prev == mock_head( this ) ); 73 } else { 74 /* paranoid */ verify( this.anchor.next != 0p ); 75 /* paranoid */ verify( this.anchor.ts != 0 ); 76 /* paranoid */ verify( this.prev != mock_head( this ) ); 77 } 131 78 132 if(this.before.link.ts == 0l) { 133 /* paranoid */ verify(tail(this)->link.prev == head(this)); 134 /* paranoid */ verify(head(this)->link.next == tail(this)); 135 } else { 136 /* paranoid */ verify(tail(this)->link.prev != head(this)); 137 /* paranoid */ verify(head(this)->link.next != tail(this)); 138 } 139 #endif 140 141 // Get the relevant nodes locally 142 $thread * tail = tail(this); 143 $thread * prev = tail->link.prev; 144 145 // Do the push 146 node->link.next = tail; 147 node->link.prev = prev; 148 prev->link.next = node; 149 tail->link.prev = node; 150 151 // Update stats 152 #if !defined(__CFA_NO_SCHED_STATS__) 153 this.stat.diff++; 154 this.stat.push++; 155 #endif 156 157 verify(node->link.next == tail(this)); 158 159 // Check if the queue used to be empty 160 if(this.before.link.ts == 0l) { 161 this.before.link.ts = node->link.ts; 162 /* paranoid */ verify(node->link.prev == head(this)); 163 return true; 164 } 165 return false; 79 // Get the relevant nodes locally 80 this.prev->link.next = node; 81 this.prev->link.ts = rdtscl(); 82 this.prev = node; 83 #if !defined(__CFA_NO_STATISTICS__) 84 this.cnt++; 166 85 #endif 167 86 } … … 170 89 // returns popped 171 90 // returns true of lane was empty before push, false otherwise 172 $thread * pop(__intrusive_lane_t & this) { 173 /* paranoid */ verify(this.lock); 174 #if defined(USE_MPSC) 175 inline $thread * volatile & ?`next ( $thread * this ) __attribute__((const)) { 176 return this->link.next; 177 } 178 return pop(this.queue); 179 #else 180 /* paranoid */ verify(this.before.link.ts != 0ul); 91 static inline [* $thread, unsigned long long] pop( __intrusive_lane_t & this ) { 92 /* paranoid */ verify( this.lock ); 93 /* paranoid */ verify( this.anchor.next != 0p ); 94 /* paranoid */ verify( this.anchor.ts != 0 ); 181 95 182 // Get anchors locally 183 $thread * head = head(this); 184 $thread * tail = tail(this); 96 // Get the relevant nodes locally 97 unsigned long long ts = this.anchor.ts; 98 $thread * node = this.anchor.next; 99 this.anchor.next = node->link.next; 100 this.anchor.ts = node->link.ts; 101 bool is_empty = this.anchor.ts == 0; 102 node->link.next = 0p; 103 node->link.ts = 0; 104 #if !defined(__CFA_NO_STATISTICS__) 105 this.cnt--; 106 #endif 185 107 186 // Get the relevant nodes locally 187 $thread * node = head->link.next; 188 $thread * next = node->link.next; 108 // Update head time stamp 109 if(is_empty) this.prev = mock_head( this ); 189 110 190 /* paranoid */ verify(node != tail); 191 /* paranoid */ verify(node); 192 193 // Do the pop 194 head->link.next = next; 195 next->link.prev = head; 196 node->link.next = 0p; 197 node->link.prev = 0p; 198 199 // Update head time stamp 200 this.before.link.ts = next->link.ts; 201 202 // Update stats 203 #ifndef __CFA_NO_SCHED_STATS__ 204 this.stat.diff--; 205 this.stat.pop ++; 206 #endif 207 208 // Check if we emptied list and return accordingly 209 /* paranoid */ verify(tail(this)->link.next == 0p); 210 /* paranoid */ verify(head(this)->link.prev == 0p); 211 if(next == tail) { 212 /* paranoid */ verify(this.before.link.ts == 0); 213 /* paranoid */ verify(tail(this)->link.prev == head(this)); 214 /* paranoid */ verify(head(this)->link.next == tail(this)); 215 return node; 216 } 217 else { 218 /* paranoid */ verify(next->link.ts != 0); 219 /* paranoid */ verify(tail(this)->link.prev != head(this)); 220 /* paranoid */ verify(head(this)->link.next != tail(this)); 221 /* paranoid */ verify(this.before.link.ts != 0); 222 return node; 223 } 224 #endif 111 /* paranoid */ verify( node->link.next == 0p ); 112 /* paranoid */ verify( node->link.ts == 0 ); 113 return [node, ts]; 225 114 } 226 115 227 116 // Check whether or not list is empty 228 117 static inline bool is_empty(__intrusive_lane_t & this) { 229 #if defined(USE_MPSC) 230 return this.queue.head == 0p; 231 #else 232 // Cannot verify here since it may not be locked 233 return this.before.link.ts == 0; 234 #endif 118 return this.anchor.ts == 0; 235 119 } 236 120 237 121 // Return the timestamp 238 122 static inline unsigned long long ts(__intrusive_lane_t & this) { 239 #if defined(USE_MPSC) 240 $thread * tl = this.queue.head; 241 if(!tl) return -1ull; 242 return tl->link.ts; 243 #else 244 // Cannot verify here since it may not be locked 245 return this.before.link.ts; 246 #endif 123 // Cannot verify here since it may not be locked 124 return this.anchor.ts; 247 125 } 248 249 // Aligned timestamps which are used by the relaxed ready queue250 struct __attribute__((aligned(128))) __timestamp_t {251 volatile unsigned long long tv;252 };253 254 void ?{}(__timestamp_t & this) { this.tv = 0; }255 void ^?{}(__timestamp_t & this) {} -
libcfa/src/concurrency/stats.cfa
r5407cdc r8d66610 19 19 stats->ready.pop.local .attempt = 0; 20 20 stats->ready.pop.local .success = 0; 21 stats->ready.pop.local .elock = 0;22 stats->ready.pop.local .eempty = 0;23 stats->ready.pop.local .espec = 0;24 21 stats->ready.pop.help .attempt = 0; 25 22 stats->ready.pop.help .success = 0; 26 stats->ready.pop.help .elock = 0;27 stats->ready.pop.help .eempty = 0;28 stats->ready.pop.help .espec = 0;29 23 stats->ready.pop.steal .attempt = 0; 30 24 stats->ready.pop.steal .success = 0; 31 stats->ready.pop.steal .elock = 0;32 stats->ready.pop.steal .eempty = 0;33 stats->ready.pop.steal .espec = 0;34 25 stats->ready.pop.search.attempt = 0; 35 26 stats->ready.pop.search.success = 0; 36 stats->ready.pop.search.elock = 0;37 stats->ready.pop.search.eempty = 0;38 stats->ready.pop.search.espec = 0;39 27 stats->ready.threads.migration = 0; 40 28 stats->ready.threads.extunpark = 0; 41 29 stats->ready.threads.threads = 0; 30 stats->ready.threads.cthreads = 0; 42 31 stats->ready.sleep.halts = 0; 43 32 stats->ready.sleep.cancels = 0; … … 59 48 stats->io.calls.completed = 0; 60 49 stats->io.calls.errors.busy = 0; 61 stats->io.poller.sleeps = 0;62 50 #endif 63 51 … … 68 56 } 69 57 58 static inline void tally_one( volatile uint64_t * agg, volatile uint64_t * val) { 59 uint64_t add = __atomic_exchange_n(val, 0_l64u, __ATOMIC_RELAXED); 60 __atomic_fetch_add(agg, add, __ATOMIC_RELAXED); 61 } 62 63 static inline void tally_one( volatile int64_t * agg, volatile int64_t * val) { 64 int64_t add = __atomic_exchange_n(val, 0_l64, __ATOMIC_RELAXED); 65 __atomic_fetch_add(agg, add, __ATOMIC_RELAXED); 66 } 67 70 68 void __tally_stats( struct __stats_t * cltr, struct __stats_t * proc ) { 71 __atomic_fetch_add( &cltr->ready.push.local.attempt, proc->ready.push.local.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.local.attempt = 0; 72 __atomic_fetch_add( &cltr->ready.push.local.success, proc->ready.push.local.success, __ATOMIC_SEQ_CST ); proc->ready.push.local.success = 0; 73 __atomic_fetch_add( &cltr->ready.push.share.attempt, proc->ready.push.share.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.share.attempt = 0; 74 __atomic_fetch_add( &cltr->ready.push.share.success, proc->ready.push.share.success, __ATOMIC_SEQ_CST ); proc->ready.push.share.success = 0; 75 __atomic_fetch_add( &cltr->ready.push.extrn.attempt, proc->ready.push.extrn.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.attempt = 0;