Changeset 8d66610


Ignore:
Timestamp:
May 21, 2021, 4:48:10 PM (3 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children:
f1bce515
Parents:
5407cdc (diff), 7404cdc (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Files:
21 added
69 edited
3 moved

Legend:

Unmodified
Added
Removed
  • doc/LaTeXmacros/common.tex

    r5407cdc r8d66610  
    1111%% Created On       : Sat Apr  9 10:06:17 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Sun Feb 14 15:52:46 2021
    14 %% Update Count     : 524
     13%% Last Modified On : Sat May  8 08:48:37 2021
     14%% Update Count     : 540
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1616
     
    3838\usepackage{xspace}
    3939\newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}} % Cforall icon
    40 \newcommand{\CFA}{\protect\CFAIcon\xspace}                      % CFA symbolic name
    41 \newcommand{\CFL}{\textrm{Cforall}\xspace}                      % Cforall non-icon name
    42 \newcommand{\Celeven}{\textrm{C11}\xspace}                      % C11 symbolic name
     40\newcommand{\CFA}{\protect\CFAIcon\xspace}                              % CFA symbolic name
     41\newcommand{\CFL}{\textrm{Cforall}\xspace}                              % Cforall non-icon name
     42\newcommand{\Celeven}{\textrm{C11}\xspace}                              % C11 symbolic name
    4343\newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon
    44 \newcommand{\CC}{\protect\CCIcon\xspace}                        % C++ symbolic name
     44\newcommand{\CC}[1][]{\protect\CCIcon{#1}\xspace}               % C++ symbolic name
    4545% numbers disallowed in latex variables names => use number names
    46 \newcommand{\CCeleven}{\protect\CCIcon{11}\xspace}      % C++11 symbolic name
    47 \newcommand{\CCfourteen}{\protect\CCIcon{14}\xspace} % C++14 symbolic name
    48 \newcommand{\CCseventeen}{\protect\CCIcon{17}\xspace} % C++17 symbolic name
    49 \newcommand{\CCtwenty}{\protect\CCIcon{20}\xspace}      % C++20 symbolic name
     46\newcommand{\CCeleven}{\protect\CCIcon{11}\xspace}              % C++11 symbolic name
     47\newcommand{\CCfourteen}{\protect\CCIcon{14}\xspace}    % C++14 symbolic name
     48\newcommand{\CCseventeen}{\protect\CCIcon{17}\xspace}   % C++17 symbolic name
     49\newcommand{\CCtwenty}{\protect\CCIcon{20}\xspace}              % C++20 symbolic name
    5050\newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name
    5151
     
    102102\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex \@plus -1ex \@minus -.2ex}{1.0ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
    103103\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-2.0ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries}}
     104\renewcommand\subparagraph{\@startsection{subparagraph}{4}{\z@}{-1.5ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries\itshape}}
    104105
    105106% index macros
     
    152153% Latin abbreviation
    153154\newcommand{\abbrevFont}{\textit}                       % set empty for no italics
     155% If not followed by a comma or colon, add a comma.
     156\newcommand{\CheckCommaColon}{\@ifnextchar{,}{}{\@ifnextchar{:}{}{,\xspace}}}
     157% If not followed by a period, add a period.
     158\newcommand{\CheckPeriod}{\@ifnextchar{.}{}{.\xspace}}
     159
    154160\@ifundefined{eg}{
    155161\newcommand{\EG}{\abbrevFont{e}.\abbrevFont{g}.}
    156 \newcommand*{\eg}{%
    157         \@ifnextchar{,}{\EG}%
    158                 {\@ifnextchar{:}{\EG}%
    159                         {\EG,\xspace}}%
    160 }}{}%
     162\newcommand{\eg}{\EG\CheckCommaColon}
     163}{}%
    161164\@ifundefined{ie}{
    162165\newcommand{\IE}{\abbrevFont{i}.\abbrevFont{e}.}
    163 \newcommand*{\ie}{%
    164         \@ifnextchar{,}{\IE}%
    165                 {\@ifnextchar{:}{\IE}%
    166                         {\IE,\xspace}}%
    167 }}{}%
     166\newcommand{\ie}{\IE\CheckCommaColon}
     167}{}%
    168168\@ifundefined{etc}{
    169169\newcommand{\ETC}{\abbrevFont{etc}}
    170 \newcommand*{\etc}{%
    171         \@ifnextchar{.}{\ETC}%
    172         {\ETC.\xspace}%
    173 }}{}%
     170\newcommand{\etc}{\ETC\CheckPeriod}
     171}{}%
    174172\@ifundefined{etal}{
    175173\newcommand{\ETAL}{\abbrevFont{et}~\abbrevFont{al}}
    176 \newcommand*{\etal}{%
    177         \@ifnextchar{.}{\protect\ETAL}%
    178                 {\protect\ETAL.\xspace}%
    179 }}{}%
     174\newcommand{\etal}{\ETAL\CheckPeriod}
     175}{}%
    180176\@ifundefined{viz}{
    181177\newcommand{\VIZ}{\abbrevFont{viz}}
    182 \newcommand*{\viz}{%
    183         \@ifnextchar{.}{\VIZ}%
    184                 {\VIZ.\xspace}%
    185 }}{}%
     178\newcommand{\viz}{\VIZ\CheckPeriod}
     179}{}%
    186180\makeatother
    187181
     
    284278showlines=true,                                                 % show blank lines at end of code
    285279aboveskip=4pt,                                                  % spacing above/below code block
    286 belowskip=0pt,
     280belowskip=2pt,
    287281numberstyle=\footnotesize\sf,                   % numbering style
    288282% replace/adjust listing characters that look bad in sanserif
     
    297291\lstset{
    298292language=CFA,
    299 moredelim=**[is][\color{red}]{@}{@},    % red highlighting @...@
    300 %moredelim=**[is][\color{red}]{®}{®},   % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
     293%moredelim=**[is][\color{red}]{@}{@},   % red highlighting @...@
     294moredelim=**[is][\color{red}]{®}{®},    % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
    301295%moredelim=**[is][\color{blue}]{ß}{ß},  % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
    302296%moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
  • doc/theses/andrew_beach_MMath/Makefile

    r5407cdc r8d66610  
    3434        ${LATEX} ${BASE}
    3535        ${BIBTEX} ${BUILD}/${BASE}
    36         ${LATEX} ${BASE}
    3736        ${GLOSSARY} ${BUILD}/${BASE}
    3837        ${LATEX} ${BASE}
  • doc/theses/andrew_beach_MMath/cfalab.sty

    r5407cdc r8d66610  
    11% Package for CFA Research Lab.
     2% (Now more a personal collection and testing grounds for common.sty.)
    23%
    3 % Made by combining and updating various macro files people had made.
     4% This is a collection of commands everyone working on CFA related documents
     5% should find useful. So mostly programming language related tools.
    46%
    57% Internal commands are prefixed with "\cfalab@".
     
    1012
    1113% Other packages required.
     14%
     15% Access to new basic LaTeX tools and other low level commands.
    1216\RequirePackage{etoolbox}
     17% Code formatting tools and environments.
    1318\RequirePackage{listings}
     19% Automatically adds spaces.
    1420\RequirePackage{xspace}
    1521
    16 % Symbols: All symbols are zero argument robust commands with special rules
    17 % about the space following the c.s. token. Normally the space might be
    18 % re-added according to the rules of the xspace package. They may be followed
    19 % by a star (which the command will consume) to disable this behaviour.
     22% Tip for commands that end with \xspace: if the default is not correct then
     23% follow the command with {} to disable \xspace, use '{} ' to force add a
     24% space and '{}<whatever-follows>' to force remove one.
     25%
     26% \CFA
     27% Cforall with the forall symbol.
     28\newrobustcmd\CFA{\textsf{C\raisebox{\depth}{\rotatebox{180}{A}}}\xspace}
     29% \Cpp[<std>]
     30% C++ symbol name. You may optionally provide <std> to specify a standard.
     31\newrobustcmd\Cpp[1][\xspace]{C++#1}
    2032
    21 % \newsymbolcmd{<command>}{<replacement text>}
    22 %     Defines <command> to be a symbol that has the given <replacement text>.
    23 \newrobustcmd*\newsymbolcmd[2]{\newrobustcmd{#1}{\cfalab@symbol{#2}}}
    24 \def\cfalab@symbol#1{\@ifnextchar*{#1\cfalab@eatstar}{#1\xspace}}
    25 \def\cfalab@eatstar*{}
    26 
    27 % Cforall with the forall symbol.
    28 \newsymbolcmd\CFA{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}}
    29 % C++ with kerning. (No standard number support.)
    30 \newsymbolcmd\CPP{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}}
    31 
    32 % This is executed very early in the \begin{document} code.
     33% This is executed very early in the \begin{document} code, before the
     34% document's contents but after packages are loaded.
    3335\AtEndPreamble{
    3436  \@ifpackageloaded{hyperref}{
     
    3638    \pdfstringdefDisableCommands{
    3739      \def\CFA{CFA}
    38       \def\CPP{C++}
     40      \def\Cpp{C++}
     41      \def\lstinline{}
     42      \def\code#1#2{#2}
    3943    }
    4044  }{}
    4145}
     46
     47% \colour{<colour>}{<text>}
     48% Just \color but using the LaTeX style instead of TeX style command.
     49\newcommand*\colour[2]{{\color{#1}#2}}
     50
     51% \code{<language>}{<code>}
     52% Use the listings package to format the snipit of <code> in <language>.
     53\newrobustcmd*\code[2]{\lstinline[language=#1]{#2}}
     54
     55% \begin{cfa}[<options>]
     56% \end{cfa}
     57% Use the listings package to format a block of CFA code.
     58% Extra listings options can be passed in as an optional argument.
     59\lstnewenvironment{cfa}[1][]{\lstset{language=CFA}\lstset{#1}}{}
     60
     61% \settextunderscore{(new|old)}
     62% Redefines the underscore either as a new repersentation or the old one.
     63% Not that some other packages (ex. hyperref) can override this. Set it up
     64% after loading them.
     65\let\cfalab@textunderscore@old=\textunderscore
     66\newcommand\cfalab@textunderscore@new{%
     67    \leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
     68\newcommand\settextunderscore[1]{%
     69    \renewcommand\textunderscore{\csuse{cfalab@textunderscore@#1}}}
    4270
    4371% The CFA listings language. Based off of ANCI C and including GCC extensions.
     
    6189\lstset{defaultdialect={[UW]CFA}}
    6290
    63 % The cfalab style defines some common settings useful in different languages.
    64 \lstdefinestyle{cfalab}{%
    65     columns=fullflexible,
    66     basicstyle=\linespread{0.9}\tt,
    67     stringstyle=\tt,
     91% Create an internal paragraph indent amount. This is used internally to
     92% mimic the standard indent even when it has been overriden in the document.
     93\newlength\cfalab@parindent
     94\deflength\cfalab@parindent{\parindent}
     95
     96% The cfacommon style has many useful defaults for CFA and other types of
     97% code. Use the listings option "style=cfacommon" to load them.
     98\lstdefinestyle{cfacommon}{
     99  columns=fullflexible,
     100  basicstyle=\linespread{0.9}\sf,
     101  stringstyle=\tt,
     102  tabsize=5,
     103  % Indent code to paragraph indentation.
     104  xleftmargin=\cfalab@parindent,
     105  % Allow ASCII characters in the range 128-255.
     106  extendedchars=true,
     107  % This allows you to use "math mode" to insert LaTeX into the code.
     108  % Use \( and \) if you need to insert math mode inside that code.
     109  escapechar=\$,
     110  % Disable LaTeX math escape in CFA code $...$
     111  mathescape=false,
     112  keepspaces=true,
     113  % Do not show spaces with cup.
     114  showstringspaces=false,
     115  % Show blank lines at end of code.
     116  showlines=true,
     117  % Spacing above/below code block.
     118  aboveskip=4pt,belowskip=0pt,
     119  numberstyle=\footnotesize\sf,
     120  % Replace/adjust listing characters that look bad in sanserif.
     121  literate={-}{\makebox[1ex][c]{\raisebox{0.7ex}{\rule{0.75ex}{0.1ex}}}}1
     122    {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1
     123    {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
     124    {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2
     125    {->}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex\textgreater}2,
    68126}
    69127
    70 % \code*[<escape character>]{<code>}
    71 %     Use the listings package to format a snipit of <code>.
    72 %     The <escape character> must be a character that does not appear in
    73 %     <code> and defaults to a backtick.
    74 \newcommand*\codeC[2][\`]{\lstinline[language=C]#1#2#1}
    75 \newcommand*\codeCFA[2][\`]{\lstinline[language=CFA]#1#2#1}
     128% common.tex Compatablity ===================================================
     129% Below this line is for compatability with the old common.tex file.
    76130
    77 % \settextunderscore{(new|old)}
    78 %     Redefines the underscore either as a new repersentation or the old one.
    79 %     Not that some other packages (ex. hyperref) can override this. Set it
    80 %     up after loading them.
    81 \let\cfalab@textunderscore@old=\textunderscore
    82 \newcommand\cfalab@textunderscore@new{%
    83     \leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
    84 \newcommand\settextunderscore[1]{%
    85     \renewcommand\textunderscore{\csuse{cfalab@textunderscore@#1}}}
     131% Backwards compatable way to activate the cfacommon style.
     132\newcommand{\CFAStyle}{\lstset{style=cfacommon}}
     133
     134% A couple of abbreviations are provided. Just ones someone liked.
     135%
     136% Abbreviation formatting commands (renew to customize):
     137\newcommand{\abbrevFont}{\textit}
     138%
     139% Abbreviations that, if not followed by a comma or colon, add a comma.
     140\newrobustcmd*\cfalab@abbrev@comma{%
     141  \@ifnextchar{,}{}{\@ifnextchar{:}{}{,\xspace}}}
     142\providerobustcmd*\eg{\abbrevFont{e}.\abbrevFont{g}.\cfalab@abbrev@comma}
     143\providerobustcmd*\ie{\abbrevFont{i}.\abbrevFont{e}.\cfalab@abbrev@comma}
     144%
     145% Abbreviations that, if not followed by a period, add a period.
     146\newrobustcmd*\cfalab@abbrev@period{\@ifnextchar{.}{}{.\xspace}}
     147\providerobustcmd*\etc{\abbrevFont{etc}\cfalab@abbrev@period}
     148\providerobustcmd*\etal{\abbrevFont{et}~\abbrevFont{al}\cfalab@abbrev@period}
     149\providerobustcmd*\viz{\abbrevFont{viz}\cfalab@abbrev@period}
    86150
    87151\endinput
  • doc/theses/andrew_beach_MMath/existing.tex

    r5407cdc r8d66610  
    1616to be defined~\cite{Moss18}.
    1717\begin{cfa}
    18 char i; int i; double i;                        $\C[3.75in]{// variable overload}$
    19 int f(); double f();                            $\C{// return overload}$
    20 void g( int ); void g( double );        $\C{// parameter overload}\CRT$
     18char i; int i; double i;
     19int f(); double f();
     20void g( int ); void g( double );
    2121\end{cfa}
    2222This feature requires name mangling so the assembly symbols are unique for
     
    2626mangling is:
    2727\begin{cfa}
    28 // name mangling
     28// name mangling on by default
    2929int i; // _X1ii_1
    30 @extern "C"@ {  // no name mangling
     30extern "C" {  // disables name mangling
    3131        int j; // j
    32         @extern "Cforall"@ {  // name mangling
     32        extern "Cforall" {  // enables name mangling
    3333                int k; // _X1ki_1
    3434        }
    35         // no name mangling
    36 }
    37 // name mangling
     35        // revert to no name mangling
     36}
     37// revert to name mangling
    3838\end{cfa}
    3939Both forms of @extern@ affect all the declarations within their nested lexical
     
    5050\begin{cfa}
    5151int i, j;
    52 int @&@ ri = i, @&&@ rri = ri;
     52int & ri = i, && rri = ri;
    5353rri = 3;  // auto-dereference assign to i
    54 @&@ri = @&@j; // rebindable
     54&ri = &j; // rebindable
    5555ri = 5;   // assign to j
    5656\end{cfa}
     
    6464
    6565In general, operator names in \CFA are constructed by bracketing an operator
    66 token with @?@, which indicates the position of the arguments. For example, infixed
    67 multiplication is @?*?@ while prefix dereference is @*?@. This syntax make it
    68 easy to tell the difference between prefix operations (such as @++?@) and
    69 post-fix operations (@?++@).
     66token with @?@, which indicates the position of the arguments. For example,
     67infixed multiplication is @?*?@ while prefix dereference is @*?@.
     68This syntax make it easy to tell the difference between prefix operations
     69(such as @++?@) and post-fix operations (@?++@).
    7070
    7171The special name for a constructor is @?{}@, which comes from the
    72 initialization syntax in C. The special name for a destructor is @^{}@, where
    73 the @^@ has no special meaning.
     72initialization syntax in C. That initialation syntax is also the operator
     73form. \CFA will generate a constructor call each time a variable is declared,
     74passing the initialization arguments to the constructort.
     75\begin{cfa}
     76struct Example { ... };
     77void ?{}(Example & this) { ... }
     78{
     79        Example a;
     80        Example b = {};
     81}
     82void ?{}(Example & this, char first, int num) { ... }
     83{
     84        Example c = {'a', 2};
     85}
     86\end{cfa}
     87Both @a@ and @b@ will be initalized with the first constructor (there is no
     88general way to skip initialation) while @c@ will be initalized with the
     89second.
     90
    7491% I don't like the \^{} symbol but $^\wedge$ isn't better.
    75 \begin{cfa}
    76 struct T { ... };
    77 void ?@{}@(@T &@ this, ...) { ... }  // constructor
    78 void ?@^{}@(@T &@ this, ...) { ... } // destructor
     92Similarly destructors use the special name @^?{}@ (the @^@ has no special
     93meaning). They can be called explicatly as well but normally they are
     94implicitly called on a variable when it goes out of scope.
     95\begin{cfa}
     96void ^?{}(Example & this) { ... }
    7997{
    80         T s = @{@ ... @}@;  // same constructor/initialization braces
    81 } // destructor call automatically generated
    82 \end{cfa}
    83 The first parameter is a reference parameter to the type for the
    84 constructor/destructor. Destructors may have multiple parameters.  The compiler
    85 implicitly matches an overloaded constructor @void ^?{}(T &, ...);@ to an
    86 object declaration with associated initialization, and generates a construction
    87 call after the object is allocated. When an object goes out of scope, the
    88 matching overloaded destructor @void ^?{}(T &);@ is called.  Without explicit
    89 definition, \CFA creates a default and copy constructor, destructor and
    90 assignment (like \Cpp). It is possible to define constructors/destructors for
    91 basic and existing types (unlike \Cpp).
     98    Example d;
     99} // <- implicit destructor call
     100\end{cfa}
     101No operator name is restricted in what function signatures they may be bound
     102to although most of the forms cannot be called in operator form. Some
     103``near-misses" will generate warnings.
     104
     105Whenever a type is defined, \CFA will create a default zero-argument
     106constructor, a copy constructor, a series of argument-per-field constructors
     107and a destructor. All user constructors are defined after this.
     108Because operators are never part of the type definition they may be added
     109at any time, including on built-in types.
    92110
    93111\section{Polymorphism}
     
    105123works on any type @T@:
    106124\begin{cfa}
    107 @forall( T )@ @T@ identity( @T@ val ) { return val; }
    108 int forty_two = identity( 42 ); // T bound to int, forty_two == 42
    109 \end{cfa}
     125forall( T ) T identity( T val ) { return val; }
     126int forty_two = identity( 42 );
     127char capital_a = identity( 'A' );
     128\end{cfa}
     129Each use of a polymorphic declaration will resolve its polymorphic parameters
     130(in this case, just @T@) to concrete types (@int@ in the first use and @char@
     131in the second).
    110132
    111133To allow a polymorphic function to be separately compiled, the type @T@ must be
     
    115137types used in a function, \eg:
    116138\begin{cfa}
    117 forall( T @| { void do_once(T); }@) // assertion
     139forall( T | { void do_once(T); })
    118140void do_twice(T value) {
    119141        do_once(value);
    120142        do_once(value);
    121143}
    122 void do_once(@int@ i) { ... }  // provide assertion
    123 @int@ i;
    124 do_twice(i); // implicitly pass assertion do_once to do_twice
    125 \end{cfa}
    126 Any object with a type fulfilling the assertion may be passed as an argument to
    127 a @do_twice@ call.
     144\end{cfa}
    128145
    129146A polymorphic function can be used in the same way as a normal function.  The
     
    132149all the variables replaced with the concrete types from the arguments) is
    133150defined at a call site.
     151\begin{cfa}
     152void do_once(int i) { ... }
     153int i;
     154do_twice(i);
     155\end{cfa}
     156Any object with a type fulfilling the assertion may be passed as an argument to
     157a @do_twice@ call.
    134158
    135159Note, a function named @do_once@ is not required in the scope of @do_twice@ to
     
    138162call.
    139163\begin{cfa}
    140 void do_once(double y) { ... } // global
     164void do_once(double y) { ... }
    141165int quadruple(int x) {
    142         void do_once(int y) { y = y * 2; } // local
    143         do_twice(x); // using local "do_once"
     166        void do_once(int y) { y = y * 2; }
     167        do_twice(x);
    144168        return x;
    145169}
     
    150174function. The matched assertion function is then passed as a function pointer
    151175to @do_twice@ and called within it.
     176The global definition of @do_once@ is ignored.
    152177
    153178To avoid typing long lists of assertions, constraints can be collect into
     
    161186and the @forall@ list in the previous example is replaced with the trait.
    162187\begin{cfa}
    163 forall(dtype T | @done_once(T)@)
     188forall(dtype T | done_once(T))
    164189\end{cfa}
    165190In general, a trait can contain an arbitrary number of assertions, both
     
    172197declarations instead of parameters, returns, and local variable declarations.
    173198\begin{cfa}
    174 forall(dtype @T@)
     199forall(dtype T)
    175200struct node {
    176         node(@T@) * next;  // generic linked node
    177         @T@ * data;
    178 }
    179 node(@int@) inode;
    180 \end{cfa}
    181 The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \Cpp
    182 template usage, a polymorphic-type usage must specify a type parameter.
     201        node(T) * next;  // generic linked node
     202        T * data;
     203}
     204node(int) inode;
     205\end{cfa}
     206The generic type @node(T)@ is an example of a polymorphic type usage.  Like \Cpp
     207template usage, a polymorphic type usage must specify a type parameter.
    183208
    184209There are many other polymorphism features in \CFA but these are the ones used
     
    219244Each coroutine has a @main@ function, which takes a reference to a coroutine
    220245object and returns @void@.
    221 \begin{cfa}[numbers=left]
    222 void main(@CountUp & this@) { // argument matches trait is_coroutine
    223         unsigned int up = 0;  // retained between calls
    224         while (true) {
    225                 next = up; // make "up" available outside function
    226                 @suspend;@$\label{suspend}$
    227                 up += 1;
     246\begin{cfa}
     247void main(CountUp & this) {
     248    for (unsigned int next = 0 ; true ; ++next) {
     249                next = up;
     250                suspend;$\label{suspend}$
    228251        }
    229252}
     
    254277@mutex@.
    255278\begin{cfa}
    256 void example(MonitorA & @mutex@ argA, MonitorB & @mutex@ argB);
     279void example(MonitorA & mutex argA, MonitorB & mutex argB);
    257280\end{cfa}
    258281When the function is called, it implicitly acquires the monitor lock for all of
  • doc/theses/andrew_beach_MMath/features.tex

    r5407cdc r8d66610  
    2020\subparagraph{Raise}
    2121The raise is the starting point for exception handling. It marks the beginning
    22 of exception handling by \newterm{raising} an excepion, which passes it to
     22of exception handling by raising an excepion, which passes it to
    2323the EHM.
    2424
    2525Some well known examples include the @throw@ statements of \Cpp and Java and
    26 the \codePy{raise} statement from Python. In real systems a raise may preform
    27 some other work (such as memory management) but for the purposes of this
    28 overview that can be ignored.
     26the \code{Python}{raise} statement from Python. In real systems a raise may
     27preform some other work (such as memory management) but for the
     28purposes of this overview that can be ignored.
    2929
    3030\subparagraph{Handle}
     
    9393A handler labelled with any given exception can handle exceptions of that
    9494type or any child type of that exception. The root of the exception hierarchy
    95 (here \codeC{exception}) acts as a catch-all, leaf types catch single types
     95(here \code{C}{exception}) acts as a catch-all, leaf types catch single types
    9696and the exceptions in the middle can be used to catch different groups of
    9797related exceptions.
     
    101101between different sub-hierarchies.
    102102This design is used in \CFA even though it is not a object-orientated
    103 language using different tools to create the hierarchy.
     103language; so different tools are used to create the hierarchy.
    104104
    105105% Could I cite the rational for the Python IO exception rework?
     
    123123\section{Virtuals}
    124124Virtual types and casts are not part of \CFA's EHM nor are they required for
    125 any EHM. But \CFA uses a hierarchial system of exceptions and this feature
    126 is leveraged to create that.
    127 
    128 % Maybe talk about why the virtual system is so minimal.
    129 % Created for but not a part of the exception system.
     125any EHM.
     126However the \CFA uses a hierarchy built with the virtual system as the basis
     127for exceptions and exception matching.
     128
     129The virtual system would have ideally been part of \CFA before the work
     130on exception handling began, but unfortunately it was not.
     131Because of this only the features and framework needed for the EHM were
     132designed and implemented. Other features were considered to ensure that
     133the structure could accomidate other desirable features but they were not
     134implemented.
     135The rest of this section will only discuss the finalized portion of the
     136virtual system.
    130137
    131138The virtual system supports multiple ``trees" of types. Each tree is
     
    143150It is important to note that these are virtual members, not virtual methods
    144151of object-orientated programming, and can be of any type.
    145 However, since \CFA has function pointers and they are allowed, virtual
    146 members can be used to mimic virtual methods.
     152\CFA still supports virtual methods as a special case of virtual members.
     153Function pointers that take a pointer to the virtual type will be modified
     154with each level of inheritance so that refers to the new type.
     155This means an object can always be passed to a function in its virtual table
     156as if it were a method.
    147157
    148158Each virtual type has a unique id.
     
    175185While much of the virtual infrastructure is created, it is currently only used
    176186internally for exception handling. The only user-level feature is the virtual
    177 cast, which is the same as the \Cpp \lstinline[language=C++]|dynamic_cast|.
     187cast, which is the same as the \Cpp \code{C++}{dynamic_cast}.
    178188\label{p:VirtualCast}
    179189\begin{cfa}
     
    197207\begin{cfa}
    198208trait is_exception(exceptT &, virtualT &) {
    199         virtualT const & get_exception_vtable(exceptT *);
     209        // Numerous imaginary assertions.
    200210};
    201211\end{cfa}
    202212The trait is defined over two types, the exception type and the virtual table
    203 type. This should be one-to-one: each exception type has only one virtual
    204 table type and vice versa. The only assertion in the trait is
    205 @get_exception_vtable@, which takes a pointer of the exception type and
    206 returns a reference to the virtual table type instance.
    207 
    208 % TODO: This section, and all references to get_exception_vtable, are
    209 % out-of-data. Perhaps wait until the update is finished before rewriting it.
    210 The function @get_exception_vtable@ is actually a constant function.
    211 Regardless of the value passed in (including the null pointer) it should
    212 return a reference to the virtual table instance for that type.
    213 The reason it is a function instead of a constant is that it make type
    214 annotations easier to write as you can use the exception type instead of the
    215 virtual table type; which usually has a mangled name.
    216 % Also \CFA's trait system handles functions better than constants and doing
    217 % it this way reduce the amount of boiler plate we need.
     213type. Each exception type should have but a single virtual table type.
     214Now there are no actual assertions in this trait because the trait system
     215actually can't express them (adding such assertions would be part of
     216completing the virtual system). The imaginary assertions would probably come
     217from a trait defined by the virtual system, and state that the exception type
     218is a virtual type, is a decendent of @exception_t@ (the base exception type)
     219and note its virtual table type.
    218220
    219221% I did have a note about how it is the programmer's responsibility to make
     
    235237Both traits ensure a pair of types are an exception type and its virtual table
    236238and defines one of the two default handlers. The default handlers are used
    237 as fallbacks and are discussed in detail in \VRef{s:ExceptionHandling}.
     239as fallbacks and are discussed in detail in \vref{s:ExceptionHandling}.
    238240
    239241However, all three of these traits can be tricky to use directly.
     
    351353for particular exception type.
    352354The global default termination handler performs a cancellation
    353 \see{\VRef{s:Cancellation}} on the current stack with the copied exception.
     355(see \vref{s:Cancellation}) on the current stack with the copied exception.
    354356
    355357\subsection{Resumption}
     
    426428
    427429\subsubsection{Resumption Marking}
     430\label{s:ResumptionMarking}
    428431A key difference between resumption and termination is that resumption does
    429432not unwind the stack. A side effect that is that when a handler is matched
     
    472475The symmetry between resumption termination is why this pattern was picked.
    473476Other patterns, such as marking just the handlers that caught, also work but
    474 lack the symmetry means there are less rules to remember.
     477lack the symmetry means there are more rules to remember.
    475478
    476479\section{Conditional Catch}
     
    557560\end{cfa}
    558561If there are further handlers after this handler only the first version will
    559 check them. If multiple handlers on a single try block could handle the same
    560 exception the translations get more complex but they are equivilantly
     562check them. If multiple handlers on a single try block that could handle the
     563same exception the translations get more complex but they are equivilantly
    561564powerful.
    562565
     
    633636and the current stack is
    634637unwound. After that it depends one which stack is being cancelled.
    635 \begin{description}
    636 \item[Main Stack:]
     638
     639\paragraph{Main Stack}
    637640The main stack is the one used by the program main at the start of execution,
    638641and is the only stack in a sequential program.
     
    645648to, so it would have be explicitly managed.
    646649
    647 \item[Thread Stack:]
     650\paragraph{Thread Stack}
    648651A thread stack is created for a \CFA @thread@ object or object that satisfies
    649652the @is_thread@ trait.
     
    671674Also you can always add an explicit join if that is the desired behaviour.
    672675
    673 \item[Coroutine Stack:]
     676\paragraph{Coroutine Stack}
    674677A coroutine stack is created for a @coroutine@ object or object that
    675678satisfies the @is_coroutine@ trait.
     
    685688(in terms of coroutine state) called resume on this coroutine, so the message
    686689is passed to the latter.
    687 \end{description}
  • doc/theses/andrew_beach_MMath/future.tex

    r5407cdc r8d66610  
    110110\section{Zero-Cost Try}
    111111\CFA does not have zero-cost try-statements because the compiler generates C
    112 code rather than assembler code \see{\VPageref{p:zero-cost}}. When the compiler
     112code rather than assembler code (see \vpageref{p:zero-cost}). When the compiler
    113113does create its own assembly (or LLVM byte-code), then zero-cost try-statements
    114114are possible. The downside of zero-cost try-statements is the LSDA complexity,
  • doc/theses/andrew_beach_MMath/implement.tex

    r5407cdc r8d66610  
    99% Virtual table rules. Virtual tables, the pointer to them and the cast.
    1010While the \CFA virtual system currently has only one public feature, virtual
    11 cast \see{\VPageref{p:VirtualCast}}, substantial structure is required to
    12 support it, and provide features for exception handling and the standard
    13 library.
     11cast (see the virtual cast feature \vpageref{p:VirtualCast}),
     12substantial structure is required to support it,
     13and provide features for exception handling and the standard library.
    1414
    1515\subsection{Virtual Type}
    16 Virtual types only have one change to their structure, the addition of a
    17 pointer to the virtual table. This is always the first field so that
    18 if it is cast to a supertype the field's location is still known.
    19 
    20 This field is set as part of all new generated constructors.
    21 \todo{They only come as part exceptions and don't work.}
    22 After the object is created the field is constant.
    23 
    24 However it can be read from, internally it is just a regular field called
    25 @virtual_table@. Dereferencing it gives the virtual table and access to the
    26 type's virtual members.
     16Virtual types only have one change to their structure: the addition of a
     17pointer to the virtual table, which is called the \emph{virtual-table pointer}.
     18Internally, the field is called @virtual_table@.
     19The field is fixed after construction. It is always the first field in the
     20structure so that its location is always known.
     21\todo{Talk about constructors for virtual types (after they are working).}
     22
     23This is what binds an instance of a virtual type to a virtual table. This
     24pointer can be used as an identity check. It can also be used to access the
     25virtual table and the virtual members there.
     26
     27\subsection{Type Id}
     28Every virtual type has a unique id.
     29Type ids can be compared for equality (the types reperented are the same)
     30or used to access the type's type information.
     31The type information currently is only the parent's type id or, if the
     32type has no parent, zero.
     33
     34The id's are implemented as pointers to the type's type information instance.
     35Derefencing the pointer gets the type information.
     36By going back-and-forth between the type id and
     37the type info one can find every ancestor of a virtual type.
     38It also pushes the issue of creating a unique value (for
     39the type id) to the problem of creating a unique instance (for type
     40information) which the linker can solve.
     41
     42Advanced linker support is required because there is no place that appears
     43only once to attach the type information to. There should be one structure
     44definition but it is included in multiple translation units. Each virtual
     45table definition should be unique but there are an arbitrary number of thoses.
     46So the special section prefix \texttt{.gnu.linkonce} is used.
     47With a unique suffix (making the entire section name unique) the linker will
     48remove multiple definition making sure only one version exists after linking.
     49Then it is just a matter of making sure there is a unique name for each type.
     50
     51This is done in three phases.
     52The first phase is to generate a new structure definition to store the type
     53information. The layout is the same in each case, just the parent's type id,
     54but the types are changed.
     55The structure's name is change, it is based off the virtual type's name, and
     56the type of the parent's type id.
     57If the virtual type is polymorphic then the type information structure is
     58polymorphic as well, with the same polymorphic arguments.
     59
     60The second phase is to generate an instance of the type information with a
     61almost unique name, generated by mangling the virtual type name.
     62
     63The third phase is implicit with \CFA's overloading scheme. \CFA mangles
     64names with type information so that all of the symbols exported to the linker
     65are unique even if in \CFA code they are the same. Having two declarations
     66with the same name and same type is forbidden because it is impossible for
     67overload resolution to pick between them. This is why a unique type is
     68generated for each virtual type.
     69Polymorphic information is included in this mangling so polymorphic
     70types will have seperate instances for each set of polymorphic arguments.
     71
     72\begin{cfa}
     73struct TYPE_ID_TYPE {
     74        PARENT_ID_TYPE const * parent;
     75};
     76
     77__attribute__((cfa_linkonce))
     78TYPE_ID_TYPE const TYPE_ID_NAME = {
     79        &PARENT_ID_NAME,
     80};
     81\end{cfa}
     82
     83\subsubsection{cfa\_linkonce Attribute}
     84Another feature added to \CFA is a new attribute: \texttt{cfa\_linkonce}.
     85This attribute can be put on an object or function definition
     86(any global declaration with a name and a type).
     87This allows you to define that object or function multiple times.
     88All definitions should have the link-once attribute on them and all should
     89be identical.
     90
     91The simplist way to use it is to put a definition in a header where the
     92forward declaration would usually go.
     93This is how it is used for type-id instances. There was is no unique location
     94associated with a type except for the type definition which is in a header.
     95This allows the unique type-id object to be generated there.
     96
     97Internally @cfa_linkonce@ removes all @section@ attributes
     98from the declaration (as well as itself) and replaces them with
     99@section(".gnu.linkonce.NAME")@ where \texttt{NAME} is replaced by the
     100mangled name of the object.
     101The prefix \texttt{.gnu.linkonce} in section names is recognized by the
     102linker. If two of these sections with the same name, including everything
     103that comes after the special prefix, then only one will be used and the other
     104will be discarded.
    27105
    28106\subsection{Virtual Table}
    29 Every time a virtual type is defined the new virtual table type must also be
    30 defined.
    31 
    32 The unique instance is important because the address of the virtual table
    33 instance is used as the identifier for the virtual type. So a pointer to the
    34 virtual table and the ID for the virtual type are interchangable.
    35 \todo{Unique instances might be going so we will have to talk about the new
    36 system instead.}
    37 
    38 The first step in putting it all together is to create the virtual table type.
    39 The virtual table type is just a structure and can be described in terms of
    40 its fields. The first field is always the parent type ID (or a pointer to
    41 the parent virtual table) or 0 (the null pointer).
    42 Next are other fields on the parent virtual table are repeated.
    43 Finally are the fields used to store any new virtual members of the new
    44 The virtual type
    45 
    46 The virtual system is accessed through a private constant field inserted at the
    47 beginning of every virtual type, called the virtual-table pointer. This field
    48 points at a type's virtual table and is assigned during the object's
    49 construction. The address of a virtual table acts as the unique identifier for
    50 the virtual type, and the first field of a virtual table is a pointer to the
    51 parent virtual-table or @0p@. The remaining fields are duplicated from the
    52 parent tables in this type's inheritance chain, followed by any fields this type
    53 introduces. Parent fields are duplicated so they can be changed (all virtual
    54 members are overridable), so that references to the dispatched type
    55 are replaced with the current virtual type.
    56 % These are always taken by pointer or reference.
    57 
    58 % Simple ascii diragram:
    59 \begin{verbatim}
    60 parent_pointer  \
    61 parent_field0   |
    62 ...             | Same layout as parent.
    63 parent_fieldN   /
     107Each virtual type has a virtual table type that stores its type id and
     108virtual members.
     109Each virtual type instance is bound to a table instance that is filled with
     110the values of virtual members.
     111Both the layout of the fields and their value are decided by the rules given
     112below.
     113
     114The layout always comes in three parts.
     115The first section is just the type id at the head of the table. It is always
     116there to ensure that
     117The second section are all the virtual members of the parent, in the same
     118order as they appear in the parent's virtual table. Note that the type may
     119change slightly as references to the ``this" will change. This is limited to
     120inside pointers/references and via function pointers so that the size (and
     121hence the offsets) are the same.
     122The third section is similar to the second except that it is the new virtual
     123members introduced at this level in the hierarchy.
     124
     125\begin{figure}
     126\begin{cfa}
     127type_id
     128parent_field0
     129...
     130parent_fieldN
    64131child_field0
    65132...
    66133child_fieldN
    67 \end{verbatim}
    68 \todo{Refine the diagram}
    69 
    70 % For each virtual type, a virtual table is constructed. This is both a new type
    71 % and an instance of that type. Other instances of the type could be created
    72 % but the system doesn't use them. So this section will go over the creation of
    73 % the type and the instance.
    74 
    75 A virtual table is created when the virtual type is created. The name of the
    76 type is created by mangling the name of the base type. The name of the instance
    77 is also generated by name mangling. The fields are initialized automatically.
    78 The parent field is initialized by getting the type of the parent field and
    79 using that to calculate the mangled name of the parent's virtual table type.
    80 There are two special fields that are included like normal fields but have
    81 special initialization rules: the @size@ field is the type's size and is
    82 initialized with a @sizeof@ expression, the @align@ field is the type's
    83 alignment and uses an @alignof@ expression. The remaining fields are resolved
    84 to a name matching the field's name and type using the normal visibility and
    85 overload resolution rules of the type system.
    86 
    87 These operations are split up into several groups depending on where they take
    88 place which varies for monomorphic and polymorphic types. The first devision is
    89 between the declarations and the definitions. Declarations, such as a function
    90 signature or a aggregate's name, must always be visible but may be repeated in
    91 the form of forward declarations in headers. Definitions, such as function
    92 bodies and a aggregate's layout, can be separately compiled but must occur
    93 exactly once in a source file.
    94 
    95 \begin{sloppypar}
    96 The declarations include the virtual type definition and forward declarations
    97 of the virtual table instance, constructor, message function and
    98 @get_exception_vtable@. The definition includes the storage and initialization
    99 of the virtual table instance and the bodies of the three functions.
    100 \end{sloppypar}
    101 
    102 Monomorphic instances put all of these two groups in one place each.
    103 Polymorphic instances also split out the core declarations and definitions from
    104 the per-instance information. The virtual table type and most of the functions
    105 are polymorphic so they are all part of the core. The virtual table instance
    106 and the @get_exception_vtable@ function.
    107 
    108 \begin{sloppypar}
     134\end{cfa}
     135\caption{Virtual Table Layout}
     136\label{f:VirtualTableLayout}
     137\todo*{Improve the Virtual Table Layout diagram.}
     138\end{figure}
     139
     140The first and second sections together mean that every virtual table has a
     141prefix that has the same layout and types as its parent virtual table.
     142This, combined with the fixed offset to the virtual table pointer, means that
     143for any virtual type it doesn't matter if we have it or any of its
     144descendants, it is still always safe to access the virtual table through
     145the virtual table pointer.
     146From there it is safe to check the type id to identify the exact type of the
     147underlying object, access any of the virtual members and pass the object to
     148any of the method-like virtual members.
     149
     150When a virtual table is declared the user decides where to declare it and its
     151name. The initialization of the virtual table is entirely automatic based on
     152the context of the declaration.
     153
     154The type id is always fixed, each virtual table type will always have one
     155exactly one possible type id.
     156The virtual members are usually filled in by resolution. The best match for
     157a given name and type at the declaration site is filled in.
     158There are two exceptions to that rule: the @size@ field is the type's size
     159and is set to the result of a @sizeof@ expression, the @align@ field is the
     160type's alignment and similarly uses an @alignof@ expression.
     161
     162\subsubsection{Concurrency Integration}
    109163Coroutines and threads need instances of @CoroutineCancelled@ and
    110164@ThreadCancelled@ respectively to use all of their functionality. When a new
     
    112166the instance is created as well. The definition of the virtual table is created
    113167at the definition of the main function.
    114 \end{sloppypar}
     168
     169\begin{figure}
     170\begin{cfa}
     171coroutine Example {
     172        // fields
     173}
     174\end{cfa}
     175
     176\begin{cfa}
     177__attribute__((cfa_linkonce))
     178struct __cfatid_struct_CoroutineCancelled(Example)
     179                __cfatid_CoroutineCancelled = {
     180        &EXCEPTION_TYPE_ID,
     181};
     182extern CoroutineCancelled_vtable _default_vtable_object_declaration;
     183extern CoroutineCancelled_vtable & _default_vtable;
     184\end{cfa}
     185
     186\begin{cfa}
     187void main(Example & this) {
     188        // body
     189}
     190\end{cfa}
     191
     192\begin{cfa}
     193CoroutineCancelled_vtable _default_vtable_object_declaration = {
     194        __cfatid_CoroutineCancelled,
     195        // Virtual member initialization.
     196};
     197
     198CoroutineCancelled_vtable & _default_vtable =
     199        &_default_vtable_object_declaration;
     200\end{cfa}
     201\caption{Concurrency Transformations}
     202\label{f:ConcurrencyTransformations}
     203\end{figure}
     204\todo{Improve Concurrency Transformations figure.}
    115205
    116206\subsection{Virtual Cast}
     
    119209% The C-cast is just to make sure the generated code is correct so the rest of
    120210% the section is about that function.
    121 The function is
     211The function is implemented in the standard library and has the following
     212signature:
    122213\begin{cfa}
    123214void * __cfa__virtual_cast(
    124         struct __cfa__parent_vtable const * parent,
    125         struct __cfa__parent_vtable const * const * child );
    126 \end{cfa}
    127 and it is implemented in the standard library. The structure reperents the
    128 head of a vtable which is the pointer to the parent virtual table. The
    129 @parent@ points directly at the parent type virtual table while the @child@
    130 points at the object of the (possibe) child type.
    131 
    132 In terms of the virtual cast expression, @parent@ comes from looking up the
    133 type being cast to and @child@ is the result of the expression being cast.
    134 Because the complier outputs C code, some type C type casts are also used.
    135 The last bit of glue is an map that saves every virtual type the compiler
    136 sees. This is used to check the type used in a virtual cast is a virtual
    137 type and to get its virtual table.
    138 (It also checks for conflicting definitions.)
    139 
    140 Inside the function it is a simple conditional. If the type repersented by
    141 @parent@ is or is an ancestor of the type repersented by @*child@ (it
    142 requires one more level of derefence to pass through the object) then @child@
    143 is returned, otherwise the null pointer is returned.
    144 
    145 The check itself is preformed is a simple linear search. If the child
    146 virtual table or any of its ancestors (which are retreved through the first
    147 field of every virtual table) are the same as the parent virtual table then
    148 the cast succeeds.
     215        struct __cfavir_type_td parent,
     216        struct __cfavir_type_id const * child );
     217\end{cfa}
     218The type id of target type of the virtual cast is passed in as @parent@ and
     219the cast target is passed in as @child@.
     220
     221For C generation both arguments and the result are wrapped with type casts.
     222There is also an internal store inside the compiler to make sure that the
     223target type is a virtual type.
     224% It also checks for conflicting definitions.
     225
     226The virtual cast either returns the original pointer as a new type or null.
     227So the function just does the parent check and returns the approprate value.
     228The parent check is a simple linear search of child's ancestors using the
     229type information.
    149230
    150231\section{Exceptions}
     
    161242
    162243Stack unwinding is the process of removing stack frames (activations) from the
    163 stack. On function entry and return, unwinding is handled directly by the code
    164 embedded in the function. Usually, the stack-frame size is known statically
    165 based on parameter and local variable declarations. For dynamically-sized
    166 local variables, a runtime computation is necessary to know the frame
    167 size. Finally, a function's frame-size may change during execution as local
    168 variables (static or dynamic sized) go in and out of scope.
     244stack. On function entry and return, unwinding is handled directly by the
     245call/return code embedded in the function.
     246In many cases the position of the instruction pointer (relative to parameter
     247and local declarations) is enough to know the current size of the stack
     248frame.
     249
     250Usually, the stack-frame size is known statically based on parameter and
     251local variable declarations. Even with dynamic stack-size the information
     252to determain how much of the stack has to be removed is still contained
     253within the function.
    169254Allocating/deallocating stack space is usually an $O(1)$ operation achieved by
    170255bumping the hardware stack-pointer up or down as needed.
    171 
    172 Unwinding across multiple stack frames is more complex because individual stack
    173 management code associated with each frame is bypassed. That is, the location
    174 of a function's frame-management code is largely unknown and dispersed
    175 throughout the function, hence the current frame size managed by that code is
    176 also unknown. Hence, code unwinding across frames does not have direct
    177 knowledge about what is on the stack, and hence, how much of the stack needs to
    178 be removed.
    179 
    180 % At a very basic level this can be done with @setjmp@ \& @longjmp@ which simply
    181 % move the top of the stack, discarding everything on the stack above a certain
    182 % point. However this ignores all the cleanup code that should be run when
    183 % certain sections of the stack are removed (for \CFA these are from destructors
    184 % and finally clauses) and also requires that the point to which the stack is
    185 % being unwound is known ahead of time. libunwind is used to address both of
    186 % these problems.
     256Constructing/destructing values on the stack takes longer put in terms of
     257figuring out what needs to be done is of similar complexity.
     258
     259Unwinding across multiple stack frames is more complex because that
     260information is no longer contained within the current function.
     261With seperate compilation a function has no way of knowing what its callers
     262are so it can't know how large those frames are.
     263Without altering the main code path it is also hard to pass that work off
     264to the caller.
    187265
    188266The traditional unwinding mechanism for C is implemented by saving a snap-shot
     
    191269reseting to a snap-shot of an arbitrary but existing function frame on the
    192270stack. It is up to the programmer to ensure the snap-shot is valid when it is
    193 reset, making this unwinding approach fragile with potential errors that are
    194 difficult to debug because the stack becomes corrupted.
    195 
    196 However, many languages define cleanup actions that must be taken when objects
    197 are deallocated from the stack or blocks end, such as running a variable's
    198 destructor or a @try@ statement's @finally@ clause. Handling these mechanisms
    199 requires walking the stack and checking each stack frame for these potential
    200 actions.
    201 
    202 For exceptions, it must be possible to walk the stack frames in search of @try@
    203 statements to match and execute a handler. For termination exceptions, it must
    204 also be possible to unwind all stack frames from the throw to the matching
    205 catch, and each of these frames must be checked for cleanup actions. Stack
    206 walking is where most of the complexity and expense of exception handling
    207 appears.
     271reset and that all required clean-up from the unwound stacks is preformed.
     272This approach is fragile and forces a work onto the surounding code.
     273
     274With respect to that work forced onto the surounding code,
     275many languages define clean-up actions that must be taken when certain
     276sections of the stack are removed. Such as when the storage for a variable
     277is removed from the stack or when a try statement with a finally clause is
     278(conceptually) popped from the stack.
     279None of these should be handled by the user, that would contradict the
     280intention of these features, so they need to be handled automatically.
     281
     282To safely remove sections of the stack the language must be able to find and
     283run these clean-up actions even when removing multiple functions unknown at
     284the beginning of the unwinding.
    208285
    209286One of the most popular tools for stack management is libunwind, a low-level
     
    215292\subsection{libunwind Usage}
    216293Libunwind, accessed through @unwind.h@ on most platforms, is a C library that
    217 provides \CC-style stack-unwinding. Its operation is divided into two phases:
     294provides \Cpp-style stack-unwinding. Its operation is divided into two phases:
    218295search and cleanup. The dynamic target search -- phase 1 -- is used to scan the
    219296stack and decide where unwinding should stop (but no unwinding occurs). The
     
    226303LSDA can contain any information but conventionally it is a table with entries
    227304representing regions of the function and what has to be done there during
    228 unwinding. These regions are bracketed by the instruction pointer. If the
     305unwinding. These regions are bracketed by instruction addresses. If the
    229306instruction pointer is within a region's start/end, then execution is currently
    230307executing in that region. Regions are used to mark out the scopes of objects
     
    238315
    239316The GCC compilation flag @-fexceptions@ causes the generation of an LSDA and
    240 attaches its personality function. However, this
     317attaches a personality function to each function.
     318In plain C (which \CFA currently compiles down to) this
    241319flag only handles the cleanup attribute:
    242 \todo{Peter: What is attached? Andrew: It uses the .cfi\_personality directive
    243 and that's all I know.}
    244320\begin{cfa}
    245321void clean_up( int * var ) { ... }
    246322int avar __attribute__(( cleanup(clean_up) ));
    247323\end{cfa}
    248 which is used on a variable and specifies a function, in this case @clean_up@,
    249 run when the variable goes out of scope.
    250 The function is passed a pointer to the object being removed from the stack
    251 so it can be used to mimic destructors.
    252 However, this feature cannot be used to mimic @try@ statements as it cannot
    253 control the unwinding.
     324The attribue is used on a variable and specifies a function,
     325in this case @clean_up@, run when the variable goes out of scope.
     326This is enough to mimic destructors, but not try statements which can effect
     327the unwinding.
     328
     329To get full unwinding support all of this has to be done directly with
     330assembly and assembler directives. Partiularly the cfi directives
     331\texttt{.cfi\_lsda} and \texttt{.cfi\_personality}.
    254332
    255333\subsection{Personality Functions}
     
    268346\end{lstlisting}
    269347The @action@ argument is a bitmask of possible actions:
    270 \begin{enumerate}
     348\begin{enumerate}[topsep=5pt]
    271349\item
    272350@_UA_SEARCH_PHASE@ specifies a search phase and tells the personality function
     
    291369@_UA_FORCE_UNWIND@ specifies a forced unwind call. Forced unwind only performs
    292370the cleanup phase and uses a different means to decide when to stop
    293 \see{\VRef{s:ForcedUnwind}}.
     371(see \vref{s:ForcedUnwind}).
    294372\end{enumerate}
    295373
    296374The @exception_class@ argument is a copy of the
    297 \lstinline[language=C]|exception|'s @exception_class@ field.
    298 
    299 The \lstinline[language=C]|exception| argument is a pointer to the user
    300 provided storage object. It has two public fields, the exception class, which
    301 is actually just a number, identifying the exception handling mechanism that
    302 created it, and the cleanup function. The cleanup function is called if
    303 required by the exception.
     375\code{C}{exception}'s @exception_class@ field.
     376This a number that identifies the exception handling mechanism that created
     377the
     378
     379The \code{C}{exception} argument is a pointer to the user
     380provided storage object. It has two public fields: the @exception_class@,
     381which is described above, and the @exception_cleanup@ function.
     382The clean-up function is used by the EHM to clean-up the exception if it
     383should need to be freed at an unusual time, it takes an argument that says
     384why it had to be cleaned up.
    304385
    305386The @context@ argument is a pointer to an opaque type passed to helper
     
    309390that can be passed several places in libunwind. It includes a number of
    310391messages for special cases (some of which should never be used by the
    311 personality function) and error codes but unless otherwise noted the
     392personality function) and error codes. However, unless otherwise noted, the
    312393personality function should always return @_URC_CONTINUE_UNWIND@.
    313394
     
    324405@_URC_END_OF_STACK@.
    325406
    326 Second, when a handler is matched, raise exception continues onto the cleanup
    327 phase.
     407Second, when a handler is matched, raise exception moves to the clean-up
     408phase and walks the stack a second time.
    328409Once again, it calls the personality functions of each stack frame from newest
    329410to oldest. This pass stops at the stack frame containing the matching handler.
     
    338419Forced Unwind is the other central function in libunwind.
    339420\begin{cfa}
    340 _Unwind_Reason_Code _Unwind_ForcedUnwind( _Unwind_Exception *,
     421_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *,
    341422        _Unwind_Stop_Fn, void *);
    342423\end{cfa}
     
    380461Each stack must have its own exception context. In a sequential \CFA program,
    381462there is only one stack with a single global exception-context. However, when
    382 the library @libcfathread@ is linked, there are multiple stacks where each
     463the library @libcfathread@ is linked, there are multiple stacks and each
    383464needs its own exception context.
    384465
    385 General access to the exception context is provided by function
     466The exception context should be retrieved by calling the function
    386467@this_exception_context@. For sequential execution, this function is defined as
    387468a weak symbol in the \CFA system-library, @libcfa@. When a \CFA program is
     
    390471
    391472The sequential @this_exception_context@ returns a hard-coded pointer to the
    392 global execption context.
     473global exception context.
    393474The concurrent version adds the exception context to the data stored at the
    394 base of each stack. When @this_exception_context@ is called it retrieves the
     475base of each stack. When @this_exception_context@ is called, it retrieves the
    395476active stack and returns the address of the context saved there.
    396477
     
    399480% catches. Talk about GCC nested functions.
    400481
    401 Termination exceptions use libunwind heavily because it matches the intended
    402 use from \CC exceptions closely. The main complication for \CFA is that the
     482\CFA termination exceptions use libunwind heavily because they match \Cpp
     483\Cpp exceptions closely. The main complication for \CFA is that the
    403484compiler generates C code, making it very difficult to generate the assembly to
    404485form the LSDA for try blocks or destructors.
     
    411492per-exception storage.
    412493
    413 [Quick ASCII diagram to get started.]
     494\begin{figure}
    414495\begin{verbatim}
    415496Fixed Header  | _Unwind_Exception   <- pointer target
     
    420501              V ...
    421502\end{verbatim}
    422 
    423 Exceptions are stored in variable-sized blocks.
    424 The first component is a fixed sized data structure that contains the
     503\caption{Exception Layout}
     504\label{f:ExceptionLayout}
     505\end{figure}
     506\todo*{Convert the exception layout to an actual diagram.}
     507
     508Exceptions are stored in variable-sized blocks (see \vref{f:ExceptionLayout}).
     509The first component is a fixed-sized data structure that contains the
    425510information for libunwind and the exception system. The second component is an
    426511area of memory big enough to store the exception. Macros with pointer arthritic
     
    428513@_Unwind_Exception@ to the entire node.
    429514
    430 All of these nodes are linked together in a list, one list per stack, with the
     515Multipe exceptions can exist at the same time because exceptions can be
     516raised inside handlers, destructors and finally blocks.
     517Figure~\vref{f:MultipleExceptions} shows a program that has multiple
     518exceptions active at one time.
     519Each time an exception is thrown and caught the stack unwinds and the finally
     520clause runs. This will throw another exception (until @num_exceptions@ gets
     521high enough) which must be allocated. The previous exceptions may not be
     522freed because the handler/catch clause has not been run.
     523So the EHM must keep them alive while it allocates exceptions for new throws.
     524
     525\begin{figure}
     526\centering
     527% Andrew: Figure out what these do and give them better names.
     528\newsavebox{\myboxA}
     529\newsavebox{\myboxB}
     530\begin{lrbox}{\myboxA}
     531\begin{lstlisting}[language=CFA,{moredelim=**[is][\color{red}]{@}{@}}]
     532unsigned num_exceptions = 0;
     533void throws() {
     534    try {
     535        try {
     536            ++num_exceptions;
     537            throw (Example){table};
     538        } finally {
     539            if (num_exceptions < 3) {
     540                throws();
     541            }
     542        }
     543    } catch (exception_t *) {
     544        --num_exceptions;
     545    }
     546}
     547int main() {
     548    throws();
     549}
     550\end{lstlisting}
     551\end{lrbox}
     552
     553\begin{lrbox}{\myboxB}
     554\begin{lstlisting}
     555\end{lstlisting}
     556\end{lrbox}
     557
     558{\usebox\myboxA}
     559\hspace{25pt}
     560{\usebox\myboxB}
     561
     562\caption{Multiple Exceptions}
     563\label{f:MultipleExceptions}
     564\end{figure}
     565\todo*{Work on multiple exceptions code sample.}
     566
     567All exceptions are stored in nodes which are then linked together in lists,
     568one list per stack, with the
    431569list head stored in the exception context. Within each linked list, the most
    432570recently thrown exception is at the head followed by older thrown
     
    439577exception, the copy function, and the free function, so they are specific to an
    440578exception type. The size and copy function are used immediately to copy an
    441 exception into managed memory. After the exception is handled the free function
    442 is used to clean up the exception and then the entire node is passed to free
    443 so the memory can be given back to the heap.
     579exception into managed memory. After the exception is handled, the free
     580function is used to clean up the exception and then the entire node is
     581passed to free so the memory can be given back to the heap.
    444582
    445583\subsection{Try Statements and Catch Clauses}
     
    454592calls them.
    455593Because this function is known and fixed (and not an arbitrary function that
    456 happens to contain a try statement) this means the LSDA can be generated ahead
     594happens to contain a try statement), the LSDA can be generated ahead
    457595of time.
    458596
    459597Both the LSDA and the personality function are set ahead of time using
    460 embedded assembly. This is handcrafted using C @asm@ statements and contains
     598embedded assembly. This assembly code is handcrafted using C @asm@ statements
     599and contains
    461600enough information for the single try statement the function repersents.
    462601
     
    487626nested functions and all other functions besides @__cfaehm_try_terminate@ in
    488627\CFA use the GCC personality function and the @-fexceptions@ flag to generate
    489 the LSDA. This allows destructors to be implemented with the cleanup attribute.
     628the LSDA.
     629Using this pattern, \CFA implements destructors with the cleanup attribute.
     630
     631\begin{figure}
     632\begin{cfa}
     633try {
     634        // TRY BLOCK
     635} catch (Exception1 * name1 ; check(name1)) {
     636        // CATCH BLOCK 1
     637} catch (Exception2 * name2) {
     638        // CATCH BLOCK 2
     639}
     640\end{cfa}
     641
     642\begin{cfa}
     643void try(void) {
     644        // TRY BLOCK
     645}
     646int match(exception_t * __exception_inst) {
     647        {
     648                Exception1 * name1;
     649                if (name1 = (virtual Exception1 *)__exception_inst && check(name1)) {
     650                        return 1;
     651                }
     652        }
     653        {
     654                Exception2 * name2;
     655                if (name2 = (virtual Exception2 *)__exception_inst) {
     656                        return 2;
     657                }
     658        }
     659        return 0;
     660}
     661void catch(exception_t * __exception_inst, int __handler_index) {
     662        switch (__handler_index) {
     663        case 1:
     664        {
     665                Exception1 * name1 = (virtual Exception1 *)__exception_inst;
     666                // CATCH BLOCK 1
     667        }
     668        return;
     669        case 2:
     670        {
     671                Exception2 * name2 = (virtual Exception2 *)__exception_inst;
     672                // CATCH BLOCK 2
     673        }
     674        return;
     675        }
     676}
     677{
     678        __cfaehm_try_terminate(try, catch, match);
     679}
     680\end{cfa}
     681
     682\caption{Termination Transformation}
     683\label{f:TerminationTransformation}
     684\todo*{Improve (compress?) Termination Transformations.}
     685\end{figure}
    490686
    491687\section{Resumption}
    492688% The stack-local data, the linked list of nodes.
    493689
    494 Resumption simple to implement because there is no stack unwinding. The
    495 resumption raise uses a list of nodes for its stack traversal. The head of the
    496 list is stored in the exception context. The nodes in the list have a pointer
    497 to the next node and a pointer to the handler function.
    498 
    499 A resumption raise traverses this list. At each node the handler function is
    500 called, passing the exception by pointer. It returns true if the exception is
    501 handled and false otherwise.
    502 
    503 The handler function does both the matching and handling. It computes the
    504 condition of each @catchResume@ in top-to-bottom order, until it finds a
    505 handler that matches. If no handler matches then the function returns
    506 false. Otherwise the matching handler is run; if it completes successfully, the
    507 function returns true. Rethrowing, through the @throwResume;@ statement,
    508 causes the function to return true.
     690Resumption simpler to implement than termination
     691because there is no stack unwinding.
     692Instead of storing the data in a special area using assembly,
     693there is just a linked list of possible handlers for each stack,
     694with each node on the list reperenting a try statement on the stack.
     695
     696The head of the list is stored in the exception context.
     697The nodes are stored in order, with the more recent try statements closer
     698to the head of the list.
     699Instead of traversing the stack resumption handling traverses the list.
     700At each node the EHM checks to see if the try statement the node repersents
     701can handle the exception. If it can, then the exception is handled and
     702the operation finishes, otherwise the search continues to the next node.
     703If the search reaches the end of the list without finding a try statement
     704that can handle the exception the default handler is executed and the
     705operation finishes.
     706
     707In each node is a handler function which does most of the work there.
     708The handler function is passed the raised the exception and returns true
     709if the exception is handled and false if it cannot be handled here.
     710
     711For each @catchResume@ clause the handler function will:
     712check to see if the raised exception is a descendant type of the declared
     713exception type, if it is and there is a conditional expression then it will
     714run the test, if both checks pass the handling code for the clause is run
     715and the function returns true, otherwise it moves onto the next clause.
     716If this is the last @catchResume@ clause then instead of moving onto
     717the next clause the function returns false as no handler could be found.
     718
     719\begin{figure}
     720\begin{cfa}
     721try {
     722        // TRY BLOCK
     723} catchResume (Exception1 * name1 ; check(name1)) {
     724        // CATCH BLOCK 1
     725} catchResume (Exception2 * name2) {
     726        // CATCH BLOCK 2
     727}
     728\end{cfa}
     729
     730\begin{cfa}
     731bool handle(exception_t * __exception_inst) {
     732        {
     733                Exception1 * name1;
     734                if (name1 = (virtual Exception1 *)__exception_inst && check(name1)) {
     735                        // CATCH BLOCK 1
     736                        return 1;
     737                }
     738        }
     739        {
     740                Exception2 * name2;
     741                if (name2 = (virtual Exception2 *)__exception_inst) {
     742                        // CATCH BLOCK 2
     743                        return 2;
     744                }
     745        }
     746        return false;
     747}
     748struct __try_resume_node __resume_node
     749        __attribute__((cleanup( __cfaehm_try_resume_cleanup )));
     750__cfaehm_try_resume_setup( &__resume_node, handler );
     751\end{cfa}
     752
     753\caption{Resumption Transformation}
     754\label{f:ResumptionTransformation}
     755\todo*{Improve (compress?) Resumption Transformations.}
     756\end{figure}
    509757
    510758% Recursive Resumption Stuff:
    511 Search skipping \see{\VPageref{p:searchskip}}, which ignores parts of the stack
     759Search skipping (see \vpageref{s:ResumptionMarking}), which ignores parts of
     760the stack
    512761already examined, is accomplished by updating the front of the list as the
    513 search continues. Before the handler at a node is called the head of the list
     762search continues. Before the handler at a node is called, the head of the list
    514763is updated to the next node of the current node. After the search is complete,
    515764successful or not, the head of the list is reset.
     
    524773stack -- the first one points over all the checked handlers -- and the ordering
    525774is maintained.
     775
     776\begin{figure}
     777\begin{minipage}[l][][b]{0,2\textwidth}
     778\begin{verbatim}
     779
     780
     781  X <-
     782  |
     783  V
     784  X
     785  |
     786  V
     787  X
     788\end{verbatim}
     789Initial State
     790\end{minipage}
     791\begin{minipage}[l][][b]{0,2\textwidth}
     792\begin{verbatim}
     793
     794
     795  X
     796  |
     797  V
     798  X <-
     799  |
     800  V
     801  X
     802\end{verbatim}
     803Handler Found
     804\end{minipage}
     805\begin{minipage}[l][][b]{0,2\textwidth}
     806\begin{verbatim}
     807  X <-
     808 /
     809/ X
     810| |
     811\ V
     812  X
     813  |
     814  V
     815  X
     816\end{verbatim}
     817Try Block Added
     818\end{minipage}
     819\begin{minipage}[l][][b]{0,2\textwidth}
     820\begin{verbatim}
     821
     822
     823  X <-
     824  |
     825  V
     826  X
     827  |
     828  V
     829  X
     830\end{verbatim}
     831Handler Done
     832\end{minipage}
     833\caption{Resumption Marking}
     834\label{f:ResumptionMarking}
     835\todo*{Convert Resumption Marking into a line figure.}
     836\end{figure}
    526837
    527838\label{p:zero-cost}
     
    540851\section{Finally}
    541852% Uses destructors and GCC nested functions.
    542 Finally clauses is placed into a GCC nested-function with a unique name, and no
    543 arguments or return values. This nested function is then set as the cleanup
     853A finally clause is placed into a GCC nested-function with a unique name,
     854and no arguments or return values.
     855This nested function is then set as the cleanup
    544856function of an empty object that is declared at the beginning of a block placed
    545857around the context of the associated @try@ statement.
    546858
    547 The rest is handled by GCC. The try block and all handlers are inside the
     859The rest is handled by GCC. The try block and all handlers are inside this
    548860block. At completion, control exits the block and the empty object is cleaned
    549861up, which runs the function that contains the finally code.
     
    553865
    554866Cancellation also uses libunwind to do its stack traversal and unwinding,
    555 however it uses a different primary function @_Unwind_ForcedUnwind@. Details
    556 of its interface can be found in the \VRef{s:ForcedUnwind}.
     867however it uses a different primary function: @_Unwind_ForcedUnwind@. Details
     868of its interface can be found in the Section~\vref{s:ForcedUnwind}.
    557869
    558870The first step of cancellation is to find the cancelled stack and its type:
     
    560872pointer and the current thread pointer, and every thread stores a pointer to
    561873its main coroutine and the coroutine it is currently executing.
    562 
    563 So if the active thread's main and current coroutine are the same. If they
    564 are then the current stack is a thread stack, otherwise it is a coroutine
    565 stack. If it is a thread stack then an equality check with the stored main
    566 thread pointer and current thread pointer is enough to tell if the current
    567 thread is the main thread or not.
     874\todo*{Consider adding a description of how threads are coroutines.}
     875
     876If a the current thread's main and current coroutines are the same then the
     877current stack is a thread stack. Furthermore it is easy to compare the
     878current thread to the main thread to see if they are the same. And if this
     879is not a thread stack then it must be a coroutine stack.
    568880
    569881However, if the threading library is not linked, the sequential execution is on
     
    574886Regardless of how the stack is chosen, the stop function and parameter are
    575887passed to the forced-unwind function. The general pattern of all three stop
    576 functions is the same: they continue unwinding until the end of stack when they
    577 do there primary work.
     888functions is the same: they continue unwinding until the end of stack and
     889then preform their transfer.
    578890
    579891For main stack cancellation, the transfer is just a program abort.
  • doc/theses/andrew_beach_MMath/uw-ethesis.tex

    r5407cdc r8d66610  
    6666% Tip: Photographs should be cropped and compressed so as not to be too large.
    6767
    68 % To create a PDF output that is optimized for double-sided printing:
    69 % 1) comment-out the \documentclass statement in the preamble below, and
    70 %    un-comment the second \documentclass line.
    71 % 2) change the value assigned below to the boolean variable "PrintVersion"
    72 %    from "false" to "true".
    73 
    7468% ======================================================================
    7569%   D O C U M E N T   P R E A M B L E
     
    8781}
    8882
    89 % Some LaTeX commands I define for my own nomenclature.
    90 % If you have to, it's easier to make changes to nomenclature once here than
    91 % in a million places throughout your thesis!
    92 \newcommand{\package}[1]{\textbf{#1}} % package names in bold text
    93 \newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font
    94 \newcommand{\href}[1]{#1} % does nothing, but defines the command so the
    95 % print-optimized version will ignore \href tags (redefined by hyperref pkg).
    96 % Anything defined here may be redefined by packages added below...
     83% Does nothing, ignores \href tags (redefined by hyperref package).
     84\newcommand{\href}[1]{#1}
    9785
    9886% For a nomenclature (optional; available from ctan.org)
     
    10593% Removes large sections of the document.
    10694\usepackage{comment}
    107 % Adds todos (Must be included after comment.)
    108 \usepackage{todonotes}
     95% Adds todo commands.
     96\usepackage{todo}
     97% cfa macros used in the document
     98\usepackage{cfalab}
     99% allow global and individual modification of spacing
     100\usepackage{enumitem}
     101% Improved reference tools.
     102\usepackage[nospace]{varioref}
    109103
    110104% Hyperlinks make it very easy to navigate an electronic document.
     
    151145
    152146% Exception to the rule of hyperref being the last add-on package
    153 \usepackage[automake,toc,abbreviations]{glossaries-extra}
     147\usepackage[toc,abbreviations]{glossaries-extra}
    154148% If glossaries-extra is not in your LaTeX distribution, get it from CTAN
    155149% (http://ctan.org/pkg/glossaries-extra), although it's supposed to be in
     
    208202\makeglossaries
    209203
    210 % cfa macros used in the document
    211 %\usepackage{cfalab}
    212 % I'm going to bring back eventually.
    213 \makeatletter
    214 % Combines all \CC* commands:
    215 \newrobustcmd*\Cpp[1][\xspace]{\cfalab@Cpp#1}
    216 \newcommand\cfalab@Cpp{C\kern-.1em\hbox{+\kern-.25em+}}
    217 % Optional arguments do not work with pdf string. (Some fix-up required.)
    218 \pdfstringdefDisableCommands{\def\Cpp{C++}}
    219 
    220 % Wrappers for inline code snippits.
    221 \newrobustcmd*\codeCFA[1]{\lstinline[language=CFA]{#1}}
    222 \newrobustcmd*\codeC[1]{\lstinline[language=C]{#1}}
    223 \newrobustcmd*\codeCpp[1]{\lstinline[language=C++]{#1}}
    224 \newrobustcmd*\codePy[1]{\lstinline[language=Python]{#1}}
    225 
    226 % Colour text, formatted in LaTeX style instead of TeX style.
    227 \newcommand*\colour[2]{{\color{#1}#2}}
    228 \makeatother
    229 
    230 \input{common}
    231 % CFA code-style for all languages
    232 \CFAStyle
    233 % CFA default lnaguage
    234 \lstset{language=CFA,basicstyle=\linespread{0.9}\tt}
     204% listings package configuation:
     205\lstMakeShortInline@
     206\lstset{language=CFA,style=cfacommon,basicstyle=\linespread{0.9}\tt}
     207\lstset{moredelim=**[is][\protect\color{red}]{@}{@}}
    235208% Annotations from Peter:
    236209\newcommand{\PAB}[1]{{\color{blue}PAB: #1}}
     
    262235% Tip: Putting each sentence on a new line is a way to simplify later editing.
    263236%----------------------------------------------------------------------
     237\input{intro}
    264238\input{existing}
    265239\input{features}
    266240\input{implement}
    267 %\input{unwinding}
    268241\input{future}
    269242
     
    325298\phantomsection         % allows hyperref to link to the correct page
    326299
     300\todos
     301
    327302%----------------------------------------------------------------------
    328303\end{document} % end of logical document
  • doc/theses/mubeen_zulfiqar_MMath/AllocDS1.fig

    r5407cdc r8d66610  
    88-2
    991200 2
    10 6 4950 1275 5250 1425
    11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5025 1350 20 20 5025 1350 5045 1350
    12 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5100 1350 20 20 5100 1350 5120 1350
    13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5175 1350 20 20 5175 1350 5195 1350
     106 4200 1575 4500 1725
     111 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4275 1650 20 20 4275 1650 4295 1650
     121 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4350 1650 20 20 4350 1650 4370 1650
     131 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4425 1650 20 20 4425 1650 4445 1650
    1414-6
    15 6 5700 1950 6000 2100
    16 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 2025 20 20 5775 2025 5795 2025
    17 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 2025 20 20 5850 2025 5870 2025
    18 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5925 2025 20 20 5925 2025 5945 2025
    19 -6
    20 6 3600 2100 3900 2475
     156 2850 2475 3150 2850
    21162 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    2217        1 1 1.00 45.00 90.00
    23          3675 2100 3675 2325
     18         2925 2475 2925 2700
    24192 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    25          3600 2325 3900 2325 3900 2475 3600 2475 3600 2325
     20         2850 2700 3150 2700 3150 2850 2850 2850 2850 2700
    2621-6
    27 6 5100 2100 5400 2475
     226 4350 2475 4650 2850
    28232 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    2924        1 1 1.00 45.00 90.00
    30          5175 2100 5175 2325
     25         4425 2475 4425 2700
    31262 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    32          5100 2325 5400 2325 5400 2475 5100 2475 5100 2325
     27         4350 2700 4650 2700 4650 2850 4350 2850 4350 2700
    3328-6
    34 6 4350 2100 4575 2775
     296 3600 2475 3825 3150
    35302 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    3631        1 1 1.00 45.00 90.00
    37          4425 2100 4425 2325
     32         3675 2475 3675 2700
    38332 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    39          4350 2325 4575 2325 4575 2475 4350 2475 4350 2325
     34         3600 2700 3825 2700 3825 2850 3600 2850 3600 2700
    40352 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    41          4350 2625 4575 2625 4575 2775 4350 2775 4350 2625
     36         3600 3000 3825 3000 3825 3150 3600 3150 3600 3000
    42372 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    4338        1 1 1.00 45.00 90.00
    44          4425 2400 4425 2625
     39         3675 2775 3675 3000
    4540-6
    46 6 5700 3225 6000 3375
    47 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 3300 20 20 5775 3300 5795 3300
    48 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 3300 20 20 5850 3300 5870 3300
    49 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5925 3300 20 20 5925 3300 5945 3300
     416 4875 3600 5175 3750
     421 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4950 3675 20 20 4950 3675 4970 3675
     431 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5025 3675 20 20 5025 3675 5045 3675
     441 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5100 3675 20 20 5100 3675 5120 3675
     45-6
     466 4875 2325 5175 2475
     471 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4950 2400 20 20 4950 2400 4970 2400
     481 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5025 2400 20 20 5025 2400 5045 2400
     491 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5100 2400 20 20 5100 2400 5120 2400
     50-6
     516 5625 2325 5925 2475
     521 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5700 2400 20 20 5700 2400 5720 2400
     531 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 2400 20 20 5775 2400 5795 2400
     541 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 2400 20 20 5850 2400 5870 2400
     55-6
     566 5625 3600 5925 3750
     571 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5700 3675 20 20 5700 3675 5720 3675
     581 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5775 3675 20 20 5775 3675 5795 3675
     591 3 0 1 0 0 50 -1 20 0.000 1 0.0000 5850 3675 20 20 5850 3675 5870 3675
    5060-6
    51612 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    52          2700 1950 3900 1950
     62         2400 2100 2400 2550
    53632 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    54          3000 1800 3000 2175
     64         2550 2100 2550 2550
    55652 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    56          3150 1800 3150 2175
     66         2700 2100 2700 2550
    57672 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    58          3300 1800 3300 2175
     68         2850 2100 2850 2550
    59692 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    60          3450 1800 3450 2175
     70         3000 2100 3000 2550
    61712 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    62          3600 1800 3600 2175
     72         3600 2100 3600 2550
    63732 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    64          3750 1800 3750 2175
     74         3900 2100 3900 2550
    65752 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    66          4200 1950 5400 1950
     76         4050 2100 4050 2550
    67772 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    68          4350 1800 4350 2175
     78         4200 2100 4200 2550
    69792 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    70          4500 1800 4500 2175
     80         4350 2100 4350 2550
    71812 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    72          4650 1800 4650 2175
     82         4500 2100 4500 2550
    73832 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    74          4800 1800 4800 2175
     84         3300 1500 3300 1800
    75852 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    76          4950 1800 4950 2175
     86         3600 1500 3600 1800
    77872 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    78          5100 1800 5100 2175
    79 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    80          5250 1800 5250 2175
    81 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    82          4050 1200 4050 1500
    83 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    84          4350 1200 4350 1500
    85 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    86          4650 1200 4650 1500
     88         3900 1500 3900 1800
    87892 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    88          3750 1200 5550 1200 5550 1500 3750 1500 3750 1200
     90         3000 1500 4800 1500 4800 1800 3000 1800 3000 1500
    89912 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
    9092        1 1 1.00 45.00 90.00
    91          3975 1350 3375 1800
     93         3225 1650 2625 2100
    92942 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    9395        1 1 1.00 45.00 90.00
    94          3900 1350 3300 1800
     96         3150 1650 2550 2100
    95972 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    9698        1 1 1.00 45.00 90.00
    97          4200 1350 4800 1800
     99         3450 1650 4050 2100
    981002 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
    99101        1 1 1.00 45.00 90.00
    100          4125 1350 4725 1800
     102         3375 1650 3975 2100
    1011032 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    102          2850 1800 2850 2175
     104         2100 2100 2100 2550
     1052 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     106         1950 2250 3150 2250
     1072 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     108         3450 2250 4650 2250
    1031092 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    104          2700 1800 3900 1800 3900 2175 2700 2175 2700 1800
     110         1950 2100 3150 2100 3150 2550 1950 2550 1950 2100
    1051112 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    106          4200 1800 5400 1800 5400 2175 4200 2175 4200 1800
     112         3450 2100 4650 2100 4650 2550 3450 2550 3450 2100
     1132 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     114         2250 2100 2250 2550
     1152 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     116         3750 2100 3750 2550
    1071172 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    108118        1 1 1.00 45.00 90.00
    109          2775 2100 2775 2325
     119         2025 2475 2025 2700
    1101202 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    111121        1 1 1.00 45.00 90.00
    112          2775 2400 2775 2625
     122         2025 2775 2025 3000
    1131232 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    114          2700 2625 2850 2625 2850 2775 2700 2775 2700 2625
     124         1950 3000 2100 3000 2100 3150 1950 3150 1950 3000
    1151252 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    116          2700 2325 2850 2325 2850 2475 2700 2475 2700 2325
     126         1950 2700 2100 2700 2100 2850 1950 2850 1950 2700
    1171272 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3
    118128        1 1 1.00 45.00 90.00
    119          2700 3375 3450 3375 3450 3150
     129         1950 3750 2700 3750 2700 3525
    1201302 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    121          2700 3150 3900 3150 3900 3525 2700 3525 2700 3150
     131         1950 3525 3150 3525 3150 3900 1950 3900 1950 3525
    1221322 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3
    123133        1 1 1.00 45.00 90.00
    124          4200 3375 4950 3375 4950 3150
     134         3450 3750 4200 3750 4200 3525
    1251352 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    126          4200 3150 5400 3150 5400 3525 4200 3525 4200 3150
     136         3450 3525 4650 3525 4650 3900 3450 3900 3450 3525
    1271372 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3
    128138        1 1 1.00 45.00 90.00
    129          3900 4350 4950 4350 4950 3900
     139         3150 4650 4200 4650 4200 4275
    1301402 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    131          3900 3900 5400 3900 5400 4575 3900 4575 3900 3900
    132 4 2 0 50 -1 0 12 0.0000 2 135 975 2625 2175 free buckets\001
    133 4 2 0 50 -1 0 12 0.0000 2 135 435 2625 1950 locks\001
    134 4 1 0 50 -1 0 12 0.0000 2 135 1365 4650 1125 N thread buckets\001
    135 4 1 0 50 -1 0 12 0.0000 2 180 390 5175 1725 heap\001
    136 4 1 0 50 -1 0 12 0.0000 2 180 390 2925 1725 heap\001
    137 4 1 0 50 -1 0 12 0.0000 2 180 915 3300 3075 bump alloc\001
    138 4 0 0 50 -1 0 12 0.0000 2 135 360 4275 3325 lock\001
    139 4 1 0 50 -1 0 12 0.0000 2 180 915 4800 3075 bump alloc\001
    140 4 0 0 50 -1 0 12 0.0000 2 135 360 3975 4075 lock\001
    141 4 1 0 50 -1 0 12 0.0000 2 135 345 4725 3825 sbrk\001
    142 4 0 0 50 -1 0 12 0.0000 2 135 360 2775 3325 lock\001
    143 4 2 0 50 -1 0 12 0.0000 2 135 675 2625 2625 free lists\001
     141         3150 4275 4650 4275 4650 4875 3150 4875 3150 4275
     1422 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     143         1950 2400 3150 2400
     1442 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     145         3450 2400 4650 2400
     1462 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     147         5400 2100 5400 3900
     1484 2 0 50 -1 0 11 0.0000 2 120 300 1875 2250 lock\001
     1494 1 0 50 -1 0 12 0.0000 2 135 1935 3900 1425 N kernel-thread buckets\001
     1504 1 0 50 -1 0 12 0.0000 2 195 810 4425 2025 heap$_2$\001
     1514 1 0 50 -1 0 12 0.0000 2 195 810 2175 2025 heap$_1$\001
     1524 2 0 50 -1 0 11 0.0000 2 120 270 1875 2400 size\001
     1534 2 0 50 -1 0 11 0.0000 2 120 270 1875 2550 free\001
     1544 1 0 50 -1 0 12 0.0000 2 180 825 2550 3450 local pool\001
     1554 0 0 50 -1 0 12 0.0000 2 135 360 3525 3700 lock\001
     1564 0 0 50 -1 0 12 0.0000 2 135 360 3225 4450 lock\001
     1574 2 0 50 -1 0 12 0.0000 2 135 600 1875 3000 free list\001
     1584 1 0 50 -1 0 12 0.0000 2 180 825 4050 3450 local pool\001
     1594 1 0 50 -1 0 12 0.0000 2 180 1455 3900 4200 global pool (sbrk)\001
     1604 0 0 50 -1 0 12 0.0000 2 135 360 2025 3700 lock\001
     1614 1 0 50 -1 0 12 0.0000 2 180 720 6450 3150 free pool\001
     1624 1 0 50 -1 0 12 0.0000 2 180 390 6450 2925 heap\001
  • doc/theses/mubeen_zulfiqar_MMath/AllocDS2.fig

    r5407cdc r8d66610  
    88-2
    991200 2
    10 6 2850 2025 3150 2175
    11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 2925 2100 20 20 2925 2100 2945 2100
    12 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3000 2100 20 20 3000 2100 3020 2100
    13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3075 2100 20 20 3075 2100 3095 2100
     106 2850 2100 3150 2250
     111 3 0 1 0 0 50 -1 20 0.000 1 0.0000 2925 2175 20 20 2925 2175 2945 2175
     121 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3000 2175 20 20 3000 2175 3020 2175
     131 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3075 2175 20 20 3075 2175 3095 2175
    1414-6
    15 6 4050 2025 4350 2175
    16 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4125 2100 20 20 4125 2100 4145 2100
    17 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4200 2100 20 20 4200 2100 4220 2100
    18 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4275 2100 20 20 4275 2100 4295 2100
     156 4050 2100 4350 2250
     161 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4125 2175 20 20 4125 2175 4145 2175
     171 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4200 2175 20 20 4200 2175 4220 2175
     181 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4275 2175 20 20 4275 2175 4295 2175
    1919-6
    20 6 4650 2025 4950 2175
    21 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4725 2100 20 20 4725 2100 4745 2100
    22 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4800 2100 20 20 4800 2100 4820 2100
    23 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4875 2100 20 20 4875 2100 4895 2100
     206 4650 2100 4950 2250
     211 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4725 2175 20 20 4725 2175 4745 2175
     221 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4800 2175 20 20 4800 2175 4820 2175
     231 3 0 1 0 0 50 -1 20 0.000 1 0.0000 4875 2175 20 20 4875 2175 4895 2175
    2424-6
    25 6 3450 2025 3750 2175
    26 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3525 2100 20 20 3525 2100 3545 2100
    27 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3600 2100 20 20 3600 2100 3620 2100
    28 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3675 2100 20 20 3675 2100 3695 2100
     256 3450 2100 3750 2250
     261 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3525 2175 20 20 3525 2175 3545 2175
     271 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3600 2175 20 20 3600 2175 3620 2175
     281 3 0 1 0 0 50 -1 20 0.000 1 0.0000 3675 2175 20 20 3675 2175 3695 2175
    2929-6
    30 6 3300 2100 3600 2475
     306 3300 2175 3600 2550
    31312 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    3232        1 1 1.00 45.00 90.00
    33          3375 2100 3375 2325
     33         3375 2175 3375 2400
    34342 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    35          3300 2325 3600 2325 3600 2475 3300 2475 3300 2325
     35         3300 2400 3600 2400 3600 2550 3300 2550 3300 2400
    3636-6
     372 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     38         3150 1800 3150 2250
     392 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     40         2850 1800 2850 2250
     412 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     42         4650 1800 4650 2250
     432 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     44         4950 1800 4950 2250
     452 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     46         4500 1725 4500 2250
     472 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     48         5100 1725 5100 2250
     492 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     50         3450 1800 3450 2250
     512 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     52         3750 1800 3750 2250
     532 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     54         3300 1725 3300 2250
     552 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     56         3900 1725 3900 2250
     572 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     58         5250 1800 5250 2250
     592 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     60         5400 1800 5400 2250
     612 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     62         5550 1800 5550 2250
     632 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     64         5700 1800 5700 2250
     652 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     66         5850 1800 5850 2250
     672 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     68         2700 1725 2700 2250
     692 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     70        1 1 1.00 45.00 90.00
     71         3375 1275 3375 1575
     722 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     73        1 1 1.00 45.00 90.00
     74         2700 1275 2700 1575
     752 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
     76        1 1 1.00 45.00 90.00
     77         2775 1275 2775 1575
     782 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     79        1 1 1.00 45.00 90.00
     80         5175 1275 5175 1575
     812 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     82        1 1 1.00 45.00 90.00
     83         5625 1275 5625 1575
     842 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     85        1 1 1.00 45.00 90.00
     86         3750 1275 3750 1575
     872 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 2
     88        1 1 1.00 45.00 90.00
     89         3825 1275 3825 1575
    37902 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    3891         2700 1950 6000 1950
    39922 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    40          3150 1800 3150 2175
    41 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    42          2850 1800 2850 2175
    43 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    44          4650 1800 4650 2175
    45 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    46          4950 1800 4950 2175
    47 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    48          4500 1725 4500 2175
    49 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    50          5100 1725 5100 2175
    51 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    52          3450 1800 3450 2175
    53 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    54          3750 1800 3750 2175
    55 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    56          3300 1725 3300 2175
    57 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    58          3900 1725 3900 2175
    59 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    60          5250 1800 5250 2175
    61 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    62          5400 1800 5400 2175
    63 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    64          5550 1800 5550 2175
    65 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    66          5700 1800 5700 2175
    67 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    68          5850 1800 5850 2175
    69 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    70          2700 1725 2700 2175
     93         2700 2100 6000 2100
    71942 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    72          2700 1800 6000 1800 6000 2175 2700 2175 2700 1800
     95         2700 1800 6000 1800 6000 2250 2700 2250 2700 1800
    73962 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    7497        1 1 1.00 45.00 90.00
    75          2775 2100 2775 2325
     98         2775 2175 2775 2400
    76992 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    77100        1 1 1.00 45.00 90.00
    78          2775 2400 2775 2625
     101         2775 2475 2775 2700
    791022 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    80          2700 2625 2850 2625 2850 2775 2700 2775 2700 2625
     103         2700 2700 2850 2700 2850 2850 2700 2850 2700 2700
    811042 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    82          2700 2325 2850 2325 2850 2475 2700 2475 2700 2325
     105         2700 2400 2850 2400 2850 2550 2700 2550 2700 2400
    831062 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    84107        1 1 1.00 45.00 90.00
    85          4575 2100 4575 2325
     108         4575 2175 4575 2400
    861092 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    87          4500 2325 5025 2325 5025 2475 4500 2475 4500 2325
     110         4500 2400 5025 2400 5025 2550 4500 2550 4500 2400
    881112 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3
    89112        1 1 1.00 45.00 90.00
    90          3600 3525 4650 3525 4650 3075
     113         3600 3525 4650 3525 4650 3150
    911142 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    92          3600 3075 5100 3075 5100 3750 3600 3750 3600 3075
    93 4 2 0 50 -1 0 12 0.0000 2 135 975 2625 2175 free buckets\001
    94 4 2 0 50 -1 0 12 0.0000 2 135 435 2625 1950 locks\001
     115         3600 3150 5100 3150 5100 3750 3600 3750 3600 3150
     1164 2 0 50 -1 0 11 0.0000 2 120 300 2625 1950 lock\001
    951174 1 0 50 -1 0 10 0.0000 2 150 1155 3000 1725 N$\\times$S$_1$\001
    961184 1 0 50 -1 0 10 0.0000 2 150 1155 3600 1725 N$\\times$S$_2$\001
     1194 1 0 50 -1 0 12 0.0000 2 180 390 4425 1500 heap\001
     1204 2 0 50 -1 0 12 0.0000 2 135 1140 2550 1425 kernel threads\001
     1214 2 0 50 -1 0 11 0.0000 2 120 270 2625 2100 size\001
     1224 2 0 50 -1 0 11 0.0000 2 120 270 2625 2250 free\001
     1234 2 0 50 -1 0 12 0.0000 2 135 600 2625 2700 free list\001
     1244 0 0 50 -1 0 12 0.0000 2 135 360 3675 3325 lock\001
     1254 1 0 50 -1 0 12 0.0000 2 180 1455 4350 3075 global pool (sbrk)\001
    971264 1 0 50 -1 0 10 0.0000 2 150 1110 4800 1725 N$\\times$S$_t$\001
    98 4 2 0 50 -1 0 12 0.0000 2 135 675 2625 2625 free lists\001
    99 4 0 0 50 -1 0 12 0.0000 2 135 360 3675 3250 lock\001
    100 4 1 0 50 -1 0 12 0.0000 2 135 345 4425 3000 sbrk\001
    101 4 1 0 50 -1 0 12 0.0000 2 180 390 4425 1500 heap\001
  • doc/theses/mubeen_zulfiqar_MMath/Makefile

    r5407cdc r8d66610  
    1515
    1616.PHONY: all clean
     17.PRECIOUS: %.dvi %.ps # do not delete intermediate files
    1718
    1819### Commands:
    1920LATEX = TEXINPUTS=${TEXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${BUILD}
    2021BIBTEX = BIBINPUTS=${BIBLIB} bibtex
    21 #GLOSSARY=INDEXSTYLE=${BUILD} makeglossaries-lite
     22#GLOSSARY = INDEXSTYLE=${BUILD} makeglossaries-lite
    2223
    2324### Rules and Recipes:
     
    2526all: ${DOC}
    2627
    27 ${BUILD}/%.dvi: ${TEXSRC} ${FIGSRC:.fig=.tex} ${BIBSRC} Makefile | ${BUILD}
     28${BUILD}/%.dvi: ${TEXSRC} ${FIGSRC:%.fig=%.tex} ${BIBSRC} Makefile | ${BUILD}
    2829        ${LATEX} ${BASE}
    2930        ${BIBTEX} ${BUILD}/${BASE}
  • doc/theses/mubeen_zulfiqar_MMath/allocator.tex

    r5407cdc r8d66610  
     1\chapter{Allocator}
    12
    2 \chapter{Allocator}
     3\noindent
     4====================
     5
     6Writing Points:
     7\begin{itemize}
     8\item
     9Objective of @uHeapLmmm@.
     10\item
     11Design philosophy.
     12\item
     13Background and previous design of @uHeapLmmm@.
     14\item
     15Distributed design of @uHeapLmmm@.
     16
     17----- SHOULD WE GIVE IMPLEMENTATION DETAILS HERE? -----
     18
     19\PAB{Maybe. There might be an Implementation chapter.}
     20\item
     21figure.
     22\item
     23Advantages of distributed design.
     24\end{itemize}
     25
     26The new features added to @uHeapLmmm@ (incl. @malloc_size@ routine)
     27\CFA alloc interface with examples.
     28\begin{itemize}
     29\item
     30Why did we need it?
     31\item
     32The added benefits.
     33\end{itemize}
     34
     35----- SHOULD WE GIVE PERFORMANCE AND USABILITY COMPARISON OF DIFFERENT INTERFACES THAT WE TRIED? -----
     36
     37\PAB{Often Performance is its own chapter. I added one for now.}
     38
     39Performance evaluation using u-benchmark suite.
     40
     41\noindent
     42====================
    343
    444\newpage
    545\paragraph{Design 1: Decentralized}
    6 Fixed number of heaps: shard the heap into N heaps each with a bump-area allocated from the sbrk area.
     46Fixed number of heaps: shard the heap into N heaps each with a bump-area allocated from the @sbrk@ area.
    747Kernel threads (KT) are assigned to the N heaps.
    848When KTs $\le$ N, the heaps are uncontented.
  • doc/theses/mubeen_zulfiqar_MMath/background.tex

    r5407cdc r8d66610  
    11\chapter{Background}
    22
     3\noindent
     4====================
     5
     6Writing Points:
     7\begin{itemize}
     8\item
     9Classification of benchmarks.
     10\item
     11Literature review of current benchmarks.
     12\item
     13Features and limitations.
     14\item
     15Literature review of current memory allocators.
     16\item
     17Breakdown of memory allocation techniques.
     18\item
     19Features and limitations.
     20\end{itemize}
     21
     22\noindent
     23====================
     24
    325\cite{Wasik08}
  • doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex

    r5407cdc r8d66610  
    11\chapter{Benchmarks}
     2
     3\noindent
     4====================
     5
     6Writing Points:
     7\begin{itemize}
     8\item
     9Performance matrices of memory allocation.
     10\item
     11Aim of micro benchmark suite.
     12
     13----- SHOULD WE GIVE IMPLEMENTATION DETAILS HERE? -----
     14
     15\PAB{For the benchmarks, yes.}
     16\item
     17A complete list of benchmarks in micro benchmark suite.
     18\item
     19One detailed section for each benchmark in micro benchmark suite including:
     20
     21\begin{itemize}
     22\item
     23The introduction of the benchmark.
     24\item
     25Figure.
     26\item
     27Results with popular memory allocators.
     28\end{itemize}
     29
     30\item
     31Summarize performance of current memory allocators.
     32\end{itemize}
     33
     34\noindent
     35====================
  • doc/theses/mubeen_zulfiqar_MMath/conclusion.tex

    r5407cdc r8d66610  
    11\chapter{Conclusion}
     2
     3\noindent
     4====================
     5
     6Writing Points:
     7\begin{itemize}
     8\item
     9Summarize u-benchmark suite.
     10\item
     11Summarize @uHeapLmmm@.
     12\item
     13Make recommendations on memory allocator design.
     14\end{itemize}
     15
     16\noindent
     17====================
  • doc/theses/mubeen_zulfiqar_MMath/intro.tex

    r5407cdc r8d66610  
    11\chapter{Introduction}
     2
     3\noindent
     4====================
     5
     6Writing Points:
     7\begin{itemize}
     8\item
     9Introduce dynamic memory allocation with brief background.
     10\item
     11Scope of the thesis.
     12\item
     13Importance of memory allocation and micro-benchmark suite.
     14\item
     15Research problem.
     16\item
     17Research objectives.
     18\item
     19The vision behind cfa-malloc.
     20\item
     21An outline of the thesis.
     22\end{itemize}
     23
     24\noindent
     25====================
  • doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex

    r5407cdc r8d66610  
    8484\usepackage{graphicx}
    8585\usepackage{comment} % Removes large sections of the document.
    86 \usepackage{todonotes} % Adds todos (Must be included after comment.)
    8786
    8887% Hyperlinks make it very easy to navigate an electronic document.
     
    107106    colorlinks=true,        % false: boxed links; true: colored links
    108107    linkcolor=blue,         % color of internal links
    109     citecolor=green,        % color of links to bibliography
     108    citecolor=blue,        % color of links to bibliography
    110109    filecolor=magenta,      % color of file links
    111     urlcolor=cyan           % color of external links
     110    urlcolor=blue           % color of external links
    112111}
    113112\ifthenelse{\boolean{PrintVersion}}{   % for improved print quality, change some hyperref options
     
    168167\CFAStyle                                               % CFA code-style for all languages
    169168\lstset{language=CFA,basicstyle=\linespread{0.9}\tt}    % CFA default language
     169\newcommand{\PAB}[1]{{\color{red}PAB: #1}}
    170170
    171171%======================================================================
     
    194194\input{allocator}
    195195\input{benchmarks}
     196\input{performance}
    196197\input{conclusion}
    197198
  • doc/user/Makefile

    r5407cdc r8d66610  
    6161
    6262${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
    63                 ${Macros}/common.tex ${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib build/version | ${Build}
     63                ${Macros}/common.sty ${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib build/version | ${Build}
    6464        # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
    6565        if [ ! -r ${basename $@}.ind ] ; then touch ${Build}/${basename $@}.ind ; fi
     
    6868        -${BibTeX} ${Build}/${basename $@}
    6969        # Some citations reference others so run again to resolve these citations
    70         ${LaTeX} ${basename $@}.tex
     70#       ${LaTeX} ${basename $@}.tex
    7171        -${BibTeX} ${Build}/${basename $@}
    7272        # Make index from *.aux entries and input index at end of document
     
    7575        ${LaTeX} ${basename $@}.tex
    7676        # Run again to get index title into table of contents
    77         ${LaTeX} ${basename $@}.tex
     77#       ${LaTeX} ${basename $@}.tex
    7878
    7979## Define the default recipes.
  • doc/user/user.tex

    r5407cdc r8d66610  
    1111%% Created On       : Wed Apr  6 14:53:29 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Sun Apr 25 19:03:03 2021
    14 %% Update Count     : 4951
     13%% Last Modified On : Sat May  8 08:51:33 2021
     14%% Update Count     : 5062
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1616
     
    6565% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
    6666% math escape $...$ (dollar symbol)
    67 \input{common}                                          % common CFA document macros
     67\usepackage{common}                                                                             % common CFA document macros
     68%\input{common}                                                                                 % common CFA document macros
    6869\setlength{\gcolumnposn}{3in}
    6970\CFAStyle                                                                                               % use default CFA format-style
     
    585586For example, the octal ©0© or hexadecimal ©0x© prefix may end with an underscore ©0_377© or ©0x_ff©;
    586587the exponent infix ©E© may start or end with an underscore ©1.0_E10©, ©1.0E_10© or ©1.0_E_10©;
    587 the type suffixes ©U©, ©L©, etc. may start with an underscore ©1_U©, ©1_ll© or ©1.0E10_f©.
     588the type suffixes ©U©, ©L©, \etc may start with an underscore ©1_U©, ©1_ll© or ©1.0E10_f©.
    588589\end{enumerate}
    589590It is significantly easier to read and enter long constants when they are broken up into smaller groupings (many cultures use comma and/or period among digits for the same purpose).
     
    15701571\end{cquote}
    15711572
    1572 All type qualifiers, \eg ©const©, ©volatile©, etc., are used in the normal way with the new declarations and also appear left to right, \eg:
     1573All type qualifiers, \eg ©const©, ©volatile©, \etc, are used in the normal way with the new declarations and also appear left to right, \eg:
    15731574\begin{cquote}
    15741575\begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
     
    15901591\end{tabular}
    15911592\end{cquote}
    1592 All declaration qualifiers, \eg ©extern©, ©static©, etc., are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}
     1593All declaration qualifiers, \eg ©extern©, ©static©, \etc, are used in the normal way with the new declarations but can only appear at the start of a \CFA routine declaration,\footnote{\label{StorageClassSpecifier}
    15931594The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature.~\cite[\S~6.11.5(1)]{C11}} \eg:
    15941595\begin{cquote}
     
    31473148also, it is unnecessary to specify all the fields of a struct in a multiple record-field tuple.
    31483149
    3149 Since tuple-index expressions are a form of member-access expression, it is possible to use tuple-index expressions in conjunction with member-access expressions to restructure a tuple (\eg, rearrange components, drop components, duplicate components, etc.).
     3150Since tuple-index expressions are a form of member-access expression, it is possible to use tuple-index expressions in conjunction with member-access expressions to restructure a tuple (\eg, rearrange components, drop components, duplicate components, \etc).
    31503151\begin{cfa}
    31513152[ int, int, long, double ] x;
     
    33123313
    33133314\section{Tuples}
     3315\label{tuples}
    33143316
    33153317In C and \CFA, lists of elements appear in several contexts, such as the parameter list for a routine call.
     
    34203422
    34213423\subsection{Tuple Coercions}
     3424\label{tuple coercions}\label{coercions!tuple}
    34223425
    34233426There are four coercions that can be performed on tuples and tuple variables: closing, opening, flattening and structuring.
     
    34643467
    34653468\subsection{Mass Assignment}
     3469\label{mass assignment}\label{assignment!mass}
    34663470
    34673471\CFA permits assignment to several variables at once using mass assignment~\cite{CLU}.
     
    35043508
    35053509\subsection{Multiple Assignment}
     3510\label{multiple assignment}\label{assignment!multiple}
    35063511
    35073512\CFA also supports the assignment of several values at once, known as multiple assignment~\cite{CLU,Galletly96}.
     
    35453550
    35463551\subsection{Cascade Assignment}
     3552\index{cascade assignment}\index{assignment!cascade}
    35473553
    35483554As in C, \CFA mass and multiple assignments can be cascaded, producing cascade assignment.
     
    35643570\section{Stream I/O Library}
    35653571\label{s:StreamIOLibrary}
    3566 \index{input/output stream library}
    3567 \index{stream library}
     3572\index{input}\index{output}
     3573\index{stream library}\index{library!stream}
    35683574
    35693575The goal of \CFA stream input/output (I/O) is to simplify the common cases\index{I/O!common case}, while fully supporting polymorphism and user defined types in a consistent way.
    35703576Stream I/O can be implicitly or explicitly formatted.
    3571 Implicit formatting means \CFA selects the output or input format for values that match with the type of a variable.
     3577Implicit formatting means \CFA selects the output or input format for values that matches the variable's type.
    35723578Explicit formatting means additional information is specified to augment how an output or input of value is interpreted.
    3573 \CFA formatting is a cross between C ©printf© and \CC ©cout© manipulators, and Python implicit spacing and newline.
     3579\CFA formatting incorporates ideas from C ©printf©, \CC ©stream© manipulators, and Python implicit spacing and newline.
    35743580Specifically:
    35753581\begin{itemize}
     
    35843590Hence, it is common programming practice to toggle manipulators on and then back to the default to prevent downstream side-effects.
    35853591Without this programming style, errors occur when moving prints, as manipulator effects incorrectly flow into the new location.
    3586 (To guarantee no side-effects, manipulator values must be saved and restored across function calls.)
    3587 \item
    3588 \CFA has more sophisticated implicit spacing between values than Python, plus implicit newline at the end of a print.
     3592Furthermore, to guarantee no side-effects, manipulator values must be saved and restored across function calls.
     3593\item
     3594\CFA has more sophisticated implicit value spacing than Python, plus implicit newline at the end of a print.
    35893595\end{itemize}
     3596
     3597The standard polymorphic I/Os stream are ©stdin©/©sin© (input), ©stdout©/©sout© and ©stderr©/©serr© (output) (like C++ ©cin©/©cout©/©cerr©).
     3598Polymorphic streams ©exit© and ©abort© provide implicit program termination without and with generating a stack trace and core file.
     3599Stream ©exit© implicitly returns ©EXIT_FAILURE© to the shell.
     3600\begin{cfa}
     3601®exit®   | "x (" | x | ") negative value."; // terminate and return EXIT_FAILURE to shell
     3602®abort® | "x (" | x | ") negative value."; // terminate and generate stack trace and core file
     3603\end{cfa}
     3604Note, \CFA stream variables ©stdin©, ©stdout©, ©stderr©, ©exit©, and ©abort© overload C variables ©stdin©, ©stdout©, ©stderr©, and functions ©exit© and ©abort©, respectively.
    35903605The \CFA header file for the I/O library is \Indexc{fstream.hfa}.
     3606
     3607
     3608\subsection{Basic I/O}
    35913609
    35923610For implicit formatted output, the common case is printing a series of variables separated by whitespace.
     
    36013619\begin{cfa}
    36023620
    3603 cout << x ®<< " "® << y ®<< " "® << z << endl;
     3621cout  <<  x  ®<< " "®  <<  y  ®<< " "®  <<  z << endl;
    36043622\end{cfa}
    36053623&
     
    36533671\end{tabular}
    36543672\end{cquote}
    3655 Input and output use a uniform operator, ©|©, rather than separate operators, as in ©>>© and ©<<© for \CC.
     3673Input and output use a uniform operator, ©|©, rather than \CC's ©>>© and ©<<© input/output operators.
    36563674There is a weak similarity between the \CFA logical-or operator and the \Index{Shell pipe-operator} for moving data, where data flows in the correct direction for input but the opposite direction for output.
    36573675
     
    36983716\end{cquote}
    36993717
     3718\VRef[Figure]{f:CFACommand-LineProcessing} shows idiomatic \CFA command-line processing and copying an input file to an output file.
     3719Note, a stream variable may be copied because it is a reference to an underlying stream data-structures.
     3720All I/O errors are handles as exceptions, but end-of-file is not an exception as C programmers are use to explicitly checking for it.
     3721
     3722\begin{figure}
     3723\begin{cfa}
     3724#include ®<fstream.hfa>®
     3725
     3726int main( int argc, char * argv[] ) {
     3727        ®ifstream® in  = stdin;                                 $\C{// copy default files}$
     3728        ®ofstream® out = stdout;
     3729
     3730        try {
     3731                choose ( argc ) {
     3732                  case 2, 3:
     3733                        ®open®( in, argv[1] );                  $\C{// open input file first as output creates file}$
     3734                        if ( argc == 3 ) ®open®( out, argv[2] ); $\C{// do not create output unless input opens}$
     3735                  case 1: ;                                                     $\C{// use default files}$
     3736                  default:
     3737                        ®exit® | "Usage" | argv[0] | "[ input-file (default stdin) "
     3738                                   "[ output-file (default stdout) ] ]";
     3739                } // choose
     3740        } catch( ®Open_Failure® * ex; ex->istream == &in ) {
     3741                ®exit® | "Unable to open input file" | argv[1];
     3742        } catch( ®Open_Failure® * ex; ex->ostream == &out ) {
     3743                ®close®( in );                                          $\C{// optional}$
     3744                ®exit® | "Unable to open output file" | argv[2];
     3745        } // try
     3746
     3747        out | nlOff;                                                    $\C{// turn off auto newline}$
     3748        in | nlOn;                                                              $\C{// turn on reading newline}$
     3749        char ch;
     3750        for () {                                                                $\C{// read/write characters}$
     3751                in | ch;
     3752          if ( eof( in ) ) break;                               $\C{// eof ?}$
     3753                out | ch;
     3754        } // for
     3755} // main
     3756\end{cfa}
     3757\caption{\CFA Command-Line Processing}
     3758\label{f:CFACommand-LineProcessing}
     3759\end{figure}
     3760
     3761\VRef[Figure]{f:StreamFunctions} shows the stream operations.
     3762\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
     3763\item
     3764\Indexc{fail} tests the stream error-indicator, returning nonzero if it is set.
     3765\item
     3766\Indexc{clear} resets the stream error-indicator.
     3767\item
     3768\Indexc{flush} (©ofstream© only) causes any unwritten data for a stream to be written to the file.
     3769\item
     3770\Indexc{eof} (©ifstream© only) tests the end-of-file indicator for the stream pointed to by stream.
     3771Returns true if the end-of-file indicator is set, otherwise false.
     3772\item
     3773\Indexc{open} binds the file with ©name© to a stream accessed with ©mode© (see ©fopen©).
     3774\item
     3775\Indexc{close} flushes the stream and closes the file.
     3776\item
     3777\Indexc{write} (©ofstream© only) write ©size© bytes to the stream.
     3778The bytes are written lazily to file when internal buffers fill.
     3779Eager buffer writes are done with ©flush©
     3780\item
     3781\Indexc{read} (©ifstream© only) read ©size© bytes to the stream.
     3782\item
     3783\Indexc{ungetc} (©ifstream© only) pushes the character back to the input stream.
     3784Pushed-back characters returned by subsequent reads in the reverse order of pushing.
     3785\end{itemize}
     3786The constructor functions:
     3787\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
     3788\item
     3789create an unbound stream, which is subsequently bound to a file with ©open©.
     3790\item
     3791create a bound stream to the associated file with given ©mode©.
     3792\end{itemize}
     3793The destructor closes the stream.
     3794
     3795\begin{figure}
     3796\begin{cfa}
     3797// *********************************** ofstream ***********************************
     3798
     3799bool fail( ofstream & );$\indexc{fail}\index{ofstream@©ofstream©!©fail©}$
     3800void clear( ofstream & );$\indexc{clear}\index{ofstream@©ofstream©!©clear©}$
     3801int flush( ofstream & );$\indexc{flush}\index{ofstream@©ofstream©!©flush©}$
     3802void open( ofstream &, const char name[], const char mode[] = "w" );$\indexc{open}\index{ofstream@©ofstream©!©open©}$
     3803void close( ofstream & );$\indexc{close}\index{ofstream@©ofstream©!©close©}$
     3804ofstream & write( ofstream &, const char data[], size_t size );$\indexc{write}\index{ofstream@©ofstream©!©write©}$
     3805
     3806void ?{}( ofstream & );$\index{ofstream@©ofstream©!©?{}©}$
     3807void ?{}( ofstream &, const char name[], const char mode[] = "w" );
     3808void ^?{}( ofstream & );$\index{ofstream@©ofstream©!©^?{}©}$
     3809
     3810// *********************************** ifstream ***********************************
     3811
     3812bool fail( ifstream & is );$\indexc{fail}\index{ifstream@©ifstream©!©fail©}$
     3813void clear( ifstream & );$\indexc{clear}\index{ifstream@©ifstream©!©clear©}$
     3814bool eof( ifstream & is );$\indexc{eof}\index{ifstream@©ifstream©!©eof©}$
     3815void open( ifstream & is, const char name[], const char mode[] = "r" );$\indexc{open}\index{ifstream@©ifstream©!©open©}$
     3816void close( ifstream & is );$\indexc{close}\index{ifstream@©ifstream©!©close©}$
     3817ifstream & read( ifstream & is, char data[], size_t size );$\indexc{read}\index{ifstream@©ifstream©!©read©}$
     3818ifstream & ungetc( ifstream & is, char c );$\indexc{unget}\index{ifstream@©ifstream©!©unget©}$
     3819
     3820void ?{}( ifstream & is );$\index{ifstream@©ifstream©!©?{}©}$
     3821void ?{}( ifstream & is, const char name[], const char mode[] = "r" );
     3822void ^?{}( ifstream & is );$\index{ifstream@©ifstream©!©^?{}©}$
     3823\end{cfa}
     3824\caption{Stream Functions}
     3825\label{f:StreamFunctions}
     3826\end{figure}
    37003827
    37013828
     
    38463973
    38473974\item
    3848 \Indexc{sepOn}\index{manipulator!sepOn@©sepOn©} and \Indexc{sepOff}\index{manipulator!sepOff@©sepOff©} toggle printing the separator with respect to the next printed item, and then return to the global seperator setting.
     3975\Indexc{sepOn}\index{manipulator!sepOn@©sepOn©} and \Indexc{sepOff}\index{manipulator!sepOff@©sepOff©} toggle printing the separator with respect to the next printed item, and then return to the global separator setting.
    38493976\begin{cfa}[belowskip=0pt]
    38503977sout | 1 | sepOff | 2 | 3; $\C{// turn off implicit separator for the next item}$
     
    40304157sout | wd( 4, "ab" ) | wd( 3, "ab" ) | wd( 2, "ab" );
    40314158\end{cfa}
    4032 \begin{cfa}[showspaces=true,aboveskip=0pt,belowskip=0pt]
     4159\begin{cfa}[showspaces=true,aboveskip=0pt]
    40334160®  ®34 ® ®34 34
    40344161®  ®4.000000 ® ®4.000000 4.000000
     
    43784505\end{cfa}
    43794506
    4380 \Textbf{WARNING:} ©printf©\index{printf@©printf©}, ©scanf©\index{scanf@©scanf©} and their derivatives are unsafe when used with user-level threading, as in \CFA.
    4381 These stream routines use kernel-thread locking (©futex©\index{futex@©futex©}), which block kernel threads, to prevent interleaving of I/O.
    4382 However, the following simple example illustrates how a deadlock can occur (other complex scenarios are possible).
    4383 Assume a single kernel thread and two user-level threads calling ©printf©.
    4384 One user-level thread acquires the I/O lock and is time-sliced while performing ©printf©.
    4385 The other user-level thread then starts execution, calls ©printf©, and blocks the only kernel thread because it cannot acquire the I/O lock.
    4386 It does not help if the kernel lock is multiple acquisition, \ie, the lock owner can acquire it multiple times, because it then results in two user threads in the ©printf© critical section, corrupting the stream.
     4507
     4508\section{String Stream}
     4509
     4510All the stream formatting capabilities are available to format text to/from a C string rather than to a stream file.
     4511\VRef[Figure]{f:StringStreamProcessing} shows writing (output) and reading (input) from a C string.
     4512\begin{figure}
     4513\begin{cfa}
     4514#include <fstream.hfa>
     4515#include <strstream.hfa>
     4516
     4517int main() {
     4518        enum { size = 256 };
     4519        char buf[size]; $\C{// output buffer}$
     4520        ®ostrstream osstr = { buf, size };® $\C{// bind output buffer/size}$
     4521        int i = 3, j = 5, k = 7;
     4522        double x = 12345678.9, y = 98765.4321e-11;
     4523
     4524        osstr | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y)); $\C{// same lines of output}$
     4525        write( osstr );
     4526        printf( "%s", buf );
     4527        sout | i | hex(j) | wd(10, k) | sci(x) | unit(eng(y));
     4528
     4529        char buf2[] = "12 14 15 3.5 7e4"; $\C{// input buffer}$
     4530        ®istrstream isstr = { buf2 };®
     4531        isstr | i | j | k | x | y;
     4532        sout | i | j | k | x | y;
     4533}
     4534\end{cfa}
     4535\caption{String Stream Processing}
     4536\label{f:StringStreamProcessing}
     4537\end{figure}
     4538
     4539\VRef[Figure]{f:StringStreamFunctions} shows the string stream operations.
     4540\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
     4541\item
     4542\Indexc{write} (©ostrstream© only) writes all the buffered characters to the specified stream (©stdout© default).
     4543\end{itemize}
     4544The constructor functions:
     4545\begin{itemize}[topsep=4pt,itemsep=2pt,parsep=0pt]
     4546\item
     4547create a bound stream to a write buffer (©ostrstream©) of ©size© or a read buffer (©istrstream©) containing a C string terminated with ©'\0'©.
     4548\end{itemize}
     4549
     4550\begin{figure}
     4551\begin{cfa}
     4552// *********************************** ostrstream ***********************************
     4553
     4554ostrstream & write( ostrstream & os, FILE * stream = stdout );
     4555
     4556void ?{}( ostrstream &, char buf[], size_t size );
     4557
     4558// *********************************** istrstream ***********************************
     4559
     4560void ?{}( istrstream & is, char buf[] );
     4561\end{cfa}
     4562\caption{String Stream Functions}
     4563\label{f:StringStreamFunctions}
     4564\end{figure}
    43874565
    43884566
     
    54825660\item
    54835661Package: a container to organize modules for distribution; It has attributes like name, author,
    5484 version, dependences, etc.
    5485 \item
    5486 Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
     5662version, dependences, \etc.
     5663\item
     5664Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, \etc.
    54875665\end{itemize}
    54885666
     
    56215799
    56225800A package is defined by putting a project description file, Do.prj, with one or more modules into a directory.
    5623 This project description file contains the package's meta data, including package name, author, version, dependences, etc.
     5801This project description file contains the package's meta data, including package name, author, version, dependences, \etc.
    56245802It should be in the root of the package directory.
    56255803
     
    56785856Module: a container to organize a set of related types and methods; It has a module name, and several interfaces visible from outside
    56795857\item
    5680 Package: a container to organize modules for distribution; It has attributes like name, author, version, dependences, etc.
    5681 \item
    5682 Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, etc.
     5858Package: a container to organize modules for distribution; It has attributes like name, author, version, dependences, \etc.
     5859\item
     5860Project: a working set for a \CFA project; It has attributes like name, author, version, dependences, \etc.
    56835861\end{itemize}
    56845862
     
    81118289\begin{cquote}
    81128290\begin{tabular}{@{}l@{\hspace{\parindentlnth}}|@{\hspace{\parindentlnth}}l@{}}
    8113 \multicolumn{1}{@{}c|@{\hspace{\parindentlnth}}}{\textbf{\CFA}} & \multicolumn{1}{@{\hspace{\parindentlnth}}c}{\textbf{C}@{}}   \\
     8291\multicolumn{1}{@{}c|@{\hspace{\parindentlnth}}}{\textbf{\CFA}} & \multicolumn{1}{@{\hspace{\parindentlnth}}c@{}}{\textbf{C}}   \\
    81148292\hline
    81158293\begin{cfa}
    8116 #include <gmp>$\indexc{gmp}$
     8294#include <gmp.hfa>$\indexc{gmp}$
    81178295int main( void ) {
    81188296        sout | "Factorial Numbers";
  • libcfa/src/common.hfa

    r5407cdc r8d66610  
    1010// Created On       : Wed Jul 11 17:54:36 2018
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Aug 15 08:51:29 2020
    13 // Update Count     : 14
     12// Last Modified On : Wed May  5 14:02:04 2021
     13// Update Count     : 18
    1414//
    1515
     
    6767
    6868static inline {
    69         char min( char t1, char t2 ) { return t1 < t2 ? t1 : t2; } // optimization
    70         intptr_t min( intptr_t t1, intptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization
    71         uintptr_t min( uintptr_t t1, uintptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization
     69        char min( char v1, char v2 ) { return v1 < v2 ? v1 : v2; } // optimization
     70        int min( int v1, int v2 ) { return v1 < v2 ? v1 : v2; }
     71        unsigned int min( unsigned int v1, unsigned int v2 ) { return v1 < v2 ? v1 : v2; }
     72        long int min( long int v1, long int v2 ) { return v1 < v2 ? v1 : v2; }
     73        unsigned long int min( unsigned long int v1, unsigned int v2 ) { return v1 < v2 ? v1 : v2; }
     74        long long int min( long long int v1, long long int v2 ) { return v1 < v2 ? v1 : v2; }
     75        unsigned long long int min( unsigned long long int v1, unsigned int v2 ) { return v1 < v2 ? v1 : v2; }
    7276        forall( T | { int ?<?( T, T ); } )
    73         T min( T t1, T t2 ) { return t1 < t2 ? t1 : t2; }
     77        T min( T v1, T v2 ) { return v1 < v2 ? v1 : v2; }
    7478
    75         char max( char t1, char t2 ) { return t1 > t2 ? t1 : t2; } // optimization
    76         intptr_t max( intptr_t t1, intptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization
    77         uintptr_t max( uintptr_t t1, uintptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization
     79        char max( char v1, char v2 ) { return v1 > v2 ? v1 : v2; } // optimization
     80        int max( int v1, int v2 ) { return v1 > v2 ? v1 : v2; }
     81        unsigned int max( unsigned int v1, unsigned int v2 ) { return v1 > v2 ? v1 : v2; }
     82        long int max( long int v1, long int v2 ) { return v1 > v2 ? v1 : v2; }
     83        unsigned long int max( unsigned long int v1, unsigned long int v2 ) { return v1 > v2 ? v1 : v2; }
     84        long long int max( long long int v1, long long int v2 ) { return v1 > v2 ? v1 : v2; }
     85        unsigned long long int max( unsigned long long int v1, unsigned long long int v2 ) { return v1 > v2 ? v1 : v2; }
    7886        forall( T | { int ?>?( T, T ); } )
    79         T max( T t1, T t2 ) { return t1 > t2 ? t1 : t2; }
     87        T max( T v1, T v2 ) { return v1 > v2 ? v1 : v2; }
    8088
    8189        forall( T | { T min( T, T ); T max( T, T ); } )
  • libcfa/src/concurrency/alarm.cfa

    r5407cdc r8d66610  
    3838
    3939void __kernel_set_timer( Duration alarm ) {
    40         verifyf(alarm >= 1`us || alarm == 0, "Setting timer to < 1us (%jins)", alarm`ns);
    41         setitimer( ITIMER_REAL, &(itimerval){ alarm }, 0p );
     40        alarm = max(alarm, 1`us);
     41        itimerval otv @= { 0 };
     42        getitimer( ITIMER_REAL, &otv );
     43        Duration od = { otv.it_value };
     44        if(od == 0 || od > alarm) {
     45                setitimer( ITIMER_REAL, &(itimerval){ alarm }, 0p );
     46        }
    4247}
    4348
     
    4651//=============================================================================================
    4752
    48 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period) with( this ) {
     53void ?{}( alarm_node_t & this, $thread * thrd, Duration alarm, Duration period) with( this ) {
     54        this.initial = alarm;
     55        this.period  = period;
    4956        this.thrd = thrd;
    50         this.alarm = alarm;
    51         this.period = period;
     57        this.timeval = __kernel_get_time() + alarm;
    5258        set = false;
    5359        type = User;
    5460}
    5561
    56 void ?{}( alarm_node_t & this, processor * proc, Time alarm, Duration period ) with( this ) {
     62void ?{}( alarm_node_t & this, processor * proc, Duration alarm, Duration period ) with( this ) {
     63        this.initial = alarm;
     64        this.period  = period;
    5765        this.proc = proc;
    58         this.alarm = alarm;
    59         this.period = period;
     66        this.timeval = __kernel_get_time() + alarm;
    6067        set = false;
    6168        type = Kernel;
    6269}
    63 void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period ) with( this ) {
    64         this.alarm = alarm;
    65         this.period = period;
     70void ?{}( alarm_node_t & this, Alarm_Callback callback, Duration alarm, Duration period ) with( this ) {
    6671        this.callback = callback;
     72        this.initial = alarm;
     73        this.period  = period;
     74        this.timeval = __kernel_get_time() + alarm;
    6775        set = false;
    6876        type = Callback;
     
    7785void insert( alarm_list_t * this, alarm_node_t * n ) {
    7886        alarm_node_t * it = & (*this)`first;
    79         while( it && (n->alarm > it->alarm) ) {
     87        while( it && (n->timeval > it->timeval) ) {
    8088                it = & (*it)`next;
    8189        }
     
    105113        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
    106114        {
    107                 verify( validate( alarms ) );
    108                 bool first = ! & alarms`first;
     115                /* paranoid */ verify( validate( alarms ) );
    109116
    110                 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu).\n", this, this->alarm.tn );
     117                Time curr = __kernel_get_time();
     118                __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->timeval.tn );
    111119                insert( &alarms, this );
    112                 if( first ) {
    113                         __kernel_set_timer( alarms`first.alarm - __kernel_get_time() );
    114                 }
     120                __kernel_set_timer( this->timeval - curr);
     121                this->set = true;
    115122        }
    116123        unlock( event_kernel->lock );
    117         this->set = true;
    118124        enable_interrupts();
    119125}
     
    124130        {
    125131                verify( validate( event_kernel->alarms ) );
    126                 remove( *this );
     132                if (this->set) remove( *this );
     133                this->set = false;
    127134        }
    128135        unlock( event_kernel->lock );
    129136        enable_interrupts();
    130         this->set = false;
    131137}
    132138
     
    136142
    137143void sleep( Duration duration ) {
    138         alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s };
     144        alarm_node_t node = { active_thread(), duration, 0`s };
    139145
    140146        register_self( &node );
  • libcfa/src/concurrency/alarm.hfa

    r5407cdc r8d66610  
    4646
    4747struct alarm_node_t {
    48         Time alarm;                             // time when alarm goes off
    49         Duration period;                        // if > 0 => period of alarm
     48        Duration initial;       // time when alarm goes off
     49        Duration period;        // if > 0 => period of alarm
    5050
    51         DLISTED_MGD_IMPL_IN(alarm_node_t)
     51        inline dlink(alarm_node_t);
    5252
    5353        union {
    54                 $thread * thrd;                                 // thrd who created event
    55                 processor * proc;                               // proc who created event
    56                 Alarm_Callback callback;                // callback to handle event
     54                $thread * thrd;                 // thrd who created event
     55                processor * proc;                       // proc who created event
     56                Alarm_Callback callback;        // callback to handle event
    5757        };
    5858
    59         bool set                :1;             // whether or not the alarm has be registered
    60         enum alarm_type type;           // true if this is not a user defined alarm
     59        Time timeval;           // actual time at which the alarm goes off
     60        enum alarm_type type;   // true if this is not a user defined alarm
     61        bool set                :1;     // whether or not the alarm has be registered
    6162};
    62 DLISTED_MGD_IMPL_OUT(alarm_node_t)
     63P9_EMBEDDED( alarm_node_t, dlink(alarm_node_t) )
    6364
    64 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
    65 void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
    66 void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period );
     65void ?{}( alarm_node_t & this, $thread * thrd, Duration alarm, Duration period );
     66void ?{}( alarm_node_t & this, processor * proc, Duration alarm, Duration period );
     67void ?{}( alarm_node_t & this, Alarm_Callback callback, Duration alarm, Duration period );
    6768void ^?{}( alarm_node_t & this );
    6869
    69 typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
     70typedef dlist(alarm_node_t) alarm_list_t;
    7071
    7172void insert( alarm_list_t * this, alarm_node_t * n );
  • libcfa/src/concurrency/clib/cfathread.cfa

    r5407cdc r8d66610  
    2727      extern void __cfactx_invoke_thread(void (*main)(void *), void * this);
    2828}
     29
     30extern Time __kernel_get_time();
    2931
    3032//================================================================================
     
    265267        int cfathread_cond_timedwait(cfathread_cond_t *restrict cond, cfathread_mutex_t *restrict mut, const struct timespec *restrict abstime) __attribute__((nonnull (1,2,3))) {
    266268                Time t = { *abstime };
    267                 if( wait( (*cond)->impl, (*mut)->impl, t ) ) {
     269                timespec curr;
     270                clock_gettime( CLOCK_REALTIME, &curr );
     271                Time c = { curr };
     272                if( wait( (*cond)->impl, (*mut)->impl, t - c ) ) {
    268273                        return 0;
    269274                }
  • libcfa/src/concurrency/clib/cfathread.h

    r5407cdc r8d66610  
    8080
    8181        typedef struct cfathread_cond_attr {
     82                // WARNING: adding support for pthread_condattr_setclock would require keeping track of the clock
     83                // and reading it in cond_timedwait
    8284        } cfathread_condattr_t;
    8385        typedef struct cfathread_condition * cfathread_cond_t;
  • libcfa/src/concurrency/invoke.h

    r5407cdc r8d66610  
    146146        struct __thread_desc_link {
    147147                struct $thread * next;
    148                 struct $thread * prev;
    149148                volatile unsigned long long ts;
    150                 unsigned preferred;
    151149        };
    152150
     
    155153                // context that is switch during a __cfactx_switch
    156154                struct __stack_context_t context;
     155
     156                // Link lists fields
     157                // instrusive link field for threads
     158                struct __thread_desc_link link;
    157159
    158160                // current execution status for coroutine
     
    170172                struct cluster * curr_cluster;
    171173
    172                 // Link lists fields
    173                 // instrusive link field for threads
    174                 struct __thread_desc_link link;
     174                // preferred ready-queue
     175                unsigned preferred;
    175176
    176177                // coroutine body used to store context
  • libcfa/src/concurrency/io.cfa

    r5407cdc r8d66610  
    138138                /* paranoid */ verify( proc->io.ctx );
    139139
     140                __attribute__((unused)) cluster * cltr = proc->cltr;
    140141                $io_context & ctx = *proc->io.ctx;
     142
     143                // for(i; 2) {
     144                //      unsigned idx = proc->rdq.id + i;
     145                //      cltr->ready_queue.lanes.tscs[idx].tv = -1ull;
     146                // }
    141147
    142148                __ioarbiter_flush( ctx );
     
    151157                                // Update statistics
    152158                                __STATS__( false, io.calls.errors.busy ++; )
     159                                // for(i; 2) {
     160                                //      unsigned idx = proc->rdq.id + i;
     161                                //      cltr->ready_queue.lanes.tscs[idx].tv = rdtscl();
     162                                // }
    153163                                return;
    154164                        default:
     
    172182
    173183                ctx.proc->io.pending = false;
     184
     185                ready_schedule_lock();
     186                __cfa_io_drain( proc );
     187                ready_schedule_unlock();
     188                // for(i; 2) {
     189                //      unsigned idx = proc->rdq.id + i;
     190                //      cltr->ready_queue.lanes.tscs[idx].tv = rdtscl();
     191                // }
    174192        }
    175193
  • libcfa/src/concurrency/kernel.cfa

    r5407cdc r8d66610  
    163163        #if !defined(__CFA_NO_STATISTICS__)
    164164                if( this->print_halts ) {
    165                         __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this);
     165                        __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->unique_id, this->name, (void*)this);
    166166                }
    167167        #endif
     
    170170                // Setup preemption data
    171171                preemption_scope scope = { this };
    172 
    173                 __STATS( unsigned long long last_tally = rdtscl(); )
    174172
    175173                // if we need to run some special setup, now is the time to do it.
     
    184182                MAIN_LOOP:
    185183                for() {
     184                        #define OLD_MAIN 1
     185                        #if OLD_MAIN
    186186                        // Check if there is pending io
    187187                        __maybe_io_drain( this );
     
    223223                                #if !defined(__CFA_NO_STATISTICS__)
    224224                                        if(this->print_halts) {
    225                                                 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
     225                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
    226226                                        }
    227227                                #endif
     
    236236                                #if !defined(__CFA_NO_STATISTICS__)
    237237                                        if(this->print_halts) {
    238                                                 __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
     238                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
    239239                                        }
    240240                                #endif
     
    258258                        if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
    259259
    260                         #if !defined(__CFA_NO_STATISTICS__)
    261                                 unsigned long long curr = rdtscl();
    262                                 if(curr > (last_tally + 500000000)) {
    263                                         __tally_stats(this->cltr->stats, __cfaabi_tls.this_stats);
    264                                         last_tally = curr;
    265                                 }
    266                         #endif
    267 
    268260                        if(this->io.pending && !this->io.dirty) {
    269261                                __cfa_io_flush( this );
    270262                        }
    271263
    272                 //      SEARCH: {
    273                 //              /* paranoid */ verify( ! __preemption_enabled() );
    274                 //              /* paranoid */ verify( kernelTLS().this_proc_id );
    275 
    276                 //              // First, lock the scheduler since we are searching for a thread
    277 
    278                 //              // Try to get the next thread
    279                 //              ready_schedule_lock();
    280                 //              readyThread = pop_fast( this->cltr );
    281                 //              ready_schedule_unlock();
    282                 //              if(readyThread) {  break SEARCH; }
    283 
    284                 //              // If we can't find a thread, might as well flush any outstanding I/O
    285                 //              if(this->io.pending) { __cfa_io_flush( this ); }
    286 
    287                 //              // Spin a little on I/O, just in case
    288                 //              for(25) {
    289                 //                      __maybe_io_drain( this );
    290                 //                      ready_schedule_lock();
    291                 //                      readyThread = pop_fast( this->cltr );
    292                 //                      ready_schedule_unlock();
    293                 //                      if(readyThread) {  break SEARCH; }
    294                 //              }
    295 
    296                 //              // no luck, try stealing a few times
    297                 //              for(25) {
    298                 //                      if( __maybe_io_drain( this ) ) {
    299                 //                              ready_schedule_lock();
    300                 //                              readyThread = pop_fast( this->cltr );
    301                 //                      } else {
    302                 //                              ready_schedule_lock();
    303                 //                              readyThread = pop_slow( this->cltr );
    304                 //                      }
    305                 //                      ready_schedule_unlock();
    306                 //                      if(readyThread) {  break SEARCH; }
    307                 //              }
    308 
    309                 //              // still no luck, search for a thread
    310                 //              ready_schedule_lock();
    311                 //              readyThread = pop_search( this->cltr );
    312                 //              ready_schedule_unlock();
    313                 //              if(readyThread) { break SEARCH; }
    314 
    315                 //              // Don't block if we are done
    316                 //              if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
    317 
    318                 //              __STATS( __tls_stats()->ready.sleep.halts++; )
    319 
    320                 //              // Push self to idle stack
    321                 //              mark_idle(this->cltr->procs, * this);
    322 
    323                 //              // Confirm the ready-queue is empty
    324                 //              __maybe_io_drain( this );
    325                 //              ready_schedule_lock();
    326                 //              readyThread = pop_search( this->cltr );
    327                 //              ready_schedule_unlock();
    328 
    329                 //              if( readyThread ) {
    330                 //                      // A thread was found, cancel the halt
    331                 //                      mark_awake(this->cltr->procs, * this);
    332 
    333                 //                      __STATS( __tls_stats()->ready.sleep.cancels++; )
    334 
    335                 //                      // continue the main loop
    336                 //                      break SEARCH;
    337                 //              }
    338 
    339                 //              __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl()); )
    340                 //              __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
    341 
    342                 //              // __disable_interrupts_hard();
    343                 //              eventfd_t val;
    344                 //              eventfd_read( this->idle, &val );
    345                 //              // __enable_interrupts_hard();
    346 
    347                 //              __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl()); )
    348 
    349                 //              // We were woken up, remove self from idle
    350                 //              mark_awake(this->cltr->procs, * this);
    351 
    352                 //              // DON'T just proceed, start looking again
    353                 //              continue MAIN_LOOP;
    354                 //      }
    355 
    356                 // RUN_THREAD:
    357                 //      /* paranoid */ verify( kernelTLS().this_proc_id );
    358                 //      /* paranoid */ verify( ! __preemption_enabled() );
    359                 //      /* paranoid */ verify( readyThread );
    360 
    361                 //      // Reset io dirty bit
    362                 //      this->io.dirty = false;
    363 
    364                 //      // We found a thread run it
    365                 //      __run_thread(this, readyThread);
    366 
    367                 //      // Are we done?
    368                 //      if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
    369 
    370                 //      #if !defined(__CFA_NO_STATISTICS__)
    371                 //              unsigned long long curr = rdtscl();
    372                 //              if(curr > (last_tally + 500000000)) {
    373                 //                      __tally_stats(this->cltr->stats, __cfaabi_tls.this_stats);
    374                 //                      last_tally = curr;
    375                 //              }
    376                 //      #endif
    377 
    378                 //      if(this->io.pending && !this->io.dirty) {
    379                 //              __cfa_io_flush( this );
    380                 //      }
    381 
    382                 //      // Check if there is pending io
    383                 //      __maybe_io_drain( this );
     264                        #else
     265                                #warning new kernel loop
     266                        SEARCH: {
     267                                /* paranoid */ verify( ! __preemption_enabled() );
     268
     269                                // First, lock the scheduler since we are searching for a thread
     270                                ready_schedule_lock();
     271
     272                                // Try to get the next thread
     273                                readyThread = pop_fast( this->cltr );
     274                                if(readyThread) { ready_schedule_unlock(); break SEARCH; }
     275
     276                                // If we can't find a thread, might as well flush any outstanding I/O
     277                                if(this->io.pending) { __cfa_io_flush( this ); }
     278
     279                                // Spin a little on I/O, just in case
     280                                        for(5) {
     281                                        __maybe_io_drain( this );
     282                                        readyThread = pop_fast( this->cltr );
     283                                        if(readyThread) { ready_schedule_unlock(); break SEARCH; }
     284                                }
     285
     286                                // no luck, try stealing a few times
     287                                        for(5) {
     288                                        if( __maybe_io_drain( this ) ) {
     289                                                readyThread = pop_fast( this->cltr );
     290                                        } else {
     291                                                readyThread = pop_slow( this->cltr );
     292                                        }
     293                                        if(readyThread) { ready_schedule_unlock(); break SEARCH; }
     294                                }
     295
     296                                // still no luck, search for a thread
     297                                readyThread = pop_search( this->cltr );
     298                                if(readyThread) { ready_schedule_unlock(); break SEARCH; }
     299
     300                                // Don't block if we are done
     301                                if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     302
     303                                __STATS( __tls_stats()->ready.sleep.halts++; )
     304
     305                                // Push self to idle stack
     306                                ready_schedule_unlock();
     307                                mark_idle(this->cltr->procs, * this);
     308                                ready_schedule_lock();
     309
     310                                // Confirm the ready-queue is empty
     311                                __maybe_io_drain( this );
     312                                readyThread = pop_search( this->cltr );
     313                                ready_schedule_unlock();
     314
     315                                if( readyThread ) {
     316                                        // A thread was found, cancel the halt
     317                                        mark_awake(this->cltr->procs, * this);
     318
     319                                        __STATS( __tls_stats()->ready.sleep.cancels++; )
     320
     321                                        // continue the main loop
     322                                        break SEARCH;
     323                                }
     324
     325                                        __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); )
     326                                __cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
     327
     328                                // __disable_interrupts_hard();
     329                                eventfd_t val;
     330                                eventfd_read( this->idle, &val );
     331                                // __enable_interrupts_hard();
     332
     333                                        __STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); )
     334
     335                                // We were woken up, remove self from idle
     336                                mark_awake(this->cltr->procs, * this);
     337
     338                                // DON'T just proceed, start looking again
     339                                continue MAIN_LOOP;
     340                        }
     341
     342                RUN_THREAD:
     343                        /* paranoid */ verify( ! __preemption_enabled() );
     344                        /* paranoid */ verify( readyThread );
     345
     346                        // Reset io dirty bit
     347                        this->io.dirty = false;
     348
     349                        // We found a thread run it
     350                        __run_thread(this, readyThread);
     351
     352                        // Are we done?
     353                        if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     354
     355                        if(this->io.pending && !this->io.dirty) {
     356                                __cfa_io_flush( this );
     357                        }
     358
     359                        ready_schedule_lock();
     360                        __maybe_io_drain( this );
     361                        ready_schedule_unlock();
     362                        #endif
    384363                }
    385364
     
    390369
    391370        post( this->terminated );
    392 
    393371
    394372        if(this == mainProcessor) {
     
    553531static void __schedule_thread( $thread * thrd ) {
    554532        /* paranoid */ verify( ! __preemption_enabled() );
    555         /* paranoid */ verify( kernelTLS().this_proc_id );
    556533        /* paranoid */ verify( ready_schedule_islocked());
    557534        /* paranoid */ verify( thrd );
     
    567544        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
    568545
    569 
     546        const bool local = thrd->state != Start;
    570547        if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
    571548
     
    575552
    576553        // push the thread to the cluster ready-queue
    577         push( cl, thrd );
     554        push( cl, thrd, local );
    578555
    579556        // variable thrd is no longer safe to use
     
    611588static inline $thread * __next_thread(cluster * this) with( *this ) {
    612589        /* paranoid */ verify( ! __preemption_enabled() );
    613         /* paranoid */ verify( kernelTLS().this_proc_id );
    614590
    615591        ready_schedule_lock();
     
    617593        ready_schedule_unlock();
    618594
    619         /* paranoid */ verify( kernelTLS().this_proc_id );
    620595        /* paranoid */ verify( ! __preemption_enabled() );
    621596        return thrd;
     
    625600static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
    626601        /* paranoid */ verify( ! __preemption_enabled() );
    627         /* paranoid */ verify( kernelTLS().this_proc_id );
    628602
    629603        ready_schedule_lock();
     
    638612        ready_schedule_unlock();
    639613
    640         /* paranoid */ verify( kernelTLS().this_proc_id );
    641614        /* paranoid */ verify( ! __preemption_enabled() );
    642615        return thrd;
     
    895868                unsigned tail = *ctx->cq.tail;
    896869                if(head == tail) return false;
     870                #if OLD_MAIN
    897871                ready_schedule_lock();
    898872                ret = __cfa_io_drain( proc );
    899873                ready_schedule_unlock();
     874                #else
     875                        ret = __cfa_io_drain( proc );
     876        #endif
    900877        #endif
    901878        return ret;
     
    926903        }
    927904
     905        static void crawl_list( cluster * cltr, dlist(processor) & list, unsigned count ) {
     906                /* paranoid */ verify( cltr->stats );
     907
     908                processor * it = &list`first;
     909                for(unsigned i = 0; i < count; i++) {
     910                        /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
     911                        /* paranoid */ verify( it->local_data->this_stats );
     912                        __tally_stats( cltr->stats, it->local_data->this_stats );
     913                        it = &(*it)`next;
     914                }
     915        }
     916
     917        void crawl_cluster_stats( cluster & this ) {
     918                // Stop the world, otherwise stats could get really messed-up
     919                // this doesn't solve all problems but does solve many
     920                // so it's probably good enough
     921                uint_fast32_t last_size = ready_mutate_lock();
     922
     923                        crawl_list(&this, this.procs.actives, this.procs.total - this.procs.idle);
     924                        crawl_list(&this, this.procs.idles  , this.procs.idle );
     925
     926                // Unlock the RWlock
     927                ready_mutate_unlock( last_size );
     928        }
     929
     930
    928931        void print_stats_now( cluster & this, int flags ) {
     932                crawl_cluster_stats( this );
    929933                __print_stats( this.stats, this.print_stats, "Cluster", this.name, (void*)&this );
    930934        }
  • libcfa/src/concurrency/kernel.hfa

    r5407cdc r8d66610  
    4949
    5050// Processor id, required for scheduling threads
    51 struct __processor_id_t {
    52         unsigned id:24;
    53 
    54         #if !defined(__CFA_NO_STATISTICS__)
    55                 struct __stats_t * stats;
    56         #endif
    57 };
     51
    5852
    5953coroutine processorCtx_t {
     
    6357// Wrapper around kernel threads
    6458struct __attribute__((aligned(128))) processor {
    65         // Main state
    66         inline __processor_id_t;
    67 
    6859        // Cluster from which to get threads
    6960        struct cluster * cltr;
     
    9081        pthread_t kernel_thread;
    9182
     83        // Unique id for the processor (not per cluster)
     84        unsigned unique_id;
     85
    9286        struct {
    9387                $io_context * ctx;
     
    113107
    114108        // Link lists fields
    115         DLISTED_MGD_IMPL_IN(processor)
     109        inline dlink(processor);
    116110
    117111        // special init fields
     
    123117        } init;
    124118
     119        struct KernelThreadData * local_data;
     120
    125121        #if !defined(__CFA_NO_STATISTICS__)
    126122                int print_stats;
     
    133129#endif
    134130};
     131P9_EMBEDDED( processor, dlink(processor) )
    135132
    136133void  ?{}(processor & this, const char name[], struct cluster & cltr);
     
    140137static inline void  ?{}(processor & this, struct cluster & cltr) { this{ "Anonymous Processor", cltr}; }
    141138static inline void  ?{}(processor & this, const char name[])     { this{name, *mainCluster}; }
    142 
    143 DLISTED_MGD_IMPL_OUT(processor)
    144139
    145140//-----------------------------------------------------------------------------
     
    152147
    153148// Aligned timestamps which are used by the relaxed ready queue
    154 struct __attribute__((aligned(128))) __timestamp_t;
    155 void  ?{}(__timestamp_t & this);
    156 void ^?{}(__timestamp_t & this);
     149struct __attribute__((aligned(128))) __timestamp_t {
     150        volatile unsigned long long tv;
     151};
     152
     153static inline void  ?{}(__timestamp_t & this) { this.tv = 0; }
     154static inline void ^?{}(__timestamp_t & this) {}
    157155
    158156//TODO adjust cache size to ARCHITECTURE
     
    177175void  ?{}(__ready_queue_t & this);
    178176void ^?{}(__ready_queue_t & this);
     177#if !defined(__CFA_NO_STATISTICS__)
     178        unsigned cnt(const __ready_queue_t & this, unsigned idx);
     179#endif
    179180
    180181// Idle Sleep
     
    190191
    191192        // List of idle processors
    192         dlist(processor, processor) idles;
     193        dlist(processor) idles;
    193194
    194195        // List of active processors
    195         dlist(processor, processor) actives;
     196        dlist(processor) actives;
    196197};
    197198
  • libcfa/src/concurrency/kernel/fwd.hfa

    r5407cdc r8d66610  
    3838                        struct $thread          * volatile this_thread;
    3939                        struct processor        * volatile this_processor;
    40                         struct __processor_id_t * volatile this_proc_id;
    41                         struct __stats_t        * volatile this_stats;
     40                        volatile bool sched_lock;
    4241
    4342                        struct {
     
    5655                                uint64_t bck_seed;
    5756                        } ready_rng;
     57
     58                        struct __stats_t        * volatile this_stats;
     59
     60
     61                        #ifdef __CFA_WITH_VERIFY__
     62                                // Debug, check if the rwlock is owned for reading
     63                                bool in_sched_lock;
     64                                unsigned sched_id;
     65                        #endif
    5866                } __cfaabi_tls __attribute__ ((tls_model ( "initial-exec" )));
    5967
  • libcfa/src/concurrency/kernel/startup.cfa

    r5407cdc r8d66610  
    7777static void doregister( struct cluster & cltr );
    7878static void unregister( struct cluster & cltr );
     79static void register_tls( processor * this );
     80static void unregister_tls( processor * this );
    7981static void ?{}( $coroutine & this, current_stack_info_t * info);
    8082static void ?{}( $thread & this, current_stack_info_t * info);
     
    123125        NULL,                                                                                           // cannot use 0p
    124126        NULL,
     127        false,
     128        { 1, false, false },
     129        0,
     130        { 0, 0 },
    125131        NULL,
    126         NULL,
    127         { 1, false, false },
     132        #ifdef __CFA_WITH_VERIFY__
     133                false,
     134                0,
     135        #endif
    128136};
    129137
     
    210218        (*mainProcessor){};
    211219
     220        register_tls( mainProcessor );
     221
    212222        //initialize the global state variables
    213223        __cfaabi_tls.this_processor = mainProcessor;
    214         __cfaabi_tls.this_proc_id   = (__processor_id_t*)mainProcessor;
    215224        __cfaabi_tls.this_thread    = mainThread;
    216225
     
    219228                __init_stats( __cfaabi_tls.this_stats );
    220229        #endif
     230        mainProcessor->local_data = &__cfaabi_tls;
    221231
    222232        // Enable preemption
     
    273283        #endif
    274284
     285        mainProcessor->local_data = 0p;
     286
     287        unregister_tls( mainProcessor );
     288
    275289        // Destroy the main processor and its context in reverse order of construction
    276290        // These were manually constructed so we need manually destroy them
     
    316330        processor * proc = (processor *) arg;
    317331        __cfaabi_tls.this_processor = proc;
    318         __cfaabi_tls.this_proc_id   = (__processor_id_t*)proc;
    319332        __cfaabi_tls.this_thread    = 0p;
    320333        __cfaabi_tls.preemption_state.[enabled, disable_count] = [false, 1];
     334        proc->local_data = &__cfaabi_tls;
     335
     336        register_tls( proc );
     337
    321338        // SKULLDUGGERY: We want to create a context for the processor coroutine
    322339        // which is needed for the 2-step context switch. However, there is no reason
     
    355372                #endif
    356373        #endif
     374
     375        proc->local_data = 0p;
     376
     377        unregister_tls( proc );
    357378
    358379        return 0p;
     
    446467        self_mon_p = &self_mon;
    447468        link.next = 0p;
    448         link.prev = 0p;
    449         link.preferred = -1u;
     469        link.ts   = 0;
     470        preferred = -1u;
    450471        last_proc = 0p;
    451472        #if defined( __CFA_WITH_VERIFY__ )
     
    475496        this.rdq.id  = -1u;
    476497        this.rdq.target = -1u;
    477         this.rdq.cutoff = -1ull;
     498        this.rdq.cutoff = 0ull;
    478499        do_terminate = false;
    479500        preemption_alarm = 0p;
     
    485506
    486507        this.init.thrd = initT;
     508
     509        this.local_data = 0p;
    487510
    488511        this.idle = eventfd(0, 0);
     
    496519        #endif
    497520
    498         // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
    499         uint_fast32_t last_size = ready_mutate_register((__processor_id_t*)&this);
    500                 this.cltr->procs.total += 1u;
    501                 insert_last(this.cltr->procs.actives, this);
    502 
    503                 // Adjust the ready queue size
    504                 ready_queue_grow( cltr );
    505 
    506         // Unlock the RWlock
    507         ready_mutate_unlock( last_size );
    508 
    509521        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    510522}
     
    512524// Not a ctor, it just preps the destruction but should not destroy members
    513525static void deinit(processor & this) {
    514         // Lock the RWlock so no-one pushes/pops while we are changing the queue
    515         uint_fast32_t last_size = ready_mutate_lock();
    516                 this.cltr->procs.total -= 1u;
    517                 remove(this);
    518 
    519                 // Adjust the ready queue size
    520                 ready_queue_shrink( this.cltr );
    521 
    522         // Unlock the RWlock and unregister: we don't need the read_lock any more
    523         ready_mutate_unregister((__processor_id_t*)&this, last_size );
    524 
    525526        close(this.idle);
    526527}
     
    656657        cltr->nthreads -= 1;
    657658        unlock(cltr->thread_list_lock);
     659}
     660
     661static void register_tls( processor * this ) {
     662        // Register and Lock the RWlock so no-one pushes/pops while we are changing the queue
     663        uint_fast32_t last_size;
     664        [this->unique_id, last_size] = ready_mutate_register();
     665
     666                this->cltr->procs.total += 1u;
     667                insert_last(this->cltr->procs.actives, *this);
     668
     669                // Adjust the ready queue size
     670                ready_queue_grow( this->cltr );
     671
     672        // Unlock the RWlock
     673        ready_mutate_unlock( last_size );
     674}
     675
     676
     677static void unregister_tls( processor * this ) {
     678        // Lock the RWlock so no-one pushes/pops while we are changing the queue
     679        uint_fast32_t last_size = ready_mutate_lock();
     680                this->cltr->procs.total -= 1u;
     681                remove(*this);
     682
     683                // clear the cluster so nothing gets pushed to local queues
     684                cluster * cltr = this->cltr;
     685                this->cltr = 0p;
     686
     687                // Adjust the ready queue size
     688                ready_queue_shrink( cltr );
     689
     690        // Unlock the RWlock and unregister: we don't need the read_lock any more
     691        ready_mutate_unregister( this->unique_id, last_size );
    658692}
    659693
  • libcfa/src/concurrency/kernel_private.hfa

    r5407cdc r8d66610  
    2525// Scheduler
    2626
    27 struct __attribute__((aligned(128))) __scheduler_lock_id_t;
    2827
    2928extern "C" {
     
    8079// Lock-Free registering/unregistering of threads
    8180// Register a processor to a given cluster and get its unique id in return
    82 void register_proc_id( struct __processor_id_t * );
     81unsigned register_proc_id( void );
    8382
    8483// Unregister a processor from a given cluster using its id, getting back the original pointer
    85 void unregister_proc_id( struct __processor_id_t * proc );
     84void unregister_proc_id( unsigned );
    8685
    8786//=======================================================================
     
    112111}
    113112
    114 // Cells use by the reader writer lock
    115 // while not generic it only relies on a opaque pointer
    116 struct __attribute__((aligned(128))) __scheduler_lock_id_t {
    117         // Spin lock used as the underlying lock
    118         volatile bool lock;
    119 
    120         // Handle pointing to the proc owning this cell
    121         // Used for allocating cells and debugging
    122         __processor_id_t * volatile handle;
    123 
    124         #ifdef __CFA_WITH_VERIFY__
    125                 // Debug, check if this is owned for reading
    126                 bool owned;
    127         #endif
    128 };
    129 
    130 static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
     113
     114
     115
    131116
    132117//-----------------------------------------------------------------------
     
    147132
    148133        // writer lock
    149         volatile bool lock;
     134        volatile bool write_lock;
    150135
    151136        // data pointer
    152         __scheduler_lock_id_t * data;
     137        volatile bool * volatile * data;
    153138};
    154139
     
    163148static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
    164149        /* paranoid */ verify( ! __preemption_enabled() );
    165         /* paranoid */ verify( kernelTLS().this_proc_id );
    166 
    167         unsigned iproc = kernelTLS().this_proc_id->id;
    168         /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
    169         /*paranoid*/ verify(iproc < ready);
     150        /* paranoid */ verify( ! kernelTLS().in_sched_lock );
     151        /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
     152        /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
    170153
    171154        // Step 1 : make sure no writer are in the middle of the critical section
    172         while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
     155        while(__atomic_load_n(&write_lock, (int)__ATOMIC_RELAXED))
    173156                Pause();
    174157
     
    179162
    180163        // Step 2 : acquire our local lock
    181         __atomic_acquire( &data[iproc].lock );
    182         /*paranoid*/ verify(data[iproc].lock);
     164        __atomic_acquire( &kernelTLS().sched_lock );
     165        /*paranoid*/ verify(kernelTLS().sched_lock);
    183166
    184167        #ifdef __CFA_WITH_VERIFY__
    185168                // Debug, check if this is owned for reading
    186                 data[iproc].owned = true;
     169                kernelTLS().in_sched_lock = true;
    187170        #endif
    188171}
     
    190173static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
    191174        /* paranoid */ verify( ! __preemption_enabled() );
    192         /* paranoid */ verify( kernelTLS().this_proc_id );
    193 
    194         unsigned iproc = kernelTLS().this_proc_id->id;
    195         /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
    196         /*paranoid*/ verify(iproc < ready);
    197         /*paranoid*/ verify(data[iproc].lock);
    198         /*paranoid*/ verify(data[iproc].owned);
     175        /* paranoid */ verify( data[kernelTLS().sched_id] == &kernelTLS().sched_lock );
     176        /* paranoid */ verify( !kernelTLS().this_processor || kernelTLS().this_processor->unique_id == kernelTLS().sched_id );
     177        /* paranoid */ verify( kernelTLS().sched_lock );
     178        /* paranoid */ verify( kernelTLS().in_sched_lock );
    199179        #ifdef __CFA_WITH_VERIFY__
    200180                // Debug, check if this is owned for reading
    201                 data[iproc].owned = false;
     181                kernelTLS().in_sched_lock = false;
    202182        #endif
    203         __atomic_unlock(&data[iproc].lock);
     183        __atomic_unlock(&kernelTLS().sched_lock);
    204184}
    205185
     
    207187        static inline bool ready_schedule_islocked(void) {
    208188                /* paranoid */ verify( ! __preemption_enabled() );
    209                 /*paranoid*/ verify( kernelTLS().this_proc_id );
    210                 __processor_id_t * proc = kernelTLS().this_proc_id;
    211                 return __scheduler_lock->data[proc->id].owned;
     189                /* paranoid */ verify( (!kernelTLS().in_sched_lock) || kernelTLS().sched_lock );
     190                return kernelTLS().sched_lock;
    212191        }
    213192
    214193        static inline bool ready_mutate_islocked() {
    215                 return __scheduler_lock->lock;
     194                return __scheduler_lock->write_lock;
    216195        }
    217196#endif
     
    228207// Register a processor to a given cluster and get its unique id in return
    229208// For convenience, also acquires the lock
    230 static inline uint_fast32_t ready_mutate_register( struct __processor_id_t * proc ) {
    231         register_proc_id( proc );
    232         return ready_mutate_lock();
     209static inline [unsigned, uint_fast32_t] ready_mutate_register() {
     210        unsigned id = register_proc_id();
     211        uint_fast32_t last = ready_mutate_lock();
     212        return [id, last];
    233213}
    234214
    235215// Unregister a processor from a given cluster using its id, getting back the original pointer
    236216// assumes the lock is acquired
    237 static inline void ready_mutate_unregister( struct __processor_id_t * proc, uint_fast32_t last_s ) {
     217static inline void ready_mutate_unregister( unsigned id, uint_fast32_t last_s ) {
    238218        ready_mutate_unlock( last_s );
    239         unregister_proc_id( proc );
     219        unregister_proc_id( id );
    240220}
    241221
     
    281261// push thread onto a ready queue for a cluster
    282262// returns true if the list was previously empty, false otherwise
    283 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd);
     263__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool local);
    284264
    285265//-----------------------------------------------------------------------
  • libcfa/src/concurrency/locks.cfa

    r5407cdc r8d66610  
    188188                alarm_node_t alarm_node;
    189189                condition_variable(L) * cond;
    190                 info_thread(L) * i;
     190                info_thread(L) * info_thd;
    191191        };
    192192
    193         void ?{}( alarm_node_wrap(L) & this, Time alarm, Duration period, Alarm_Callback callback, condition_variable(L) * c, info_thread(L) * i ) {
     193        void ?{}( alarm_node_wrap(L) & this, Duration alarm, Duration period, Alarm_Callback callback, condition_variable(L) * c, info_thread(L) * i ) {
    194194                this.alarm_node{ callback, alarm, period };
    195195                this.cond = c;
    196                 this.i = i;
     196                this.info_thd = i;
    197197        }
    198198
     
    206206                //      may still be called after a thread has been removed from the queue but
    207207                //      before the alarm is unregistered
    208                 if ( listed(i) ) {      // is thread on queue
    209                         i->signalled = false;
     208                if ( listed(info_thd) ) {       // is thread on queue
     209                        info_thd->signalled = false;
    210210                        // remove this thread O(1)
    211                         remove( cond->blocked_threads, *i );
     211                        remove( cond->blocked_threads, *info_thd );
    212212                        cond->count--;
    213                         if( i->lock ) {
     213                        if( info_thd->lock ) {
    214214                                // call lock's on_notify if a lock was passed
    215                                 on_notify(*i->lock, i->t);
     215                                on_notify(*info_thd->lock, info_thd->t);
    216216                        } else {
    217217                                // otherwise wake thread
    218                                 unpark( i->t );
     218                                unpark( info_thd->t );
    219219                        }
    220220                }
     
    313313
    314314        // helper for wait()'s' with a timeout
    315         void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Time t ) with(this) {
     315        void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) {
    316316                lock( lock __cfaabi_dbg_ctx2 );
    317317                size_t recursion_count = queue_and_get_recursion(this, &info);
    318                 alarm_node_wrap(L) node_wrap = { t, 0`s, alarm_node_wrap_cast, &this, &info };
     318                alarm_node_wrap(L) node_wrap = { t, 0`s, callback, &this, &info };
    319319                register_self( &node_wrap.alarm_node );
    320320                unlock( lock );
     
    332332        #define WAIT_TIME( u, l, t ) \
    333333                info_thread( L ) i = { active_thread(), u, l }; \
    334                 queue_info_thread_timeout(this, i, t ); \
     334                queue_info_thread_timeout(this, i, t, alarm_node_wrap_cast ); \
    335335                return i.signalled;
    336336
     
    340340        void wait( condition_variable(L) & this, L & l, uintptr_t info ) with(this) { WAIT( info, &l ) }
    341341
    342         bool wait( condition_variable(L) & this, Duration duration                        ) with(this) { WAIT_TIME( 0   , 0p , __kernel_get_time() + duration ) }
    343         bool wait( condition_variable(L) & this, uintptr_t info, Duration duration        ) with(this) { WAIT_TIME( info, 0p , __kernel_get_time() + duration ) }
    344         bool wait( condition_variable(L) & this, Time time                                ) with(this) { WAIT_TIME( 0   , 0p , time ) }
    345         bool wait( condition_variable(L) & this, uintptr_t info, Time time                ) with(this) { WAIT_TIME( info, 0p , time ) }
    346         bool wait( condition_variable(L) & this, L & l, Duration duration                 ) with(this) { WAIT_TIME( 0   , &l , __kernel_get_time() + duration ) }
    347         bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, &l , __kernel_get_time() + duration ) }
    348         bool wait( condition_variable(L) & this, L & l, Time time                         ) with(this) { WAIT_TIME( 0   , &l , time ) }
    349         bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time         ) with(this) { WAIT_TIME( info, &l , time ) }
     342        bool wait( condition_variable(L) & this, Duration duration                        ) with(this) { WAIT_TIME( 0   , 0p , duration ) }
     343        bool wait( condition_variable(L) & this, uintptr_t info, Duration duration        ) with(this) { WAIT_TIME( info, 0p , duration ) }
     344        bool wait( condition_variable(L) & this, L & l, Duration duration                 ) with(this) { WAIT_TIME( 0   , &l , duration ) }
     345        bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, &l , duration ) }
    350346}
    351347
  • libcfa/src/concurrency/locks.hfa

    r5407cdc r8d66610  
    290290        bool wait( condition_variable(L) & this, Duration duration );
    291291        bool wait( condition_variable(L) & this, uintptr_t info, Duration duration );
    292         bool wait( condition_variable(L) & this, Time time );
    293         bool wait( condition_variable(L) & this, uintptr_t info, Time time );
    294292
    295293        void wait( condition_variable(L) & this, L & l );
     
    297295        bool wait( condition_variable(L) & this, L & l, Duration duration );
    298296        bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration );
    299         bool wait( condition_variable(L) & this, L & l, Time time );
    300         bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time );
    301 }
     297}
  • libcfa/src/concurrency/preemption.cfa

    r5407cdc r8d66610  
    1818
    1919#include "preemption.hfa"
     20
    2021#include <assert.h>
    2122
     
    2627#include <limits.h>                                                                             // PTHREAD_STACK_MIN
    2728
     29#include "bits/debug.hfa"
    2830#include "bits/signal.hfa"
    2931#include "kernel_private.hfa"
     
    105107static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    106108        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
    107         if( (*alarms)`first.alarm >= currtime ) return 0p;      // If alarms head not expired return null
     109        if( (*alarms)`first.timeval >= currtime ) return 0p;    // If alarms head not expired return null
    108110        return pop(alarms);                                                                     // Otherwise just pop head
    109111}
     
    141143                if( period > 0 ) {
    142144                        __cfadbg_print_buffer_local( preemption, " KERNEL: alarm period is %lu.\n", period`ns );
    143                         node->alarm = currtime + period;    // Alarm is periodic, add currtime to it (used cached current time)
     145                        node->timeval = currtime + period;  // Alarm is periodic, add currtime to it (used cached current time)
    144146                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    145147                }
     
    148150        // If there are still alarms pending, reset the timer
    149151        if( & (*alarms)`first ) {
    150                 Duration delta = (*alarms)`first.alarm - currtime;
    151                 Duration capped = max(delta, 50`us);
    152                 __kernel_set_timer( capped );
     152                Duration delta = (*alarms)`first.timeval - currtime;
     153                __kernel_set_timer( delta );
    153154        }
    154155}
     
    160161        // Alarms need to be enabled
    161162        if ( duration > 0 && ! alarm->set ) {
    162                 alarm->alarm = __kernel_get_time() + duration;
    163                 alarm->period = duration;
     163                alarm->initial = duration;
     164                alarm->period  = duration;
    164165                register_self( alarm );
    165166        }
     
    167168        else if ( duration == 0 && alarm->set ) {
    168169                unregister_self( alarm );
    169                 alarm->alarm = 0;
    170                 alarm->period = 0;
     170                alarm->initial = 0;
     171                alarm->period  = 0;
    171172        }
    172173        // If alarm is different from previous, change it
    173174        else if ( duration > 0 && alarm->period != duration ) {
    174175                unregister_self( alarm );
    175                 alarm->alarm = __kernel_get_time() + duration;
    176                 alarm->period = duration;
     176                alarm->initial = duration;
     177                alarm->period  = duration;
    177178                register_self( alarm );
    178179        }
     
    599600
    600601        // Notify the alarm thread of the shutdown
    601         sigval val = { 1 };
     602        sigval val;
     603        val.sival_int = 0;
    602604        pthread_sigqueue( alarm_thread, SIGALRM, val );
    603605
     
    619621// Used by thread to control when they want to receive preemption signals
    620622void ?{}( preemption_scope & this, processor * proc ) {
    621         (this.alarm){ proc, (Time){ 0 }, 0`s };
     623        (this.alarm){ proc, 0`s, 0`s };
    622624        this.proc = proc;
    623625        this.proc->preemption_alarm = &this.alarm;
     
    687689// Waits on SIGALRM and send SIGUSR1 to whom ever needs it
    688690static void * alarm_loop( __attribute__((unused)) void * args ) {
    689         __processor_id_t id;
    690         register_proc_id(&id);
    691         __cfaabi_tls.this_proc_id = &id;
    692 
     691        unsigned id = register_proc_id();
    693692
    694693        // Block sigalrms to control when they arrive
     
    707706                siginfo_t info;
    708707                int sig = sigwaitinfo( &mask, &info );
     708
     709                __cfadbg_print_buffer_decl ( preemption, " KERNEL: sigwaitinfo returned %d, c: %d, v: %d\n", sig, info.si_code, info.si_value.sival_int );
     710                __cfadbg_print_buffer_local( preemption, " KERNEL: SI_QUEUE %d, SI_TIMER %d, SI_KERNEL %d\n", SI_QUEUE, SI_TIMER, SI_KERNEL );
    709711
    710712                if( sig < 0 ) {
     
    714716                                case EAGAIN :
    715717                                case EINTR :
    716                                         {__cfaabi_dbg_print_buffer_decl( " KERNEL: Spurious wakeup %d.\n", err );}
     718                                        {__cfadbg_print_buffer_local( preemption, " KERNEL: Spurious wakeup %d.\n", err );}
    717719                                        continue;
    718720                                case EINVAL :
     
    726728                assertf(sig == SIGALRM, "Kernel Internal Error, sigwait: Unexpected signal %d (%d : %d)\n", sig, info.si_code, info.si_value.sival_int);
    727729
    728                 // __cfaabi_dbg_print_safe( "Kernel : Caught alarm from %d with %d\n", info.si_code, info.si_value.sival_int );
    729730                // Switch on the code (a.k.a. the sender) to
    730731                switch( info.si_code )
    731732                {
     733                // Signal was not sent by the kernel but by an other thread
     734                case SI_QUEUE:
     735                        // other threads may signal the alarm thread to shut it down
     736                        // or to manual cause the preemption tick
     737                        // use info.si_value and handle the case here
     738                        switch( info.si_value.sival_int ) {
     739                        case 0:
     740                                goto EXIT;
     741                        default:
     742                                abort( "SI_QUEUE with val %d", info.si_value.sival_int);
     743                        }
     744                        // fallthrough
    732745                // Timers can apparently be marked as sent for the kernel
    733746                // In either case, tick preemption
     
    739752                        unlock( event_kernel->lock );
    740753                        break;
    741                 // Signal was not sent by the kernel but by an other thread
    742                 case SI_QUEUE:
    743                         // For now, other thread only signal the alarm thread to shut it down
    744                         // If this needs to change use info.si_value and handle the case here
    745                         goto EXIT;
    746754                }
    747755        }
     
    749757EXIT:
    750758        __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
    751         register_proc_id(&id);
     759        unregister_proc_id(id);
    752760
    753761        return 0p;
  • libcfa/src/concurrency/ready_queue.cfa

    r5407cdc r8d66610  
    1717// #define __CFA_DEBUG_PRINT_READY_QUEUE__
    1818
    19 // #define USE_MPSC
    2019
    2120#define USE_RELAXED_FIFO
     
    9392        this.alloc = 0;
    9493        this.ready = 0;
    95         this.lock  = false;
    9694        this.data  = alloc(this.max);
    97 
    98         /*paranoid*/ verify( 0 == (((uintptr_t)(this.data    )) % 64) );
    99         /*paranoid*/ verify( 0 == (((uintptr_t)(this.data + 1)) % 64) );
     95        this.write_lock  = false;
     96
    10097        /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.alloc), &this.alloc));
    10198        /*paranoid*/ verify(__atomic_is_lock_free(sizeof(this.ready), &this.ready));
     
    106103}
    107104
    108 void ?{}( __scheduler_lock_id_t & this, __processor_id_t * proc ) {
    109         this.handle = proc;
    110         this.lock   = false;
    111         #ifdef __CFA_WITH_VERIFY__
    112                 this.owned  = false;
    113         #endif
    114 }
    115105
    116106//=======================================================================
    117107// Lock-Free registering/unregistering of threads
    118 void register_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
     108unsigned register_proc_id( void ) with(*__scheduler_lock) {
    119109        __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
     110        bool * handle = (bool *)&kernelTLS().sched_lock;
    120111
    121112        // Step - 1 : check if there is already space in the data
     
    124115        // Check among all the ready
    125116        for(uint_fast32_t i = 0; i < s; i++) {
    126                 __processor_id_t * null = 0p; // Re-write every loop since compare thrashes it
    127                 if( __atomic_load_n(&data[i].handle, (int)__ATOMIC_RELAXED) == null
    128                         && __atomic_compare_exchange_n( &data[i].handle, &null, proc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    129                         /*paranoid*/ verify(i < ready);
    130                         /*paranoid*/ verify(0 == (__alignof__(data[i]) % cache_line_size));
    131                         /*paranoid*/ verify((((uintptr_t)&data[i]) % cache_line_size) == 0);
    132                         proc->id = i;
     117                bool * volatile * cell = (bool * volatile *)&data[i]; // Cforall is bugged and the double volatiles causes problems
     118                /* paranoid */ verify( handle != *cell );
     119
     120                bool * null = 0p; // Re-write every loop since compare thrashes it
     121                if( __atomic_load_n(cell, (int)__ATOMIC_RELAXED) == null
     122                        && __atomic_compare_exchange_n( cell, &null, handle, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     123                        /* paranoid */ verify(i < ready);
     124                        /* paranoid */ verify( (kernelTLS().sched_id = i, true) );
     125                        return i;
    133126                }
    134127        }
     
    141134
    142135        // Step - 3 : Mark space as used and then publish it.
    143         __scheduler_lock_id_t * storage = (__scheduler_lock_id_t *)&data[n];
    144         (*storage){ proc };
     136        data[n] = handle;
    145137        while() {
    146138                unsigned copy = n;
     
    154146
    155147        // Return new spot.
    156         /*paranoid*/ verify(n < ready);
    157         /*paranoid*/ verify(__alignof__(data[n]) == (2 * cache_line_size));
    158         /*paranoid*/ verify((((uintptr_t)&data[n]) % cache_line_size) == 0);
    159         proc->id = n;
    160 }
    161 
    162 void unregister_proc_id( struct __processor_id_t * proc ) with(*__scheduler_lock) {
    163         unsigned id = proc->id;
    164         /*paranoid*/ verify(id < ready);
    165         /*paranoid*/ verify(proc == __atomic_load_n(&data[id].handle, __ATOMIC_RELAXED));
    166         __atomic_store_n(&data[id].handle, 0p, __ATOMIC_RELEASE);
     148        /* paranoid */ verify(n < ready);
     149        /* paranoid */ verify( (kernelTLS().sched_id = n, true) );
     150        return n;
     151}
     152
     153void unregister_proc_id( unsigned id ) with(*__scheduler_lock) {
     154        /* paranoid */ verify(id < ready);
     155        /* paranoid */ verify(id == kernelTLS().sched_id);
     156        /* paranoid */ verify(data[id] == &kernelTLS().sched_lock);
     157
     158        bool * volatile * cell = (bool * volatile *)&data[id]; // Cforall is bugged and the double volatiles causes problems
     159
     160        __atomic_store_n(cell, 0p, __ATOMIC_RELEASE);
    167161
    168162        __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc);
     
    174168uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
    175169        /* paranoid */ verify( ! __preemption_enabled() );
     170        /* paranoid */ verify( ! kernelTLS().sched_lock );
    176171
    177172        // Step 1 : lock global lock
    178173        // It is needed to avoid processors that register mid Critical-Section
    179174        //   to simply lock their own lock and enter.
    180         __atomic_acquire( &lock );
     175        __atomic_acquire( &write_lock );
    181176
    182177        // Step 2 : lock per-proc lock
     
    186181        uint_fast32_t s = ready;
    187182        for(uint_fast32_t i = 0; i < s; i++) {
    188                 __atomic_acquire( &data[i].lock );
     183                volatile bool * llock = data[i];
     184                if(llock) __atomic_acquire( llock );
    189185        }
    190186
     
    203199        // Alternative solution : return s in write_lock and pass it to write_unlock
    204200        for(uint_fast32_t i = 0; i < last_s; i++) {
    205                 verify(data[i].lock);
    206                 __atomic_store_n(&data[i].lock, (bool)false, __ATOMIC_RELEASE);
     201                volatile bool * llock = data[i];
     202                if(llock) __atomic_store_n(llock, (bool)false, __ATOMIC_RELEASE);
    207203        }
    208204
    209205        // Step 2 : release global lock
    210         /*paranoid*/ assert(true == lock);
    211         __atomic_store_n(&lock, (bool)false, __ATOMIC_RELEASE);
     206        /*paranoid*/ assert(true == write_lock);
     207        __atomic_store_n(&write_lock, (bool)false, __ATOMIC_RELEASE);
    212208
    213209        /* paranoid */ verify( ! __preemption_enabled() );
     
    253249        }
    254250
    255         __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
     251        __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
    256252                __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
    257253
    258                 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
     254                const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
    259255                /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
    260 
    261                 // write timestamp
    262                 thrd->link.ts = rdtscl();
    263256
    264257                bool local;
     
    280273                        #endif
    281274
    282                 #if defined(USE_MPSC)
    283                         // mpsc always succeeds
    284                 } while( false );
    285                 #else
    286275                        // If we can't lock it retry
    287276                } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    288                 #endif
    289277
    290278                // Actually push it
    291279                push(lanes.data[i], thrd);
    292280
    293                 #if !defined(USE_MPSC)
    294                         // Unlock and return
    295                         __atomic_unlock( &lanes.data[i].lock );
    296                 #endif
     281                // Unlock and return
     282                __atomic_unlock( &lanes.data[i].lock );
    297283
    298284                // Mark the current index in the tls rng instance as having an item
     
    350336#endif
    351337#if defined(USE_WORK_STEALING)
    352         __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
     338        __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
    353339                __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
    354340
    355                 const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
     341                // #define USE_PREFERRED
     342                #if !defined(USE_PREFERRED)
     343                const bool external = !push_local || (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
    356344                /* paranoid */ verify(external || kernelTLS().this_processor->rdq.id < lanes.count );
    357 
    358                 // write timestamp
    359                 thrd->link.ts = rdtscl();
     345                #else
     346                        unsigned preferred = thrd->preferred;
     347                        const bool external = push_local || (!kernelTLS().this_processor) || preferred == -1u || thrd->curr_cluster != cltr;
     348                        /* paranoid */ verifyf(external || preferred < lanes.count, "Invalid preferred queue %u for %u lanes", preferred, lanes.count );
     349
     350                        unsigned r = preferred % READYQ_SHARD_FACTOR;
     351                        const unsigned start = preferred - r;
     352                #endif
    360353
    361354                // Try to pick a lane and lock it
     
    371364                        }
    372365                        else {
    373                                 processor * proc = kernelTLS().this_processor;
    374                                 unsigned r = proc->rdq.its++;
    375                                 i =  proc->rdq.id + (r % READYQ_SHARD_FACTOR);
     366                                #if !defined(USE_PREFERRED)
     367                                        processor * proc = kernelTLS().this_processor;
     368                                        unsigned r = proc->rdq.its++;
     369                                        i =  proc->rdq.id + (r % READYQ_SHARD_FACTOR);
     370                                #else
     371                                        i = start + (r++ % READYQ_SHARD_FACTOR);
     372                                #endif
    376373                        }
    377 
    378 
    379                 #if defined(USE_MPSC)
    380                         // mpsc always succeeds
    381                 } while( false );
    382                 #else
    383374                        // If we can't lock it retry
    384375                } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
    385                 #endif
    386376
    387377                // Actually push it
    388378                push(lanes.data[i], thrd);
    389379
    390                 #if !defined(USE_MPSC)
    391                         // Unlock and return
    392                         __atomic_unlock( &lanes.data[i].lock );
    393                 #endif
     380                // Unlock and return
     381                __atomic_unlock( &lanes.data[i].lock );
    394382
    395383                #if !defined(__CFA_NO_STATISTICS__)
     
    410398
    411399                if(proc->rdq.target == -1u) {
     400                        unsigned long long min = ts(lanes.data[proc->rdq.id]);
     401                        for(int i = 0; i < READYQ_SHARD_FACTOR; i++) {
     402                                unsigned long long tsc = ts(lanes.data[proc->rdq.id + i]);
     403                                if(tsc < min) min = tsc;
     404                        }
     405                        proc->rdq.cutoff = min;
    412406                        proc->rdq.target = __tls_rand() % lanes.count;
    413                         unsigned it1  = proc->rdq.itr;
    414                         unsigned it2  = proc->rdq.itr + 1;
    415                         unsigned idx1 = proc->rdq.id + (it1 % READYQ_SHARD_FACTOR);
    416                         unsigned idx2 = proc->rdq.id + (it2 % READYQ_SHARD_FACTOR);
    417                         unsigned long long tsc1 = ts(lanes.data[idx1]);
    418                         unsigned long long tsc2 = ts(lanes.data[idx2]);
    419                         proc->rdq.cutoff = min(tsc1, tsc2);
    420                         if(proc->rdq.cutoff == 0) proc->rdq.cutoff = -1ull;
    421407                }
    422408                else {
    423409                        unsigned target = proc->rdq.target;
    424410                        proc->rdq.target = -1u;
    425                         if(lanes.tscs[target].tv < proc->rdq.cutoff) {
     411                        const unsigned long long bias = 0; //2_500_000_000;
     412                        const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff;
     413                        if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) {
    426414                                $thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
    427415                                if(t) return t;
     
    430418
    431419                for(READYQ_SHARD_FACTOR) {
    432                         unsigned i = proc->rdq.id + (--proc->rdq.itr % READYQ_SHARD_FACTOR);
     420                        unsigned i = proc->rdq.id + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
    433421                        if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
    434422                }
     
    462450        // If list looks empty retry
    463451        if( is_empty(lane) ) {
    464                 __STATS( stats.espec++; )
    465452                return 0p;
    466453        }
     
    468455        // If we can't get the lock retry
    469456        if( !__atomic_try_acquire(&lane.lock) ) {
    470                 __STATS( stats.elock++; )
    471457                return 0p;
    472458        }
     
    475461        if( is_empty(lane) ) {
    476462                __atomic_unlock(&lane.lock);
    477                 __STATS( stats.eempty++; )
    478463                return 0p;
    479464        }
     
    481466        // Actually pop the list
    482467        struct $thread * thrd;
    483         thrd = pop(lane);
     468        unsigned long long tsv;
     469        [thrd, tsv] = pop(lane);
    484470
    485471        /* paranoid */ verify(thrd);
     472        /* paranoid */ verify(tsv);
    486473        /* paranoid */ verify(lane.lock);
    487474
     
    493480
    494481        #if defined(USE_WORK_STEALING)
    495                 lanes.tscs[w].tv = thrd->link.ts;
     482                lanes.tscs[w].tv = tsv;
    496483        #endif
     484
     485        thrd->preferred = w;
    497486
    498487        // return the popped thread
     
    522511// Check that all the intrusive queues in the data structure are still consistent
    523512static void check( __ready_queue_t & q ) with (q) {
    524         #if defined(__CFA_WITH_VERIFY__) && !defined(USE_MPSC)
     513        #if defined(__CFA_WITH_VERIFY__)
    525514                {
    526515                        for( idx ; lanes.count ) {
     
    528517                                assert(!lanes.data[idx].lock);
    529518
    530                                 assert(head(sl)->link.prev == 0p );
    531                                 assert(head(sl)->link.next->link.prev == head(sl) );
    532                                 assert(tail(sl)->link.next == 0p );
    533                                 assert(tail(sl)->link.prev->link.next == tail(sl) );
    534 
    535                                 if(is_empty(sl)) {
    536                                         assert(tail(sl)->link.prev == head(sl));
    537                                         assert(head(sl)->link.next == tail(sl));
    538                                 } else {
    539                                         assert(tail(sl)->link.prev != head(sl));
    540                                         assert(head(sl)->link.next != tail(sl));
    541                                 }
     519                                        if(is_empty(sl)) {
     520                                                assert( sl.anchor.next == 0p );
     521                                                assert( sl.anchor.ts   == 0  );
     522                                                assert( mock_head(sl)  == sl.prev );
     523                                        } else {
     524                                                assert( sl.anchor.next != 0p );
     525                                                assert( sl.anchor.ts   != 0  );
     526                                                assert( mock_head(sl)  != sl.prev );
     527                                        }
    542528                        }
    543529                }
     
    560546// fixes the list so that the pointers back to anchors aren't left dangling
    561547static inline void fix(__intrusive_lane_t & ll) {
    562         #if !defined(USE_MPSC)
    563                 // if the list is not empty then follow he pointer and fix its reverse
    564                 if(!is_empty(ll)) {
    565                         head(ll)->link.next->link.prev = head(ll);
    566                         tail(ll)->link.prev->link.next = tail(ll);
    567                 }
    568                 // Otherwise just reset the list
    569                 else {
    570                         verify(tail(ll)->link.next == 0p);
    571                         tail(ll)->link.prev = head(ll);
    572                         head(ll)->link.next = tail(ll);
    573                         verify(head(ll)->link.prev == 0p);
    574                 }
    575         #endif
    576 }
    577 
    578 static void assign_list(unsigned & value, dlist(processor, processor) & list, unsigned count) {
     548                        if(is_empty(ll)) {
     549                                verify(ll.anchor.next == 0p);
     550                                ll.prev = mock_head(ll);
     551                        }
     552}
     553
     554static void assign_list(unsigned & value, dlist(processor) & list, unsigned count) {
    579555        processor * it = &list`first;
    580556        for(unsigned i = 0; i < count; i++) {
     
    597573                lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
    598574                for(i; lanes.count) {
    599                         lanes.tscs[i].tv = ts(lanes.data[i]);
     575                        unsigned long long tsc = ts(lanes.data[i]);
     576                        lanes.tscs[i].tv = tsc != 0 ? tsc : rdtscl();
    600577                }
    601578        #endif
     
    686663                        while(!is_empty(lanes.data[idx])) {
    687664                                struct $thread * thrd;
    688                                 thrd = pop(lanes.data[idx]);
    689 
    690                                 push(cltr, thrd);
     665                                unsigned long long _;
     666                                [thrd, _] = pop(lanes.data[idx]);
     667
     668                                push(cltr, thrd, true);
    691669
    692670                                // for printing count the number of displaced threads
     
    725703        /* paranoid */ verify( ready_mutate_islocked() );
    726704}
     705
     706#if !defined(__CFA_NO_STATISTICS__)
     707        unsigned cnt(const __ready_queue_t & this, unsigned idx) {
     708                /* paranoid */ verify(this.lanes.count > idx);
     709                return this.lanes.data[idx].cnt;
     710        }
     711#endif
  • libcfa/src/concurrency/ready_subqueue.hfa

    r5407cdc r8d66610  
    77// Intrusives lanes which are used by the relaxed ready queue
    88struct __attribute__((aligned(128))) __intrusive_lane_t {
    9 
    10         #if defined(USE_MPSC)
    11                 mpsc_queue($thread) queue;
    12                 __attribute__((aligned(128)))
    13         #else
    14                 // anchor for the head and the tail of the queue
    15                 __attribute__((aligned(128))) struct __sentinel_t {
    16                         // Link lists fields
    17                         // instrusive link field for threads
    18                         // must be exactly as in $thread
    19                         __thread_desc_link link;
    20                 } before, after;
    21         #endif
     9        struct $thread * prev;
    2210
    2311        // spin lock protecting the queue
    2412        volatile bool lock;
    2513
    26         // Optional statistic counters
    27         #if !defined(__CFA_NO_SCHED_STATS__)
    28                 struct __attribute__((aligned(64))) {
    29                         // difference between number of push and pops
    30                         ssize_t diff;
     14        __thread_desc_link anchor;
    3115
    32                         // total number of pushes and pops
    33                         size_t  push;
    34                         size_t  pop ;
    35                 } stat;
     16        #if !defined(__CFA_NO_STATISTICS__)
     17                unsigned cnt;
    3618        #endif
    3719};
    3820
    39 void  ?{}(__intrusive_lane_t & this);
    40 void ^?{}(__intrusive_lane_t & this);
    41 
    4221// Get the head pointer (one before the first element) from the anchor
    43 static inline $thread * head(const __intrusive_lane_t & this) {
    44         #if defined(USE_MPSC)
    45                 return this.queue.head;
    46         #else
    47                 $thread * rhead = ($thread *)(
    48                         (uintptr_t)( &this.before ) - offsetof( $thread, link )
    49                 );
    50                 /* paranoid */ verify(rhead);
    51                 return rhead;
    52         #endif
    53 }
    54 
    55 // Get the tail pointer (one after the last element) from the anchor
    56 static inline $thread * tail(const __intrusive_lane_t & this) {
    57         #if defined(USE_MPSC)
    58                 return this.queue.tail;
    59         #else
    60                 $thread * rtail = ($thread *)(
    61                         (uintptr_t)( &this.after ) - offsetof( $thread, link )
    62                 );
    63                 /* paranoid */ verify(rtail);
    64                 return rtail;
    65         #endif
     22static inline $thread * mock_head(const __intrusive_lane_t & this) {
     23        $thread * rhead = ($thread *)(
     24                (uintptr_t)( &this.anchor ) - __builtin_offsetof( $thread, link )
     25        );
     26        return rhead;
    6627}
    6728
     
    6930void ?{}( __intrusive_lane_t & this ) {
    7031        this.lock = false;
     32        this.prev = mock_head(this);
     33        this.anchor.next = 0p;
     34        this.anchor.ts   = 0;
     35        #if !defined(__CFA_NO_STATISTICS__)
     36                this.cnt  = 0;
     37        #endif
    7138
    72         #if !defined(USE_MPSC)
    73                 this.before.link.prev = 0p;
    74                 this.before.link.next = tail(this);
    75                 this.before.link.ts   = 0;
    76 
    77                 this.after .link.prev = head(this);
    78                 this.after .link.next = 0p;
    79                 this.after .link.ts   = 0;
    80 
    81                 #if !defined(__CFA_NO_SCHED_STATS__)
    82                         this.stat.diff = 0;
    83                         this.stat.push = 0;
    84                         this.stat.pop  = 0;
    85                 #endif
    86 
    87                 // We add a boat-load of assertions here because the anchor code is very fragile
    88                 /* paranoid */ verify(((uintptr_t)( head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.before));
    89                 /* paranoid */ verify(((uintptr_t)( tail(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.after ));
    90                 /* paranoid */ verify(head(this)->link.prev == 0p );
    91                 /* paranoid */ verify(head(this)->link.next == tail(this) );
    92                 /* paranoid */ verify(tail(this)->link.next == 0p );
    93                 /* paranoid */ verify(tail(this)->link.prev == head(this) );
    94                 /* paranoid */ verify(&head(this)->link.prev == &this.before.link.prev );
    95                 /* paranoid */ verify(&head(this)->link.next == &this.before.link.next );
    96                 /* paranoid */ verify(&tail(this)->link.prev == &this.after .link.prev );
    97                 /* paranoid */ verify(&tail(this)->link.next == &this.after .link.next );
    98                 /* paranoid */ verify(__alignof__(__intrusive_lane_t) == 128);
    99                 /* paranoid */ verify(__alignof__(this) == 128);
    100                 /* paranoid */ verifyf(((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128));
    101         #endif
     39        // We add a boat-load of assertions here because the anchor code is very fragile
     40        /* paranoid */ _Static_assert( offsetof( $thread, link ) == offsetof(__intrusive_lane_t, anchor) );
     41        /* paranoid */ verify( offsetof( $thread, link ) == offsetof(__intrusive_lane_t, anchor) );
     42        /* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( $thread, link )) == (uintptr_t)(&this.anchor) );
     43        /* paranoid */ verify( &mock_head(this)->link.next == &this.anchor.next );
     44        /* paranoid */ verify( &mock_head(this)->link.ts   == &this.anchor.ts   );
     45        /* paranoid */ verify( mock_head(this)->link.next == 0p );
     46        /* paranoid */ verify( mock_head(this)->link.ts   == 0  );
     47        /* paranoid */ verify( mock_head(this) == this.prev );
     48        /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 );
     49        /* paranoid */ verify( __alignof__(this) == 128 );
     50        /* paranoid */ verifyf( ((intptr_t)(&this) % 128) == 0, "Expected address to be aligned %p %% 128 == %zd", &this, ((intptr_t)(&this) % 128) );
    10251}
    10352
    10453// Dtor is trivial
    10554void ^?{}( __intrusive_lane_t & this ) {
    106         #if !defined(USE_MPSC)
    107                 // Make sure the list is empty
    108                 /* paranoid */ verify(head(this)->link.prev == 0p );
    109                 /* paranoid */ verify(head(this)->link.next == tail(this) );
    110                 /* paranoid */ verify(tail(this)->link.next == 0p );
    111                 /* paranoid */ verify(tail(this)->link.prev == head(this) );
    112         #endif
     55        // Make sure the list is empty
     56        /* paranoid */ verify( this.anchor.next == 0p );
     57        /* paranoid */ verify( this.anchor.ts   == 0  );
     58        /* paranoid */ verify( mock_head(this)  == this.prev );
    11359}
    11460
    11561// Push a thread onto this lane
    11662// returns true of lane was empty before push, false otherwise
    117 bool push(__intrusive_lane_t & this, $thread * node) {
    118         #if defined(USE_MPSC)
    119                 inline $thread * volatile & ?`next ( $thread * this )  __attribute__((const)) {
    120                         return this->link.next;
    121                 }
    122                 push(this.queue, node);
    123         #else
    124                 #if defined(__CFA_WITH_VERIFY__)
    125                         /* paranoid */ verify(this.lock);
    126                         /* paranoid */ verify(node->link.ts != 0);
    127                         /* paranoid */ verify(node->link.next == 0p);
    128                         /* paranoid */ verify(node->link.prev == 0p);
    129                         /* paranoid */ verify(tail(this)->link.next == 0p);
    130                         /* paranoid */ verify(head(this)->link.prev == 0p);
     63static inline void push( __intrusive_lane_t & this, $thread * node ) {
     64        /* paranoid */ verify( this.lock );
     65        /* paranoid */ verify( node->link.next == 0p );
     66        /* paranoid */ verify( node->link.ts   == 0  );
     67        /* paranoid */ verify( this.prev->link.next == 0p );
     68        /* paranoid */ verify( this.prev->link.ts   == 0  );
     69        if( this.anchor.next == 0p ) {
     70                /* paranoid */ verify( this.anchor.next == 0p );
     71                /* paranoid */ verify( this.anchor.ts   == 0  );
     72                /* paranoid */ verify( this.prev == mock_head( this ) );
     73        } else {
     74                /* paranoid */ verify( this.anchor.next != 0p );
     75                /* paranoid */ verify( this.anchor.ts   != 0  );
     76                /* paranoid */ verify( this.prev != mock_head( this ) );
     77        }
    13178
    132                         if(this.before.link.ts == 0l) {
    133                                 /* paranoid */ verify(tail(this)->link.prev == head(this));
    134                                 /* paranoid */ verify(head(this)->link.next == tail(this));
    135                         } else {
    136                                 /* paranoid */ verify(tail(this)->link.prev != head(this));
    137                                 /* paranoid */ verify(head(this)->link.next != tail(this));
    138                         }
    139                 #endif
    140 
    141                 // Get the relevant nodes locally
    142                 $thread * tail = tail(this);
    143                 $thread * prev = tail->link.prev;
    144 
    145                 // Do the push
    146                 node->link.next = tail;
    147                 node->link.prev = prev;
    148                 prev->link.next = node;
    149                 tail->link.prev = node;
    150 
    151                 // Update stats
    152                 #if !defined(__CFA_NO_SCHED_STATS__)
    153                         this.stat.diff++;
    154                         this.stat.push++;
    155                 #endif
    156 
    157                 verify(node->link.next == tail(this));
    158 
    159                 // Check if the queue used to be empty
    160                 if(this.before.link.ts == 0l) {
    161                         this.before.link.ts = node->link.ts;
    162                         /* paranoid */ verify(node->link.prev == head(this));
    163                         return true;
    164                 }
    165                 return false;
     79        // Get the relevant nodes locally
     80        this.prev->link.next = node;
     81        this.prev->link.ts   = rdtscl();
     82        this.prev = node;
     83        #if !defined(__CFA_NO_STATISTICS__)
     84                this.cnt++;
    16685        #endif
    16786}
     
    17089// returns popped
    17190// returns true of lane was empty before push, false otherwise
    172 $thread * pop(__intrusive_lane_t & this) {
    173         /* paranoid */ verify(this.lock);
    174         #if defined(USE_MPSC)
    175                 inline $thread * volatile & ?`next ( $thread * this )  __attribute__((const)) {
    176                         return this->link.next;
    177                 }
    178                 return pop(this.queue);
    179         #else
    180                 /* paranoid */ verify(this.before.link.ts != 0ul);
     91static inline [* $thread, unsigned long long] pop( __intrusive_lane_t & this ) {
     92        /* paranoid */ verify( this.lock );
     93        /* paranoid */ verify( this.anchor.next != 0p );
     94        /* paranoid */ verify( this.anchor.ts   != 0  );
    18195
    182                 // Get anchors locally
    183                 $thread * head = head(this);
    184                 $thread * tail = tail(this);
     96        // Get the relevant nodes locally
     97        unsigned long long ts = this.anchor.ts;
     98        $thread * node = this.anchor.next;
     99        this.anchor.next = node->link.next;
     100        this.anchor.ts   = node->link.ts;
     101        bool is_empty = this.anchor.ts == 0;
     102        node->link.next = 0p;
     103        node->link.ts   = 0;
     104        #if !defined(__CFA_NO_STATISTICS__)
     105                this.cnt--;
     106        #endif
    185107
    186                 // Get the relevant nodes locally
    187                 $thread * node = head->link.next;
    188                 $thread * next = node->link.next;
     108        // Update head time stamp
     109        if(is_empty) this.prev = mock_head( this );
    189110
    190                 /* paranoid */ verify(node != tail);
    191                 /* paranoid */ verify(node);
    192 
    193                 // Do the pop
    194                 head->link.next = next;
    195                 next->link.prev = head;
    196                 node->link.next = 0p;
    197                 node->link.prev = 0p;
    198 
    199                 // Update head time stamp
    200                 this.before.link.ts = next->link.ts;
    201 
    202                 // Update stats
    203                 #ifndef __CFA_NO_SCHED_STATS__
    204                         this.stat.diff--;
    205                         this.stat.pop ++;
    206                 #endif
    207 
    208                 // Check if we emptied list and return accordingly
    209                 /* paranoid */ verify(tail(this)->link.next == 0p);
    210                 /* paranoid */ verify(head(this)->link.prev == 0p);
    211                 if(next == tail) {
    212                         /* paranoid */ verify(this.before.link.ts == 0);
    213                         /* paranoid */ verify(tail(this)->link.prev == head(this));
    214                         /* paranoid */ verify(head(this)->link.next == tail(this));
    215                         return node;
    216                 }
    217                 else {
    218                         /* paranoid */ verify(next->link.ts != 0);
    219                         /* paranoid */ verify(tail(this)->link.prev != head(this));
    220                         /* paranoid */ verify(head(this)->link.next != tail(this));
    221                         /* paranoid */ verify(this.before.link.ts != 0);
    222                         return node;
    223                 }
    224         #endif
     111        /* paranoid */ verify( node->link.next == 0p );
     112        /* paranoid */ verify( node->link.ts   == 0  );
     113        return [node, ts];
    225114}
    226115
    227116// Check whether or not list is empty
    228117static inline bool is_empty(__intrusive_lane_t & this) {
    229         #if defined(USE_MPSC)
    230                 return this.queue.head == 0p;
    231         #else
    232                 // Cannot verify here since it may not be locked
    233                 return this.before.link.ts == 0;
    234         #endif
     118        return this.anchor.ts == 0;
    235119}
    236120
    237121// Return the timestamp
    238122static inline unsigned long long ts(__intrusive_lane_t & this) {
    239         #if defined(USE_MPSC)
    240                 $thread * tl = this.queue.head;
    241                 if(!tl) return -1ull;
    242                 return tl->link.ts;
    243         #else
    244                 // Cannot verify here since it may not be locked
    245                 return this.before.link.ts;
    246         #endif
     123        // Cannot verify here since it may not be locked
     124        return this.anchor.ts;
    247125}
    248 
    249 // Aligned timestamps which are used by the relaxed ready queue
    250 struct __attribute__((aligned(128))) __timestamp_t {
    251         volatile unsigned long long tv;
    252 };
    253 
    254 void  ?{}(__timestamp_t & this) { this.tv = 0; }
    255 void ^?{}(__timestamp_t & this) {}
  • libcfa/src/concurrency/stats.cfa

    r5407cdc r8d66610  
    1919                stats->ready.pop.local .attempt = 0;
    2020                stats->ready.pop.local .success = 0;
    21                 stats->ready.pop.local .elock   = 0;
    22                 stats->ready.pop.local .eempty  = 0;
    23                 stats->ready.pop.local .espec   = 0;
    2421                stats->ready.pop.help  .attempt = 0;
    2522                stats->ready.pop.help  .success = 0;
    26                 stats->ready.pop.help  .elock   = 0;
    27                 stats->ready.pop.help  .eempty  = 0;
    28                 stats->ready.pop.help  .espec   = 0;
    2923                stats->ready.pop.steal .attempt = 0;
    3024                stats->ready.pop.steal .success = 0;
    31                 stats->ready.pop.steal .elock   = 0;
    32                 stats->ready.pop.steal .eempty  = 0;
    33                 stats->ready.pop.steal .espec   = 0;
    3425                stats->ready.pop.search.attempt = 0;
    3526                stats->ready.pop.search.success = 0;
    36                 stats->ready.pop.search.elock   = 0;
    37                 stats->ready.pop.search.eempty  = 0;
    38                 stats->ready.pop.search.espec   = 0;
    3927                stats->ready.threads.migration = 0;
    4028                stats->ready.threads.extunpark = 0;
    4129                stats->ready.threads.threads   = 0;
     30                stats->ready.threads.cthreads  = 0;
    4231                stats->ready.sleep.halts   = 0;
    4332                stats->ready.sleep.cancels = 0;
     
    5948                        stats->io.calls.completed   = 0;
    6049                        stats->io.calls.errors.busy = 0;
    61                         stats->io.poller.sleeps     = 0;
    6250                #endif
    6351
     
    6856        }
    6957
     58        static inline void tally_one( volatile uint64_t * agg, volatile uint64_t * val) {
     59                uint64_t add = __atomic_exchange_n(val, 0_l64u, __ATOMIC_RELAXED);
     60                __atomic_fetch_add(agg, add, __ATOMIC_RELAXED);
     61        }
     62
     63        static inline void tally_one( volatile int64_t * agg, volatile int64_t * val) {
     64                int64_t add = __atomic_exchange_n(val, 0_l64, __ATOMIC_RELAXED);
     65                __atomic_fetch_add(agg, add, __ATOMIC_RELAXED);
     66        }
     67
    7068        void __tally_stats( struct __stats_t * cltr, struct __stats_t * proc ) {
    71                 __atomic_fetch_add( &cltr->ready.push.local.attempt, proc->ready.push.local.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.local.attempt = 0;
    72                 __atomic_fetch_add( &cltr->ready.push.local.success, proc->ready.push.local.success, __ATOMIC_SEQ_CST ); proc->ready.push.local.success = 0;
    73                 __atomic_fetch_add( &cltr->ready.push.share.attempt, proc->ready.push.share.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.share.attempt = 0;
    74                 __atomic_fetch_add( &cltr->ready.push.share.success, proc->ready.push.share.success, __ATOMIC_SEQ_CST ); proc->ready.push.share.success = 0;
    75                 __atomic_fetch_add( &cltr->ready.push.extrn.attempt, proc->ready.push.extrn.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.attempt = 0;