Changeset bcd74f3


Ignore:
Timestamp:
May 8, 2020, 4:44:53 PM (17 months ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
arm-eh, jacob/cs343-translation, master, new-ast, new-ast-unique-expr
Children:
5c9b20c
Parents:
e3bc51c (diff), 0e7e3c17 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into new-ast

Files:
23 added
4 deleted
64 edited

Legend:

Unmodified
Added
Removed
  • doc/bibliography/pl.bib

    re3bc51c rbcd74f3  
    45244524}
    45254525
     4526@misc{libfibre,
     4527    key         = {libfibre},
     4528    author      = {Martin Karsten},
     4529    title       = {{libfibre:~User-Level Threading Runtime}},
     4530    howpublished= {\href{https://git.uwaterloo.ca/mkarsten/libfibre}
     4531                  {https://\-git.uwaterloo.ca/\-mkarsten/\-libfibre}},
     4532    note        = {[Online; accessed 2020-04-15]},
     4533}
     4534
    45264535@article{Linda,
    45274536    keywords    = {Linda, concurrency},
     
    45554564    address     = {Belmont},
    45564565    year        = 1967,
     4566}
     4567
     4568@inproceedings{Fang06,
     4569    author      = {Fang, Yi and McMillan, Kenneth L. and Pnueli, Amir and Zuck, Lenore D.},
     4570    editor      = {Najm, Elie and Pradat-Peyre, Jean-Fran{\c{c}}ois and Donzeau-Gouge, V{\'e}ronique Vigui{\'e}},
     4571    title       = {Liveness by Invisible Invariants},
     4572    booktitle   = {Formal Techniques for Networked and Distributed Systems - FORTE 2006},
     4573    year        = 2006,
     4574    publisher   = {Springer Berlin Heidelberg},
     4575    address     = {Berlin, Heidelberg},
     4576    pages       = {356--371},
    45574577}
    45584578
     
    47394759    contributer = {pabuhr@plg},
    47404760    author      = {Gregory R. Andrews},
    4741     title       = {A Method for Solving Synronization Problems},
     4761    title       = {A Method for Solving Synchronization Problems},
    47424762    journal     = scp,
    47434763    volume      = 13,
     
    50645084    title       = {Multiple Inheritance for {C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}}},
    50655085    booktitle   = {Proceedings of the Spring '87 EUUG Conference},
    5066     month       = may, year = 1987
     5086    month       = may,
     5087    year        = 1987,
    50675088}
    50685089
     
    66466667}
    66476668
     6669@article{Aravind09,
     6670    author      = {Alex A. Aravind and Wim H. Hesselink},
     6671    title       = {A Queue Based Mutual Exclusion Algorithm},
     6672    journal     = acta,
     6673    volume      = 46,
     6674    pages       = {73--86},
     6675    year        = 2009,
     6676}
     6677
    66486678% R
    66496679
     
    80308060}
    80318061
     8062@article{Karsten20,
     8063    author      = {Karsten, Martin and Barghi, Saman},
     8064    title       = {{User-level Threading: Have Your Cake and Eat It Too}},
     8065    year        = {2020},
     8066    issue_date  = {March 2020},
     8067    publisher   = {Association for Computing Machinery},
     8068    address     = {New York, NY, USA},
     8069    volume      = {4},
     8070    number      = {1},
     8071    url         = {https://doi.org/10.1145/3379483},
     8072    doi         = {10.1145/3379483},
     8073    journal     = {Proc. ACM Meas. Anal. Comput. Syst.},
     8074    month       = mar,
     8075    numpages    = {30},
     8076}
     8077
    80328078@techreport{Harmony,
    80338079    keywords    = {messages, concurrency},
     
    80458091    contributer = {gjditchfield@plg},
    80468092    author      = {Henry Lieverman},
    8047     title       = {Using Prototypical Objects to Implement Shared Behavior in
    8048                   Object Oriented Systems},
     8093    title       = {Using Prototypical Objects to Implement Shared Behavior in Object Oriented Systems},
    80498094    journal     = sigplan,
    8050     month       = nov, year = 1986,
    8051     volume      = 21, number = 11, pages = {214-223}
     8095    month       = nov,
     8096    year        = 1986,
     8097    volume      = 21,
     8098    number      = 11,
     8099    pages       = {214-223}
    80528100}
    80538101
  • doc/theses/thierry_delisle_PhD/comp_II/Makefile

    re3bc51c rbcd74f3  
    22
    33Build = build
    4 Figures = figures
     4Figures = img
    55Macros = ../../../LaTeXmacros
    66TeXLIB = .:${Macros}:${Build}:../../../bibliography:
  • doc/theses/thierry_delisle_PhD/comp_II/comp_II.tex

    re3bc51c rbcd74f3  
    33\usepackage[T1]{fontenc}
    44\usepackage[utf8]{inputenc}
    5 \usepackage{listings}           % for code listings
    65\usepackage{xspace}
    76\usepackage{xcolor}
    87\usepackage{graphicx}
    98\usepackage{epic,eepic}
     9\usepackage{listings}                   % for code listings
    1010\usepackage{glossaries}
    1111\usepackage{textcomp}
     12% cfa macros used in the document
     13\input{common}
     14
     15\setlist{topsep=6pt,parsep=0pt}         % global reduce spacing between points
     16\newcommand{\uC}{$\mu$\CC}
    1217\usepackage[hidelinks]{hyperref}
     18\setlength{\abovecaptionskip}{5pt plus 3pt minus 2pt}
     19\lstMakeShortInline$%                   % single-character for \lstinline
    1320%\usepackage[margin=1in]{geometry}
    1421%\usepackage{float}
    1522
    16 % cfa macros used in the document
    17 \input{common}
    1823\input{glossary}
    1924
     
    2732
    2833\author{
    29         \huge Thierry Delisle \\
    30         \Large \vspace*{0.1in} \texttt{tdelisle@uwaterloo.ca} \\
     34        \huge Thierry Delisle \vspace*{5pt} \\
     35        \Large \texttt{tdelisle@uwaterloo.ca} \vspace*{5pt} \\
    3136        \Large Cheriton School of Computer Science \\
    3237        \Large University of Waterloo
     
    4247
    4348\newcommand{\cit}{\textsuperscript{[Citation Needed]}\xspace}
    44 \newcommand{\TODO}{~\newline{\large\bf\color{red} TODO :}\xspace}
     49\newcommand{\TODO}{{\large\bf\color{red} TODO: }\xspace}
    4550
    4651% ===============================================================================
     
    5459\section{Introduction}
    5560\subsection{\CFA and the \CFA concurrency package}
    56 \CFA\cit is a modern, polymorphic, non-object-oriented, backwards-compatible extension of the C programming language. It aims to add high-productivity features while maintaning the predictible performance of C. As such, concurrency in \CFA\cit aims to offer simple and safe high-level tools while still allowing performant code. \CFA concurrrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing. As such, the \CFA \emph{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
    57 
    58 Scheduling occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler. This scheduling is an indirect handoff, as opposed to generators and coroutines which explicitly switch to the next generator and coroutine respectively. The cost of switching between two threads for an indirect handoff has two components : the cost of actually context-switching, i.e., changing the relevant registers to move execution from one thread to the other, and the cost of scheduling, i.e., deciding which thread to run next among all the threads ready to run. The first cost is generally constant and fixed\footnote{Affecting the context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a fixed third-thread before scheduling.}, while the scheduling cost can vary based on the system state. Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, i.e. \textit{linearizability}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention.
    59 
    60 The more threads switch, the more the administration cost of scheduling becomes noticeable. It is therefore important to build a scheduler with the lowest possible cost and latency. Another important consideration is \emph{fairness}. In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}. While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows to much unfairness. Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance. In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the the express cash register at a grocery store.
    61 
    62 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance. Here understandability does not refer to the API but to how much scheduling concerns programmers need to take into account when writing a \CFA concurrent package. Therefore, the main goal of this proposal is :
     61\CFA\cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
     62It aims to add high-productivity features while maintaining the predictable performance of C.
     63As such, concurrency in \CFA\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code.
     64\CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing.
     65As such, the \CFA \newterm{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
     66
     67\newterm{Scheduling} occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler.
     68This scheduling is an indirect handoff, as opposed to generators and coroutines which explicitly switch to the next generator and coroutine respectively.
     69The cost of switching between two threads for an indirect handoff has two components:
     70\begin{enumerate}
     71\item
     72the cost of actually context-switching, \ie changing the relevant registers to move execution from one thread to the other,
     73\item
     74and the cost of scheduling, \ie deciding which thread to run next among all the threads ready to run.
     75\end{enumerate}
     76The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a fixed third-thread before scheduling.}, while the scheduling cost can vary based on the system state.
     77Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention.
     78
     79The more threads switch, the more the administration cost of scheduling becomes noticeable.
     80It is therefore important to build a scheduler with the lowest possible cost and latency.
     81Another important consideration is \newterm{fairness}.
     82In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}.
     83While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows too much unfairness.
     84Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance.
     85In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the express cash-register at a grocery store.
     86
     87The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance.
     88Here understandability does not refer to the API but to how much scheduling concerns programmers need to take into account when writing a \CFA concurrent package.
     89Therefore, the main goal of this proposal is:
    6390\begin{quote}
    6491The \CFA scheduler should be \emph{viable} for \emph{any} workload.
    6592\end{quote}
    6693
    67 For a general purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads. As such, scheduling performance is generally either defined by the best case scenario, i.e., a workload to which the scheduler is tailored, or the worst case scenario, i.e., the scheduler behaves no worst than \emph{X}. For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance. Be cause there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler. As such, it is important that only programmers with exceptionally high performance requirements should need to write their own scheduler and replace the scheduler in this proposal.
    68 
    69 Finally, the scheduling objective includes producing a scheduling strategy with sufficient fairness guarantees, creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily, scheduling blocking I/O operations, and writing sufficient library tools to allow developers to indirectly use the scheduler.
     94For a general purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads.
     95As such, scheduling performance is generally either defined by the best case scenario, \ie a workload to which the scheduler is tailored, or the worst case scenario, \ie the scheduler behaves no worse than \emph{X}.
     96For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance.
     97Because there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler.
     98As such, it is important that only programmers with exceptionally high performance requirements should need to write their own scheduler and replace the scheduler in this proposal.
     99
     100To achieve the \CFA scheduling goal includes:
     101\begin{enumerate}
     102\item
     103producing a scheduling strategy with sufficient fairness guarantees,
     104\item
     105creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily,
     106\item
     107scheduling blocking I/O operations,
     108\item
     109and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler.
     110\end{enumerate}
    70111
    71112% ===============================================================================
     
    73114
    74115\section{\CFA Scheduling}
    75 To scheduler user-level threads across all workloads, the scheduler has a number of requirements:
    76 
    77 \paragraph{Correctness} As with any other concurrent data structure or algorithm, the correctness requirement is paramount. The scheduler cannot allow threads to be dropped from the ready-queue, i.e., scheduled but never run, or be executed multiple times when only being scheduled once. Since \CFA concurrency has no spurious wakeup, this definition of correctness also means the scheduler should have no spurious wakeup. The \CFA scheduler must be correct.
    78 
    79 \paragraph{Performance} The performance of a scheduler can generally be mesured in terms of scheduling cost, scalability and latency. Scheduling cost is the cost to switch from one thread to another, as mentioned above. For simple applications where a single kernel thread does most of the scheduling, it is generally the dominating cost. When adding many kernel threads, scalability becomes an issue, effectively increasing the cost of context-switching when contention is high. Finally, a third axis of performance is tail latency. This measurement is related to fairness and mesures how long is needed for a thread to be run once scheduled and is evaluated in the worst cases. The \CFA scheduler should offer good performance in all three metrics.
    80 
    81 \paragraph{Fairness} Like performance, this requirements has several aspect : eventual progress, predictability and performance reliablility. As a hard requirement, the \CFA scheduler must guarantee eventual progress, i.e., prevent starvation, otherwise the above mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about. Beyond this requirement, performance should be predictible and reliable, which means similar workloads achieve similar performance and programmer intuition is respected. An example of this is : a thread that yields agressively should not run more often then other tasks. While this is intuitive, it does not hold true for many work-stealing or feedback based schedulers. The \CFA scheduler must guarantee eventual progress and should be predictible and offer reliable performance.
    82 
    83 \paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement. This issue is discussed more in depth towards the end of this proposal. It effectively refers to avoiding using CPU power when there are no threads to run, and conversely, use all CPUs available when the workload can benefit from it. Balancing these two states is where the complexity lies. The \CFA scheduler should be efficient with respect to the underlying (shared) computer.
     116To schedule user-level threads across all workloads, the scheduler has a number of requirements:
     117
     118\paragraph{Correctness} As with any other concurrent data structure or algorithm, the correctness requirement is paramount.
     119The scheduler cannot allow threads to be dropped from the ready queue, \ie scheduled but never run, or be executed multiple times when only being scheduled once.
     120Since \CFA concurrency has no spurious wakeup, this definition of correctness also means the scheduler should have no spurious wakeup.
     121The \CFA scheduler must be correct.
     122
     123\paragraph{Performance} The performance of a scheduler can generally be measured in terms of scheduling cost, scalability and latency.
     124\newterm{Scheduling cost} is the cost to switch from one thread to another, as mentioned above.
     125For simple applications, where a single kernel thread does most of the scheduling, it is generally the dominating cost.
     126\newterm{Scalability} is the cost of adding multiple kernel threads because it increases the time for context switching because of contention by multiple threads accessing shared resources, \eg the ready queue.
     127Finally, \newterm{tail latency} is service delay and relates to thread fairness.
     128Specifically, latency measures how long a thread waits to run once scheduled and is evaluated in the worst case.
     129The \CFA scheduler should offer good performance for all three metrics.
     130
     131\paragraph{Fairness} Like performance, this requirement has several aspects: eventual progress, predictability and performance reliability.
     132\newterm{Eventual progress} guarantees every scheduled thread is eventually run, \ie prevent starvation.
     133As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about.
     134\newterm{Predictability} and \newterm{reliability} means similar workloads achieve similar performance and programmer execution intuition is respected.
     135For example, a thread that yields aggressively should not run more often than other tasks.
     136While this is intuitive, it does not hold true for many work-stealing or feedback based schedulers.
     137The \CFA scheduler must guarantee eventual progress and should be predictable and offer reliable performance.
     138
     139\paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement and is discussed in depth towards the end of the proposal.
     140\newterm{Efficiency} means avoiding using CPU cycles when there are no threads to run, and conversely, use all CPUs available when the workload can benefit from it.
     141Balancing these two states is where the complexity lies.
     142The \CFA scheduler should be efficient with respect to the underlying (shared) computer.
    84143
    85144\bigskip To achieve these requirements, I can reject two broad types of scheduling strategies : feedback-based and priority schedulers.
    86145
    87146\subsection{Feedback-Based Schedulers}
    88 Many operating systems use schedulers based on feedback in some form, e.g., measuring how much CPU a particular thread has used\footnote{Different metrics can measured here but it is not relevant to the discussion.} and schedule threads based on this metric. These strategies are sensible for operating systems but rely on two assumptions on the workload :
     147Many operating systems use schedulers based on feedback in some form, \eg measuring how much CPU a particular thread has used\footnote{Different metrics can be measured but it is not relevant to the discussion.} and schedule threads based on this metric.
     148These strategies are sensible for operating systems but rely on two assumptions for the workload:
    89149
    90150\begin{enumerate}
     
    93153\end{enumerate}
    94154
    95 While these two assumptions generally hold for operating systems, they may not for user-level threading. Since \CFA has the explicit goal of allowing many smaller threads, this can naturally lead to threads with much shorter lifetime, which are only scheduled a few times. Scheduling strategies based on feedback cannot be effective in these cases because they do not have the opportunity to measure the metrics that underlie the algorithm. Note that the problem of feedback convergence (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling, e.g., threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
    96 
    97 In the context of operating systems, these concerns can be overshadowed by a more pressing concern : security. When multiple users are involved, it is possible that some users are malevolent and try to exploit the scheduling strategy in order to achieve some nefarious objective. Security concerns mean that more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process. In the case of the \CFA scheduler, every thread runs in the same user-space and is controlled by the same user. Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent. This approach allows for a much simpler fairness metric and in this proposal ``fairness'' is considered as follows : when multiple threads are cycling through the system, the total ordering of threads being scheduled, i.e., pushed onto the ready-queue, should not differ much from the total ordering of threads being executed, i.e., popped from the ready-queue.
    98 
    99 Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-threads feedback. Feedback in general is not rejected for secondary concerns like idle sleep for kernel threads, but no feedback is used to decide which thread to run next.
     155While these two assumptions generally hold for operating systems, they may not for user-level threading.
     156Since \CFA has the explicit goal of allowing many smaller threads, this can naturally lead to threads with much shorter lifetimes that are only scheduled a few times.
     157Scheduling strategies based on feedback cannot be effective in these cases because there is no opportunity to measure the metrics that underlie the algorithm.
     158Note, the problem of \newterm{feedback convergence} (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling, \eg threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
     159
     160In the context of operating systems, these concerns can be overshadowed by a more pressing concern: security.
     161When multiple users are involved, it is possible some users are malevolent and try to exploit the scheduling strategy to achieve some nefarious objective.
     162Security concerns mean more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process.
     163In the case of the \CFA scheduler, every thread runs in the same user space and is controlled by the same user.
     164Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent.
     165This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready-queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready-queue.
     166
     167Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-threads feedback.
     168Feedback in general is not rejected for secondary concerns like idle sleep for kernel threads, but no feedback is used to decide which thread to run next.
    100169
    101170\subsection{Priority Schedulers}
    102 Another broad category of schedulers are priority schedulers. In these scheduling strategies, threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads. Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO. These priority mean that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run. This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.
    103 
    104 An important observation to make is that threads do not need to have explicit priorities for problems to occur. Indeed, any system with multiple ready-queues and attempts to exhaust one queue before accessing the other queues, can encounter starvation problems. A popular scheduling strategy that suffers from implicit priorities is work-stealing. Work-stealing is generally presented as follows, each processor has a list of ready threads.
    105 \begin{enumerate}
    106         \item Run threads from ``this'' processor's list first.
    107         \item If ``this'' processor's list is empty, run threads from some other processor's list.
    108 \end{enumerate}
    109 
    110 In a loaded system\footnote{A loaded system is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block or preempt for an extended period of time, threads on the same processor's list starve if no other processors exhaust their list.
    111 
    112 Since priorities can be complex for programmers to handle, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities.
     171Another broad category of schedulers are priority schedulers.
     172In these scheduling strategies, threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads.
     173Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO.
     174A consequence of priority is that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run.
     175This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.
     176
     177An important observation is that threads do not need to have explicit priorities for problems to occur.
     178Indeed, any system with multiple ready-queues that attempts to exhaust one queue before accessing the other queues, essentially provide implicit priority, which can encounter starvation problems.
     179For example, a popular scheduling strategy that suffers from implicit priorities is work stealing.
     180\newterm{Work stealing} is generally presented as follows:
     181\begin{enumerate}
     182        \item Each processor has a list of ready threads.
     183        \item Each processor runs threads from its ready queue first.
     184        \item If a processor's ready queue is empty, attempt to run threads from some other processor's ready queue.
     185\end{enumerate}
     186
     187In a loaded system\footnote{A \newterm{loaded system} is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block, or preempt for an extended period of time, threads on the same processor's list starve if no other processors exhaust their list.
     188
     189Since priorities can be complex for programmers to incorporate into their execution intuition, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities.
    113190
    114191\subsection{Schedulers without feedback or priorities}
    115 This proposal conjectures that is is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about. The simplest fairness guarantee is FIFO ordering, i.e., threads scheduled first run first. However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additionnal synchronization. Thankfully, strict FIFO is not needed for sufficient fairness. Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run. This relaxation is possible because the non-determinism means that programmers must already handle ordering problems in order to produce correct code and already must rely on weak guarantees, for example that a specific thread will \emph{eventually} run. Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems. For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in total ordering.
    116 
    117 Scheduling is defined as follows :
      192This proposal conjectures that it is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about.
     193The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first run first.
     194However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization.
     195Thankfully, strict FIFO is not needed for sufficient fairness.
     196Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run.
     197Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a specific thread will \emph{eventually} run.
     198Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems.
     199For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in total ordering.
     200
     201The \CFA scheduler fairness is defined as follows:
    118202\begin{itemize}
    119203        \item Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regard to $N$.
    120204\end{itemize}
    121 
    122205While this is not a bounded guarantee, the probability that unfairness persist for long periods of times decreases exponentially, making persisting unfairness virtually impossible.
    123206
    124207% ===============================================================================
    125208% ===============================================================================
    126 \section{Proposal}
    127 
    128 \subsection{Ready-Queue} \label{sec:queue}
    129 A simple ready-queue can be built from a FIFO queue, where user-threads are pushed onto the queue when they are ready to run, and processors (kernel-threads acting as virtual processors) pop the user-threads from the queue and execute them. Using the paper\cite{alistarh2018relaxed} as a basis, it is simple to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems. The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant. Section~\ref{sec:resize} discusses resizing the array.}. Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue. Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp. A higher number of underlying queues leads to less contention on each queue and therefore better performance. In a loaded system, it is highly likely the queues are non-empty, i.e., several tasks are on each of the underlying queues. This means that selecting a queue at random to pop from is highly likely to yield a queue with available items. In Figure~\ref{fig:base}, ignoring the ellipsis, the chances of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10.
     209\section{Proposal Details}
     210
     211\subsection{Central Ready Queue} \label{sec:queue}
     212A central ready queue can be built from a FIFO queue, where user threads are pushed onto the queue when they are ready to run, and processors (kernel-threads acting as virtual processors) pop the user threads from the queue and execute them.
     213Alistarh \etal~\cite{alistarh2018relaxed} show it is straightforward to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems.
     214The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant.
     215Section~\ref{sec:resize} discusses resizing the array.}.
     216Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue.
     217Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp.
     218A higher number of underlying queues leads to less contention on each queue and therefore better performance.
     219In a loaded system, it is highly likely the queues are non-empty, \ie several tasks are on each of the underlying queues.
     220This means that selecting a queue at random to pop from is highly likely to yield a queue with available items.
     221In Figure~\ref{fig:base}, ignoring the ellipsis, the chances of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10.
    130222
    131223\begin{figure}
     
    133225                \input{base}
    134226        \end{center}
    135         \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists. The timestamp is in all nodes and cell arrays.}
     227        \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists.
     228The timestamp is in all nodes and cell arrays.}
    136229        \label{fig:base}
    137230\end{figure}
     
    145238\end{figure}
    146239
    147 When the ready queue is \emph{more empty}, i.e., several of the queues are empty, selecting a random queue for popping is less likely to yield a valid selection and more attempts need to be made, resulting in a performance degradation. Figure~\ref{fig:empty} shows an example with fewer elements where the chances of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time. Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry. Overall performance is therefore influenced by the contention on the underlying queues and pop performance is influenced by the item density. This leads to four performance cases, as depicted in Table~\ref{tab:perfcases}.
     240When the ready queue is \emph{more empty}, \ie several of the queues are empty, selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation.
     241Figure~\ref{fig:empty} shows an example with fewer elements, where the chances of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time.
     242Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry.
     243Note, the popping kernel thread has no work to do, but CPU cycles are wasted both for available user and kernel threads during the pop operation as the popping thread is using a CPU.
     244Overall performance is therefore influenced by the contention on the underlying queues and pop performance is influenced by the item density.
     245
     246This leads to four performance cases for the centralized ready-queue, as depicted in Table~\ref{tab:perfcases}.
     247The number of processors (many or few) refers to the number of kernel threads \emph{actively} attempting to pop user threads from the queues, not the total number of kernel threads.
     248The number of threads (many or few) refers to the number of user threads ready to be run.
      249Many threads means they outnumber processors significantly and most underlying queues have items; few threads means there are barely more threads than processors and most underlying queues are empty.
     250Cases with fewer threads than processors are discussed in Section~\ref{sec:sleep}.
    148251
    149252\begin{table}
     
    155258                        Many Threads & A: good performance & B: good performance \\
    156259                        \hline
    157                         Few Threads  & C: poor performance & D: poor performance \\
     260                        Few Threads  & C: worst performance & D: poor performance \\
    158261                        \hline
    159262                \end{tabular}
    160263        \end{center}
    161         \caption{Performance of the relaxed FIFO list in different cases. The number of processors (many or few) refers to the number of kernel-threads \emph{actively} attempting to pop user-threads from the queues, not the total number of kernel-threads. The number of threads (many or few) refers to the number of user-threads ready to be run. Many threads means they outnumber processors significantly and most underlying queues have items, few threads mean there are barely more threads than processors and most underlying queues are empty. Cases with fewer threads than processors are discussed in Section~\ref{sec:sleep}.}
     264        \caption{Expected performance of the relaxed FIFO list in different cases.}
    162265        \label{tab:perfcases}
    163266\end{table}
    164267
    165 Table~\ref{tab:perfcases}
    166 
    167 Performance can be improved in case~D (Table~\ref{tab:perfcases}) by adding information to help processors find which inner queues are used. This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations. The approach used to encode this information can vary in density and be either global or local, where density means the information is either packed in few cachelines or spread across several cachelines, and local information means each thread uses an independent copy instead of a single global, i.e., common, source of information.
    168 
    169 For example, bitmask can be used to identify which inner queues are currently in use, as shown in Figure~\ref{fig:emptybit}. This means that processors can often find user-threads in constant time, regardless of how many underlying queues are empty. Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) which allow using the bitmask with very little overhead compared to the randomized selection approach for a filled readyqueue, offerring decent performance even in cases with many empty inner queues. However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomicly.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word. With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomicly.
    170 
    171 Finally, a dense bitmap, either single or multi-word, causes additional problems
    172 in case C (Table 1), because many processors are continuously scanning the
    173 bitmask to find the few available threads. This increased contention on the
    174 bitmask(s) reduces performance because of cache misses and the bitmask is
    175 updated more frequently by the scanning processors racing to read and/or update
    176 that information. This increased update frequency means the information in the
    177 bitmask will more often be stale before a processor can use it to find an item.
     268Performance can be improved in case~D (Table~\ref{tab:perfcases}) by adding information to help processors find which inner queues are used.
     269This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations.
     270The approach used to encode this information can vary in density and be either global or local.
     271\newterm{Density} means the information is either packed in a few cachelines or spread across several cachelines, and \newterm{local information} means each thread uses an independent copy instead of a single global, \ie common, source of information.
     272
     273For example, Figure~\ref{fig:emptybit} shows a dense bitmask to identify which inner queues are currently in use.
     274This approach means processors can often find user threads in constant time, regardless of how many underlying queues are empty.
     275Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow using the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues.
     276However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word.
     277With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomically.
     278
      279Finally, a dense bitmap, either single or multi-word, causes additional problems in case~C (Table~\ref{tab:perfcases}), because many processors are continuously scanning the bitmask to find the few available threads.
     280This increased contention on the bitmask(s) reduces performance because of cache misses after updates and the bitmask is updated more frequently by the scanning processors racing to read and/or update that information.
     281This increased update frequency means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available user threads but none on queue.
    178282
    179283\begin{figure}
     
    185289\end{figure}
    186290
    187 Another approach is to use a hiearchical data structure, for example Figure~\ref{fig:emptytree}. Creating a tree of nodes to reduce contention has been shown to work in similar cases\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US. How does that affect \CFA? Can I use it in my work?}. However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case.
     291Figure~\ref{fig:emptytree} shows another approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US.
     292How does that affect \CFA? Can I use it in my work?}.
     293However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case.
    188294
    189295\begin{figure}
     
    195301\end{figure}
    196302
    197 Finally, a third approach is to use dense information, similar to the bitmap, but have each thread keep its own independant copies of it. While this approach can offer good scalability \emph{and} low latency, the livelyness of the information can become a problem. In the simple cases, local copies of which underlying queues are empty can become stale and end-up not being useful for the pop operation. A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct. As mentionned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation. Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct.
    198 
    199 \begin{figure}
    200         \begin{center}
    201                 {\resizebox{0.8\textwidth}{!}{\input{emptytls}}}
     303Finally, a third approach is to use dense information, similar to the bitmap, but have each thread keep its own independent copy of it.
      304While this approach can offer good scalability \emph{and} low latency, the liveness of the information can become a problem.
     305In the simple cases, local copies of which underlying queues are empty can become stale and end-up not being useful for the pop operation.
     306A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct.
     307As mentioned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation.
     308Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct.
     309
     310\begin{figure}
     311        \begin{center}
     312                \input{emptytls}
    202313        \end{center}
    203314        \caption{``More empty'' queue with added per processor bitmask to indicate which array cells have items.}
     
    205316\end{figure}
    206317
    207 There is a fundamental tradeoff among these approach. Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention case. Sparse global information helps high-contention cases but increases latency in zero-contention-cases, to read and ``aggregate'' the information\footnote{Hiearchical structures, e.g., binary search tree, effectively aggregate information but following pointer chains, learning information for each node. Similarly, other sparse schemes would need to read multiple cachelines to acquire all the information needed.}. Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases, however the information can become stale making it difficult to use to ensure correctness. The fact that these solutions have these fundamental limits suggest to me that a better solution combines these properties in an interesting ways. The lock discussed in Section~\ref{sec:resize} also allows for solutions that adapt to the number of processors, which could also prove useful.
      318There is a fundamental tradeoff among these approaches.
     319Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention case.
     320Sparse global information helps high-contention cases but increases latency in zero-contention-cases, to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node.
     321Similarly, other sparse schemes need to read multiple cachelines to acquire all the information needed.}.
     322Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases, however the information can become stale making it difficult to use to ensure correctness.
      323The fact that these solutions have these fundamental limits suggests to me that a better solution should combine these properties in interesting ways.
     324Also, the lock discussed in Section~\ref{sec:resize} allows for solutions that adapt to the number of processors, which could also prove useful.
    208325
    209326\paragraph{Objectives and Existing Work}
    210327
    211 How much scalability is actually needed is highly debatable \emph{libfibre}\cit has compared favorably to other schedulers in webserver tests\cit and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask. As such, the single atomic instruction on a shared cacheline may be sufficiently performant.
    212 
    213 I have built a prototype of this ready-queue in the shape of a data-queue, i.e., nodes on the queue are structures with a single int and the intrusive data fields. Using this prototype I ran preliminary performance experiments which confirm the expected performance in Table~\ref{tab:perfcases}. However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, e.g., threads are not independant of each other, when a thread blocks some other thread must intervene to wake it.
    214 
    215 I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results. As creating one-to-one comparisons with the prototype will be complex.
     328How much scalability is actually needed is highly debatable.
     329\emph{libfibre}\cite{libfibre} has compared favorably to other schedulers in webserver tests\cite{karstenuser} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.
     330As such, the single atomic instruction on a shared cacheline may be sufficiently performant.
     331
     332I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single int representing a thread and intrusive data fields.
     333Using this prototype I ran preliminary performance experiments that confirm the expected performance in Table~\ref{tab:perfcases}.
     334However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, \eg threads are not independent of each other, when a thread blocks some other thread must intervene to wake it.
     335
     336I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results, as creating one-to-one comparisons between the prototype and the \CFA runtime will be complex.
    216337
    217338\subsection{Dynamic Resizing} \label{sec:resize}
     339
    218340\begin{figure}
    219341        \begin{center}
     
    224346\end{figure}
    225347
    226 The \CFA runtime system groups processors together as clusters. Threads on a cluster are always scheduled on one of the processors of the cluster. Currently, the runtime handles dynamically adding and removing processors from clusters at any time. Since this is part of the existing design, the proposed scheduler must also support this behaviour. However, dynamicaly resizing a cluster is considered a rare event associated with setup, teardown and major configuration changes. This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system. As such, the proposed scheduler must honor the correctness of these behaviour but does not have any performance objectives with regard to resizing a cluster. How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long period of times. However, as mentionned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance. The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks. Since the underlying queues are stored in a dense array, changing the number of queues requires resizing the array and expanding the array requires moving it. This can introduce memory reclamation problems if not done correctly.
     348The \CFA runtime system groups processors together as \newterm{clusters}, as shown in Figure~\ref{fig:system}.
     349Threads on a cluster are always scheduled on one of the processors of the cluster.
     350Currently, the runtime handles dynamically adding and removing processors from clusters at any time.
     351Since this is part of the existing design, the proposed scheduler must also support this behaviour.
     352However, dynamically resizing a cluster is considered a rare event associated with setup, tear down and major configuration changes.
     353This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system.
     354As such, the proposed scheduler must honour the correctness of this behaviour but does not have any performance objectives with regard to resizing a cluster.
      355How long adding or removing processors takes, and how much this disrupts the performance of other threads, is considered a secondary concern since it should be amortized over long periods of time.
     356However, as mentioned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance.
     357The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks.
     358Since the underlying queues are stored in a dense array, changing the number of queues requires resizing the array and expanding the array requires moving it, which can introduce memory reclamation problems if not done correctly.
    227359
    228360\begin{figure}
     
    230362                \input{resize}
    231363        \end{center}
    232         \caption{Copy of data structure shown in Figure~\ref{fig:base}. }
     364        \caption{Copy of data structure shown in Figure~\ref{fig:base}.}
    233365        \label{fig:base2}
    234366\end{figure}
    235367
    236 It is important to note how the array is used in this case. While the array cells are modified by every push and pop operation, the array itself, i.e., the pointer that would change when resized, is only read during these operations. Therefore the use of this pointer can be described as frequent reads and infrequent writes. This description effectively matches with the description of a Reader-Writer lock, infrequent but invasive updates among frequent read operations. In the case of the Ready-Queue described above, read operations are operations that push or pop from the ready-queue but do not invalidate any references to the ready queue data structures. Writes on the other-hand would add or remove inner queues, invalidating references to the array of inner queues in the process. Therefore, the current proposed approach to this problem is to add a per-cluster Reader Writer lock around the ready queue to prevent restructuring of the ready-queue data structure while threads are being pushed or popped.
    237 
    238 There are possible alternatives to the Reader Writer lock solution. This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject\cit. However, the RWlock solution is simple and can be leveraged to solve other problems (e.g., processor ordering and memory reclamation of threads), which makes it an attractive solution.
     368It is important to note how the array is used in this case.
     369While the array cells are modified by every push and pop operation, the array itself, \ie the pointer that would change when resized, is only read during these operations.
     370Therefore the use of this pointer can be described as frequent reads and infrequent writes.
     371This description effectively matches with the description of a reader-writer lock, infrequent but invasive updates among frequent read operations.
     372In the case of the ready queue described above, read operations are operations that push or pop from the ready queue but do not invalidate any references to the ready queue data structures.
      373Writes on the other hand would add or remove inner queues, invalidating references to the array of inner queues in the process.
     374Therefore, the current proposed approach to this problem is to add a per-cluster reader-writer lock around the ready queue to prevent restructuring of the ready-queue data-structure while threads are being pushed or popped.
     375
     376There are possible alternatives to the reader-writer lock solution.
     377This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject\cite{michael2004hazard, brown2015reclaiming}.
      378However, the reader-writer lock solution is simple and can be leveraged to solve other problems (\eg processor ordering and memory reclamation of threads), which makes it an attractive solution.
    239379
    240380\paragraph{Objectives and Existing Work}
    241 The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck. I have already built a lock that fits the desired requirements and preliminary testing show scalability and performance that exceed the target. As such, I do not consider this lock to be a risk on this project.
     381The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck.
     382I have already built a lock that fits the desired requirements and preliminary testing show scalability and performance that exceed the target.
     383As such, I do not consider this lock to be a risk for this project.
    242384
    243385\subsection{Idle Sleep} \label{sec:sleep}
    244 As mentioned, idle sleep is the process of putting processors to sleep when they have no threads to execute. In this context, processors are kernel-threads and sleeping refers to asking the kernel to block a thread. This benefit can be achieved with either thread synchronization operations like pthread\_cond\_wait or using signal operations like sigsuspend. The goal of putting idle processors to sleep is two-fold, it reduces energy consumption in cases where more idle kernel-threads translate to idle hardware threads, and reduces contention on the ready queue, since the otherwise idle processors generally contend trying to pop items from the queue.
    245 
    246 Support for idle sleep broadly involves calling the operating system to block the kernel thread and handling the race between a blocking thread  and the waking thread, and handling which kernel thread should sleep or wake-up.
    247 
    248 When a processor decides to sleep, there is a race that occurs between it signalling that is going to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread. This is equivalent to the classic problem of missing signals when using condition variables: the ``sleepy'' processor indicates that it will sleep but has not yet gone to sleep, when another processor attempts to wake it up, the waking-up operation may claim nothing needs to be done and the signal is missed. In cases where threads are scheduled from processors on the current cluster, loosing signals is not necessarily critical, because at least some processors on the cluster are awake and may check for more processors eventually. Individual processors always finish scheduling threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready-queue is not linearizable). However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a thread migrating from a different cluster. In this case, missed signals can lead to the cluster deadlocking where it should not\footnote{Clusters ``should'' never deadlock, but for this proposal, cases where \CFA users \emph{actually} write \CFA code that leads to a deadlock is considered as a deadlock that ``should'' happen. }. Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed. For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock. 
To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleepy processor notices that a thread is scheduled after it signalled its intent to block or code scheduling threads sees the intent to sleep before scheduling and be able to wake-up the processor. This matter is complicated by the fact that pthreads offers few tools to implement this solution and offers no guarantee of ordering of threads waking up for most of these tools.
    249 
    250 Another issues is trying to avoid kernel threads sleeping and waking frequently. A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up. This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer. Note that while this generally means organising the processors in a stack, I believe that the unique index provided by the ReaderWriter lock can be reused to strictly order the waking order of processors, causing a LIFO like waking order. While a strict LIFO stack is probably better, using the processor index could prove useful and offer a sufficiently LIFO ordering.
    251 
    252 Finally, another important aspect of Idle Sleep is when should processors make the decision to sleep and when is it appropriate for sleeping processors to be woken up. Processors that are unnecessarily unblocked lead to unnecessary contention and power consumption, while too many sleeping processors can lead to sub-optimal throughput. Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency. There is already a wealth of research on the subject and I may use an existing approach for the Idle Sleep heuristic in this project.
     386
     387\newterm{Idle sleep} is the process of putting processors to sleep when they have no threads to execute.
     388In this context, processors are kernel threads and sleeping refers to asking the kernel to block a thread.
     389This operation can be achieved with either thread synchronization operations like $pthread_cond_wait$ or using signal operations like $sigsuspend$.
     390The goal of putting idle processors to sleep is:
     391\begin{enumerate}
     392\item
     393reduce contention on the ready queue, since the otherwise idle processors generally contend trying to pop items from the queue,
     394\item
     395give back unneeded CPU time associated with a process to other user processors executing on the computer,
     396\item
     397and reduce energy consumption in cases where more idle kernel-threads translate to idle CPUs, which can cycle down.
     398\end{enumerate}
     399Support for idle sleep broadly involves calling the operating system to block the kernel thread and handling the race between a blocking thread and the waking thread, and handling which kernel thread should sleep or wake up.
     400
     401When a processor decides to sleep, there is a race that occurs between it signalling that is going to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread.
     402This operation is equivalent to the classic problem of missing signals when using condition variables: the ``sleepy'' processor indicates its intention to block but has not yet gone to sleep when another processor attempts to wake it up.
     403The waking-up operation sees the blocked process and signals it, but the blocking process is racing to sleep so the signal is missed.
     404In cases where kernel threads are managed as processors on the current cluster, losing signals is not necessarily critical, because at least some processors on the cluster are awake and may check for more processors eventually.
     405Individual processors always finish scheduling user threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready queue is not linearizable).
     406However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a user (or kernel) thread migrating from a different cluster.
     407In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} writes \CFA code that leads to a deadlock.}.
     408Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed.
     409For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock.
     410To be safe, this process must include a ``handshake'' where it is guaranteed that either the sleeping processor notices that a user thread is scheduled after the sleeping processor signalled its intent to block, or code scheduling threads sees the intent to sleep before scheduling and is able to wake up the processor.
     411This matter is complicated by the fact that pthreads and Linux offer few tools to implement this solution and no guarantee of ordering of threads waking up for most of these tools.
     412
     413Another important issue is avoiding kernel threads sleeping and waking frequently because there is a significant operating-system cost.
     414This scenario happens when a program oscillates between high and low activity, needing more and then fewer processors.
     415A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up.
     416This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer.
     417Note that while this generally means organizing the processors in a stack, I believe that the unique index provided in my reader-writer lock can be reused to strictly order the waking processors, causing a mostly LIFO order.
     418While a strict LIFO stack is probably better, the processor index could prove useful for other reasons, while still offering a sufficiently LIFO ordering.
     419
     420A final important aspect of idle sleep is when should processors make the decision to sleep and when is it appropriate for sleeping processors to be woken up.
     421Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to sub-optimal throughput.
     422Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency.
     423There is already a wealth of research on the subject\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg\cite{karstenuser}.
    253424
    254425\subsection{Asynchronous I/O}
    255 The final aspect of this proposal is asynchronous I/O. Without it, user threads that execute I/O operations block the underlying kernel thread, which leads to poor throughput. It would be preferrable to block the user-thread and reuse the underlying kernel-thread to run other ready threads. This approach requires intercepting the user-threads' calls to I/O operations, redirecting them to an asynchronous I/O interface and handling the multiplexing between the synchronous and asynchronous API. As such, these are the three components needed to implemented support for asynchronous I/O : an OS abstraction layer over the asynchronous interface, an event-engine to (de)multiplex the operations and a synchronous interface for users to use. None of these components currently exist in \CFA and I will need to build all three for this project.
     426
     427The final aspect of this proposal is asynchronous I/O.
     428Without it, user threads that execute I/O operations block the underlying kernel thread, which leads to poor throughput.
     429It is preferable to block the user thread performing the I/O and reuse the underlying kernel-thread to run other ready user threads.
     430This approach requires intercepting user-thread calls to I/O operations, redirecting them to an asynchronous I/O interface, and handling the multiplexing/demultiplexing between the synchronous and asynchronous API.
     431As such, there are three components needed to implement support for asynchronous I/O:
     432\begin{enumerate}
     433\item
     434an OS abstraction layer over the asynchronous interface,
     435\item
     436an event-engine to (de)multiplex the operations,
     437\item
     438and a synchronous interface for users to use.
     439\end{enumerate}
     440None of these components currently exist in \CFA and I will need to build all three for this project.
    256441
    257442\paragraph{OS Abstraction}
    258 One fundamental part for converting blocking I/O operations into non-blocking ones is having an underlying asynchronous I/O interface to direct the I/O operations. While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API. I plan to use an existing one that is sufficient. uC++ uses the \texttt{select} as its interface, which handles ttys, pipes and sockets, but not disk. It entails significant complexity and is being replaced, which make it a less interesting alternative. Another popular interface is \texttt{epoll}\cit, which is supposed to be cheaper than \texttt{select}. However, epoll also does not handle the file system and seems to have problem to linux pipes and \texttt{TTY}s\cit. A very recent alternative that must still be investigated is \texttt{io\_uring}. It claims to address some of the issues with \texttt{epoll} but is too recent to be confident that it does. Finally, a popular cross-platform alternative is \texttt{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features). However, as a full-featured library it includes much more than what is needed and could conflict with other features of \CFA unless significant effort is made to merge them together.
    259 
    260 \paragraph{Event-Engine}
    261 Laying on top of the asynchronous interface layer is the event-engine. This engine is responsible for multiplexing (batching) the synchronous I/O requests into an asynchronous I/O request and demultiplexing the results onto appropriate blocked threads. This step can be straightforward for the simple cases, but can become quite complex. Decisions that need to be made include : whether to poll from a seperate kernel thread or a regularly scheduled user thread, what should be the ordering used when results satisfy many requests, how to handle threads waiting for multiple operations, etc.
     443One fundamental part for converting blocking I/O operations into non-blocking ones is having an underlying asynchronous I/O interface to direct the I/O operations.
     444While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API.
     445It is sufficient to make one work in the complex context of the \CFA runtime.
     446\uC uses $select$\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk.
     447$select$ entails significant complexity and is being replaced in UNIX operating-systems, which make it a less interesting alternative.
     448Another popular interface is $epoll$\cite{epoll}, which is supposed to be cheaper than $select$.
     449However, $epoll$ also does not handle the file system and anecdotal evidence suggests it has problems with Linux pipes and $TTY$s.
     450A popular cross-platform alternative is $libuv$\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features).
     451However, as a full-featured library it includes much more than I need and could conflict with other features of \CFA unless significant effort is made to merge them together.
     452A very recent alternative that I am investigating is $io_uring$\cite{io_uring}.
     453It claims to address some of the issues with $epoll$ and my early investigation suggests that the claim is accurate.
     454$io_uring$ uses a much more general approach where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors.
     455I believe this approach allows for fewer problems, \eg the manpage for $open$\cite{open} states:
     456\begin{quote}
     457        Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices;
     458        that is, I/O operations will (briefly) block when device activity is required, regardless of whether $O_NONBLOCK$ is set.
     459        Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behavior when specifying this flag for regular files and block devices.
     460\end{quote}
     461This makes approaches based on $epoll$/$select$ less reliable since they may not work for every file descriptor.
     462For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime, unless further work shows problems I haven't encountered yet.
     463However, only a small subset of the features are available in Ubuntu as of April 2020\cite{wiki:ubuntu-linux}, which will limit performance comparisons.
     464I do not believe this will affect the comparison result.
     465
     466\paragraph{Event Engine}
     467Laying on top of the asynchronous interface layer is the event engine.
     468This engine is responsible for multiplexing (batching) the synchronous I/O requests into asynchronous I/O requests and demultiplexing the results to appropriate blocked user threads.
     469This step can be straightforward for simple cases, but becomes quite complex when there are thousands of user threads performing both reads and writes, possibly on overlapping file descriptors.
     470Decisions that need to be made include:
     471\begin{enumerate}
     472\item
     473whether to poll from a separate kernel thread or a regularly scheduled user thread,
     474\item
     475what should be the ordering used when results satisfy many requests,
     476\item
     477how to handle threads waiting for multiple operations, etc.
     478\end{enumerate}
    262479
    263480\paragraph{Interface}
    264 Finally, for these components to be available, it is necessary to expose them through a synchronous interface. The interface can be novel but it is preferrable to match the existing POSIX interface in order to be compatible with existing code. Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effeort. Where this is not applicable, a novel interface will be created to fill the gaps.
     481Finally, for these non-blocking I/O components to be available, it is necessary to expose them through a synchronous interface because that is the \CFA concurrent programming style.
     482The interface can be novel but it is preferable to match the existing POSIX interface when possible to be compatible with existing code.
     483Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effort.
     484Where new functionality is needed, I will create a novel interface to fill gaps and provide advanced features.
    265485
    266486
     
    268488% ===============================================================================
    269489\section{Discussion}
    270 
     490I believe that runtime system and scheduling are still open topics.
     491Many ``state of the art'' production frameworks still use single-threaded event-loops because of performance considerations, \eg \cite{nginx-design}, and, to my knowledge, no widely available system language offers modern threading facilities.
     492I believe the proposed work offers a novel runtime and scheduling package, where existing work only offers fragments that users must assemble themselves when possible.
    271493
    272494% ===============================================================================
    273495% ===============================================================================
    274496\section{Timeline}
    275 
     497\begin{center}
     498\begin{tabular}{ | r @{--} l | p{4in} | }
     499\hline May 2020 & October 2020   & Creation of the performance benchmark. \\
     500\hline November 2020 & March 2021   & Completion of the implementation. \\
     501\hline March 2021 & April 2021  & Final Performance experiments. \\
     502\hline May 2021 & August 2021 & Thesis writing and defense. \\
     503\hline
     504\end{tabular}
     505\end{center}
    276506
    277507% B I B L I O G R A P H Y
  • doc/theses/thierry_delisle_PhD/comp_II/img/base.fig

    re3bc51c rbcd74f3  
    1 #FIG 3.2  Produced by xfig version 3.2.7a
     1#FIG 3.2  Produced by xfig version 3.2.5c
    22Landscape
    33Center
    44Inches
    5 Letter
     5Letter 
    66100.00
    77Single
    88-2
    991200 2
    10 6 2400 3075 3000 4200
    11 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    12          3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 3595
    13          3000 3335
    14 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    15         1 1 1.00 45.00 90.00
    16          2700 4200 2700 3600
    17 -6
    18 6 2400 2175 3000 3375
    19 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    20          3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 2695
    21          3000 2435
    22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    23         1 1 1.00 45.00 90.00
    24          2700 3375 2700 2700
    25 -6
    26 6 3600 2175 4200 3375
    27 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    28          4200 2435 4050 2175 3750 2175 3600 2435 3750 2695 4050 2695
    29          4200 2435
    30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    31         1 1 1.00 45.00 90.00
    32          3900 3375 3900 2700
    33 -6
    34 6 3600 3075 4200 4200
    35 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    36          4200 3335 4050 3075 3750 3075 3600 3335 3750 3595 4050 3595
    37          4200 3335
    38 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    39         1 1 1.00 45.00 90.00
    40          3900 4200 3900 3600
    41 -6
    42 6 4200 3075 4800 4200
    43 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    44          4800 3335 4650 3075 4350 3075 4200 3335 4350 3595 4650 3595
    45          4800 3335
    46 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    47         1 1 1.00 45.00 90.00
    48          4500 4200 4500 3600
    49 -6
    50 6 4800 3075 5400 4200
    51 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    52          5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 3595
    53          5400 3335
    54 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    55         1 1 1.00 45.00 90.00
    56          5100 4200 5100 3600
    57 -6
    58 6 4800 2175 5400 3375
    59 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    60          5400 2435 5250 2175 4950 2175 4800 2435 4950 2695 5250 2695
    61          5400 2435
    62 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    63         1 1 1.00 45.00 90.00
    64          5100 3375 5100 2700
    65 -6
    66 6 4800 1275 5400 2475
    67 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    68          5400 1535 5250 1275 4950 1275 4800 1535 4950 1795 5250 1795
    69          5400 1535
    70 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    71         1 1 1.00 45.00 90.00
    72          5100 2475 5100 1800
    73 -6
    74 6 6000 2175 6600 3375
    75 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    76          6600 2435 6450 2175 6150 2175 6000 2435 6150 2695 6450 2695
    77          6600 2435
    78 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    79         1 1 1.00 45.00 90.00
    80          6300 3375 6300 2700
    81 -6
    82 6 6000 3075 6600 4200
    83 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    84          6600 3335 6450 3075 6150 3075 6000 3335 6150 3595 6450 3595
    85          6600 3335
    86 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    87         1 1 1.00 45.00 90.00
    88          6300 4200 6300 3600
    89 -6
    90106 6750 4125 7050 4275
    91111 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200
     
    93131 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200
    9414-6
     156 2400 2100 3000 2700
     161 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
     172 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     18         2400 2475 3000 2475
     194 1 0 50 -1 0 11 0.0000 2 120 210 2700 2650 TS\001
     20-6
     216 2400 3000 3000 3600
     221 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
     232 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     24         2400 3375 3000 3375
     254 1 0 50 -1 0 11 0.0000 2 120 210 2700 3550 TS\001
     26-6
     271 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 2400 300 300 3900 2400 4200 2400
     281 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 3300 300 300 3900 3300 4200 3300
     291 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 1500 300 300 5100 1500 5400 1500
     301 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 2400 300 300 5100 2400 5400 2400
     311 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300
     321 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 2400 300 300 6300 2400 6600 2400
     331 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 3300 300 300 6300 3300 6600 3300
     341 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4509 3302 300 300 4509 3302 4809 3302
    95352 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    9636         3000 3900 3000 4500
     
    109492 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    11050         2400 3900 7200 3900 7200 4500 2400 4500 2400 3900
     512 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     52        1 1 1.00 45.00 90.00
     53         2700 3300 2700 2700
     542 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     55        1 1 1.00 45.00 90.00
     56         3900 3300 3900 2700
     572 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     58        1 1 1.00 45.00 90.00
     59         3900 4200 3900 3600
     602 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     61        1 1 1.00 45.00 90.00
     62         5100 2475 5100 1800
     632 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     64        1 1 1.00 45.00 90.00
     65         5100 3300 5100 2700
     662 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     67        1 1 1.00 45.00 90.00
     68         5100 4200 5100 3600
     692 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     70        1 1 1.00 45.00 90.00
     71         6300 3300 6300 2700
     722 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     73        1 1 1.00 45.00 90.00
     74         6300 4200 6300 3600
     752 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     76        1 1 1.00 45.00 90.00
     77         2700 4200 2700 3600
    111782 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    112          2454 2520 2957 2520
    113 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    114          2454 3417 2957 3417
    115 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    116          2400 4350 3000 4350
    117 4 2 0 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001
    118 4 2 0 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001
    119 4 2 0 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001
    120 4 2 0 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001
    121 4 0 0 50 -1 0 11 0.0000 2 135 180 2595 3561 TS\001
    122 4 0 0 50 -1 0 11 0.0000 2 135 180 2595 2665 TS\001
    123 4 0 0 50 -1 0 11 0.0000 2 135 180 2595 4479 TS\001
     79         2400 4275 3000 4275
     802 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     81        1 1 1.00 45.00 90.00
     82         4500 4200 4500 3600
     834 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
     844 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
     854 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001
     864 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001
     874 1 0 50 -1 0 11 0.0000 2 120 210 2700 4450 TS\001
  • doc/theses/thierry_delisle_PhD/comp_II/img/empty.fig

    re3bc51c rbcd74f3  
    88-2
    991200 2
    10 6 4800 3075 5400 4200
    11 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    12         1 1 1.00 45.00 90.00
    13          5100 4200 5100 3600
    14 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    15          5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 3595
    16          5400 3335
    17 -6
    18 6 2400 2175 3000 3375
    19 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    20          3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 2695
    21          3000 2435
    22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    23         1 1 1.00 45.00 90.00
    24          2700 3375 2700 2700
    25 -6
    26 6 2400 3075 3000 4200
    27 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    28          3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 3595
    29          3000 3335
    30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    31         1 1 1.00 45.00 90.00
    32          2700 4200 2700 3600
    33 -6
    34106 6750 4125 7050 4275
    35111 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200
     
    37131 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200
    3814-6
     151 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300
     161 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
     171 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
    39182 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    4019         3000 3900 3000 4500
     
    53322 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    5433         2400 3900 7200 3900 7200 4500 2400 4500 2400 3900
     342 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     35        1 1 1.00 45.00 90.00
     36         2700 3300 2700 2700
     372 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     38        1 1 1.00 45.00 90.00
     39         5100 4200 5100 3600
     402 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     41        1 1 1.00 45.00 90.00
     42         2700 4200 2700 3600
    55434 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
    56444 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
  • doc/theses/thierry_delisle_PhD/comp_II/img/emptybit.fig

    re3bc51c rbcd74f3  
    88-2
    991200 2
    10 6 4800 3075 5400 4200
    11 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    12         1 1 1.00 45.00 90.00
    13          5100 4200 5100 3600
    14 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    15          5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 3595
    16          5400 3335
    17 -6
    18 6 2400 2175 3000 3375
    19 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    20          3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 2695
    21          3000 2435
    22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    23         1 1 1.00 45.00 90.00
    24          2700 3375 2700 2700
    25 -6
    26 6 2400 3075 3000 4200
    27 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    28          3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 3595
    29          3000 3335
    30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    31         1 1 1.00 45.00 90.00
    32          2700 4200 2700 3600
    33 -6
    34106 6750 4125 7050 4275
    35111 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200
     
    37131 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200
    3814-6
     151 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300
     161 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
     171 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
    39182 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    4019         3000 3900 3000 4500
     
    53322 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    5433         2400 3900 7200 3900 7200 4500 2400 4500 2400 3900
     342 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     35        1 1 1.00 45.00 90.00
     36         2700 3300 2700 2700
     372 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     38        1 1 1.00 45.00 90.00
     39         5100 4200 5100 3600
     402 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     41        1 1 1.00 45.00 90.00
     42         2700 4200 2700 3600
     434 0 0 50 -1 5 14 0.0000 2 180 1800 3750 4800 [1000100...]\001
    55444 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
    56454 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
    57464 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001
    58474 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001
    59 4 0 0 50 -1 5 14 0.0000 2 180 1800 3750 4800 [1000100...]\001
  • doc/theses/thierry_delisle_PhD/comp_II/img/emptytls.fig

    re3bc51c rbcd74f3  
    1 #FIG 3.2  Produced by xfig version 3.2.7a
     1#FIG 3.2  Produced by xfig version 3.2.5c
    22Landscape
    33Center
    44Inches
    5 Letter
     5Letter 
    66100.00
    77Single
    88-2
    991200 2
    10 6 4800 3075 5400 4200
    11 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    12         1 1 1.00 45.00 90.00
    13          5100 4200 5100 3600
    14 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    15          5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 3595
    16          5400 3335
    17 -6
    18 6 2400 2175 3000 3375
    19 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    20          3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 2695
    21          3000 2435
    22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    23         1 1 1.00 45.00 90.00
    24          2700 3375 2700 2700
    25 -6
    26 6 2400 3075 3000 4200
    27 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    28          3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 3595
    29          3000 3335
    30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    31         1 1 1.00 45.00 90.00
    32          2700 4200 2700 3600
    33 -6
    34106 6750 4125 7050 4275
    35111 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200
     
    37131 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200
    3814-6
     151 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300
     161 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
     171 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
    39182 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    4019         3000 3900 3000 4500
     
    53322 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    5433         2400 3900 7200 3900 7200 4500 2400 4500 2400 3900
    55 4 2 0 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001
    56 4 2 0 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001
    57 4 2 0 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001
    58 4 2 0 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001
    59 4 0 0 50 -1 5 14 0.0000 2 165 1080 2400 5100 [1000100...]\001
    60 4 0 0 50 -1 5 14 0.0000 2 165 1080 4425 5100 [1000100...]\001
    61 4 0 0 50 -1 5 14 0.0000 2 165 1080 6450 5100 [1000100...]\001
    62 4 0 0 50 -1 0 13 0.0000 2 135 630 1500 5175 Bitmask\001
    63 4 0 0 50 -1 0 13 0.0000 2 135 1080 1050 4950 Thread-Local\001
     342 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     35        1 1 1.00 45.00 90.00
     36         2700 3300 2700 2700
     372 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     38        1 1 1.00 45.00 90.00
     39         5100 4200 5100 3600
     402 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     41        1 1 1.00 45.00 90.00
     42         2700 4200 2700 3600
     434 0 0 50 -1 5 14 0.0000 2 180 1800 2400 5100 [1000100...]\001
     444 0 0 50 -1 5 14 0.0000 2 180 1800 4425 5100 [1000100...]\001
     454 0 0 50 -1 5 14 0.0000 2 180 1800 6450 5100 [1000100...]\001
     464 0 0 50 -1 0 13 0.0000 2 135 690 1500 5175 Bitmask\001
     474 0 0 50 -1 0 13 0.0000 2 150 1155 1050 4950 Thread-Local\001
     484 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
     494 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
     504 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001
     514 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001
  • doc/theses/thierry_delisle_PhD/comp_II/img/emptytree.fig

    re3bc51c rbcd74f3  
    88-2
    991200 2
    10 6 4800 3075 5400 4200
    11 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    12         1 1 1.00 45.00 90.00
    13          5100 4200 5100 3600
    14 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    15          5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 3595
    16          5400 3335
    17 -6
    18 6 2400 2175 3000 3375
    19 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    20          3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 2695
    21          3000 2435
    22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    23         1 1 1.00 45.00 90.00
    24          2700 3375 2700 2700
    25 -6
    26 6 2400 3075 3000 4200
    27 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    28          3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 3595
    29          3000 3335
    30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    31         1 1 1.00 45.00 90.00
    32          2700 4200 2700 3600
    33 -6
    34106 6750 4125 7050 4275
    35111 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200
     
    37131 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200
    3814-6
    39 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    40          3000 3900 3000 4500
    41 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    42          3600 3900 3600 4500
    43 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    44          4200 3900 4200 4500
    45 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    46          4800 3900 4800 4500
    47 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    48          5400 3900 5400 4500
    49 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    50          6000 3900 6000 4500
    51 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    52          6600 3900 6600 4500
    53 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    54          2400 3900 7200 3900 7200 4500 2400 4500 2400 3900
     151 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300
     161 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
     171 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
    55182 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    5619        1 1 1.00 45.00 90.00
     
    8346        1 1 1.00 45.00 90.00
    8447         5850 5400 6300 5100
     482 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     49         3000 3900 3000 4500
     502 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     51         3600 3900 3600 4500
     522 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     53         4200 3900 4200 4500
     542 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     55         4800 3900 4800 4500
     562 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     57         5400 3900 5400 4500
     582 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     59         6000 3900 6000 4500
     602 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     61         6600 3900 6600 4500
     622 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
     63         2400 3900 7200 3900 7200 4500 2400 4500 2400 3900
     642 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     65        1 1 1.00 45.00 90.00
     66         2700 3300 2700 2700
     672 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     68        1 1 1.00 45.00 90.00
     69         5100 4200 5100 3600
     702 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     71        1 1 1.00 45.00 90.00
     72         2700 4200 2700 3600
     734 1 0 50 -1 0 12 0.0000 2 135 135 3300 4725 X\001
     744 1 0 50 -1 0 12 0.0000 2 135 135 3900 5025 X\001
     754 1 0 50 -1 0 12 0.0000 2 135 135 5700 4725 X\001
     764 1 0 50 -1 0 12 0.0000 2 135 135 6300 5025 X\001
    85774 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
    86784 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
    87794 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001
    88804 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001
    89 4 1 0 50 -1 0 12 0.0000 2 135 135 3300 4725 X\001
    90 4 1 0 50 -1 0 12 0.0000 2 135 135 3900 5025 X\001
    91 4 1 0 50 -1 0 12 0.0000 2 135 135 5700 4725 X\001
    92 4 1 0 50 -1 0 12 0.0000 2 135 135 6300 5025 X\001
  • doc/theses/thierry_delisle_PhD/comp_II/img/resize.fig

    re3bc51c rbcd74f3  
    1 #FIG 3.2  Produced by xfig version 3.2.7a
     1#FIG 3.2  Produced by xfig version 3.2.5c
    22Landscape
    33Center
    44Inches
    5 Letter
     5Letter 
    66100.00
    77Single
    88-2
    991200 2
    10 6 2400 3075 3000 4200
    11 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    12          3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 3595
    13          3000 3335
    14 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     106 7500 3675 8475 4500
     112 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
    1512        1 1 1.00 45.00 90.00
    16          2700 4200 2700 3600
    17 -6
    18 6 2400 2175 3000 3375
    19 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    20          3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 2695
    21          3000 2435
    22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    2313        1 1 1.00 45.00 90.00
    24          2700 3375 2700 2700
    25 -6
    26 6 3600 2175 4200 3375
    27 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    28          4200 2435 4050 2175 3750 2175 3600 2435 3750 2695 4050 2695
    29          4200 2435
    30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    31         1 1 1.00 45.00 90.00
    32          3900 3375 3900 2700
    33 -6
    34 6 3600 3075 4200 4200
    35 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    36          4200 3335 4050 3075 3750 3075 3600 3335 3750 3595 4050 3595
    37          4200 3335
    38 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    39         1 1 1.00 45.00 90.00
    40          3900 4200 3900 3600
    41 -6
    42 6 4200 3075 4800 4200
    43 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    44          4800 3335 4650 3075 4350 3075 4200 3335 4350 3595 4650 3595
    45          4800 3335
    46 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    47         1 1 1.00 45.00 90.00
    48          4500 4200 4500 3600
    49 -6
    50 6 4800 3075 5400 4200
    51 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    52          5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 3595
    53          5400 3335
    54 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    55         1 1 1.00 45.00 90.00
    56          5100 4200 5100 3600
    57 -6
    58 6 4800 2175 5400 3375
    59 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    60          5400 2435 5250 2175 4950 2175 4800 2435 4950 2695 5250 2695
    61          5400 2435
    62 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    63         1 1 1.00 45.00 90.00
    64          5100 3375 5100 2700
    65 -6
    66 6 4800 1275 5400 2475
    67 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    68          5400 1535 5250 1275 4950 1275 4800 1535 4950 1795 5250 1795
    69          5400 1535
    70 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    71         1 1 1.00 45.00 90.00
    72          5100 2475 5100 1800
    73 -6
    74 6 6000 2175 6600 3375
    75 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    76          6600 2435 6450 2175 6150 2175 6000 2435 6150 2695 6450 2695
    77          6600 2435
    78 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    79         1 1 1.00 45.00 90.00
    80          6300 3375 6300 2700
    81 -6
    82 6 6000 3075 6600 4200
    83 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    84          6600 3335 6450 3075 6150 3075 6000 3335 6150 3595 6450 3595
    85          6600 3335
    86 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    87         1 1 1.00 45.00 90.00
    88          6300 4200 6300 3600
     14         7500 4200 7950 4200
     154 0 0 50 -1 0 12 0.0000 2 135 915 7500 3825 Grows with\001
     164 0 0 50 -1 0 12 0.0000 2 135 840 7500 4050 additional\001
     174 0 0 50 -1 0 12 0.0000 2 135 840 7500 4425 processors\001
    8918-6
    90196 6750 4125 7050 4275
     
    93221 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200
    9423-6
    95 6 7500 3675 8475 4500
    96 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
    97         1 1 1.00 45.00 90.00
    98         1 1 1.00 45.00 90.00
    99          7500 4200 7950 4200
    100 4 0 0 50 -1 0 12 0.0000 2 135 900 7500 3825 Grows with\001
    101 4 0 0 50 -1 0 12 0.0000 2 135 900 7500 4050 additional\001
    102 4 0 0 50 -1 0 12 0.0000 2 120 900 7500 4425 processors\001
    103 -6
     241 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 2400 300 300 3900 2400 4200 2400
     251 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 3300 300 300 3900 3300 4200 3300
     261 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 1500 300 300 5100 1500 5400 1500
     271 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 2400 300 300 5100 2400 5400 2400
     281 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300
     291 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 2400 300 300 6300 2400 6600 2400
     301 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 3300 300 300 6300 3300 6600 3300
     311 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4509 3302 300 300 4509 3302 4809 3302
     321 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
     331 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
     342 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3
     35        1 1 1.00 60.00 120.00
     36         3750 5550 2400 5550 2400 4500
     372 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
     38         3600 5100 4500 5100 4500 6000 3600 6000 3600 5100
    104392 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    10540         3000 3900 3000 4500
     
    118532 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    11954         2400 3900 7200 3900 7200 4500 2400 4500 2400 3900
    120 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    121          3600 5100 4500 5100 4500 6300 3600 6300 3600 5100
    122 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3
    123         1 1 1.00 60.00 120.00
    124          3750 5550 2400 5550 2400 4500
    125 4 2 0 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001
    126 4 2 0 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001
    127 4 2 0 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001
    128 4 2 0 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001
    129 4 0 0 50 -1 0 13 0.0000 2 135 1980 3600 5025 Cluster Data Strcuture\001
    130 4 0 0 50 -1 0 13 0.0000 2 165 1170 2400 5775 Array Pointer\001
     552 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     56        1 1 1.00 45.00 90.00
     57         2700 3300 2700 2700
     582 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     59        1 1 1.00 45.00 90.00
     60         3900 3300 3900 2700
     612 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     62        1 1 1.00 45.00 90.00
     63         3900 4200 3900 3600
     642 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     65        1 1 1.00 45.00 90.00
     66         5100 2475 5100 1800
     672 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     68        1 1 1.00 45.00 90.00
     69         5100 3300 5100 2700
     702 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     71        1 1 1.00 45.00 90.00
     72         5100 4200 5100 3600
     732 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     74        1 1 1.00 45.00 90.00
     75         6300 3300 6300 2700
     762 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     77        1 1 1.00 45.00 90.00
     78         6300 4200 6300 3600
     792 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     80        1 1 1.00 45.00 90.00
     81         2700 4200 2700 3600
     822 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
     83        1 1 1.00 45.00 90.00
     84         4500 4200 4500 3600
     854 0 0 50 -1 0 13 0.0000 2 180 1170 2400 5775 Array Pointer\001
     864 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
     874 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
     884 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001
     894 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001
     904 1 0 50 -1 0 13 0.0000 2 150 1890 4050 5025 Cluster Data Structure\001
  • doc/theses/thierry_delisle_PhD/comp_II/local.bib

    re3bc51c rbcd74f3  
    7676
    7777@article{finkel1987dib,
    78   title={DIBa distributed implementation of backtracking},
     78  title={DIB-a distributed implementation of backtracking},
    7979  author={Finkel, Raphael and Manber, Udi},
    8080  journal={ACM Transactions on Programming Languages and Systems (TOPLAS)},
     
    223223
    224224% ===============================================================================
    225 % MISC
    226 % ===============================================================================
     225% Algorithms
     226% ===============================================================================
     227@article{michael2004hazard,
     228  title={Hazard pointers: Safe memory reclamation for lock-free objects},
     229  author={Michael, Maged M},
     230  journal={IEEE Transactions on Parallel and Distributed Systems},
     231  volume={15},
     232  number={6},
     233  pages={491--504},
     234  year={2004},
     235  publisher={IEEE}
     236}
     237
     238@inproceedings{brown2015reclaiming,
     239  title={Reclaiming memory for lock-free data structures: There has to be a better way},
     240  author={Brown, Trevor Alexander},
     241  booktitle={Proceedings of the 2015 ACM Symposium on Principles of Distributed Computing},
     242  pages={261--270},
     243  year={2015}
     244}
     245
    227246% Trevor's relaxed FIFO list
    228247@inproceedings{alistarh2018relaxed,
     
    242261  year={2007}
    243262}
     263
     264% ===============================================================================
     265% Linux Man Pages
     266% ===============================================================================
     267@manual{open,
     268  key        = "open",
     269  title      = "open(2) Linux User's Manual",
     270  year       = "2020",
     271  month      = "February",
     272}
     273
     274@manual{epoll,
     275  key        = "epoll",
     276  title      = "epoll(7) Linux User's Manual",
     277  year       = "2019",
     278  month      = "March",
     279}
     280
     281@manual{select,
     282  key        = "select",
     283  title      = "select(7) Linux User's Manual",
     284  year       = "2019",
     285  month      = "March",
     286}
     287
     288@misc{io_uring,
     289  title   = {Efficient IO with io\_uring},
     290  author  = {Axboe, Jens},
     291  year    = "2019",
     292  month   = "March",
     293  version = {0,4},
     294  howpublished = {\url{https://kernel.dk/io_uring.pdf}}
     295}
     296
     297@misc{libuv,
     298  key   = "libuv",
     299  title = {libuv},
     300  howpublished = {\url{https://github.com/libuv/libuv}}
     301}
     302
     303% ===============================================================================
     304% MISC
     305% ===============================================================================
     306
     307@misc{nginx-design,
     308  key   = "nginx",
     309  title={Inside {NGINX}: How We Designed for Performance \& Scale},
     310  howpublished= {\href{https://www.nginx.com/blog/inside-nginx-how-we-designed-for-performance-scale}
     311                {https://\-www.nginx.com/\-blog/\-inside\--nginx\--how\--we\--designed\--for\--performance\--scale}},
     312}
     313
     314@article{schillings1996engineering,
     315  title={Be engineering insights: Benaphores},
     316  author={Schillings, Benoit},
     317  journal={Be Newsletters},
     318  volume={1},
     319  number={26},
     320  year={1996}
     321}
     322
     323@misc{wiki:thunderherd,
     324   author = "{Wikipedia contributors}",
     325   title = "Thundering herd problem --- {W}ikipedia{,} The Free Encyclopedia",
     326   year = "2020",
     327   howpublished = {\href{https://en.wikipedia.org/wiki/Thundering_herd_problem}
     328                  {https://\-en.wikipedia.org/\-wiki/\-Thundering\_herd\_problem}},},
     329   note = "[Online; accessed 14-April-2020]"
     330}
     331
     332@misc{wiki:ubuntu-linux,
     333   author = "{Wikipedia contributors}",
     334   title = "Ubuntu version history : Table of versions --- {W}ikipedia{,} The Free Encyclopedia",
     335   year = "2020",
     336   howpublished = {\href{https://en.wikipedia.org/wiki/Ubuntu_version_history\#Table_of_versions}
     337                  {https://\-en.wikipedia.org/\-wiki/\-Ubuntu\_version\_history\#Table\_of\_versions}},
     338   note = "[Online; accessed 15-April-2020]"
     339}
  • driver/cfa.cc

    re3bc51c rbcd74f3  
    385385        } // if
    386386
     387        string preludedir;
    387388        switch(path) {
    388         case Installed   : Putenv( argv, "--prelude-dir=" + libdir ); break;
    389         case BuildTree   : Putenv( argv, "--prelude-dir=" + libdir + "/prelude" ); break;
    390         case Distributed : Putenv( argv, "--prelude-dir=" + dir(argv[0]) ); break;
     389        case Installed   : preludedir = libdir; break;
     390        case BuildTree   : preludedir = libdir + "/prelude"; break;
     391        case Distributed : preludedir = dir(argv[0]); break;
    391392        }
     393
     394        Putenv( argv, "--prelude-dir=" + preludedir );
     395        args[nargs++] = "-include";
     396        args[nargs++] = (*new string(preludedir + "/defines.hfa")).c_str();
    392397
    393398        for ( int i = 0; i < nlibs; i += 1 ) {                          // copy non-user libraries after all user libraries
  • libcfa/Makefile.in

    re3bc51c rbcd74f3  
    106106 configure.lineno config.status.lineno
    107107mkinstalldirs = $(install_sh) -d
     108CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa
    108109CONFIG_CLEAN_FILES =
    109110CONFIG_CLEAN_VPATH_FILES =
  • libcfa/configure

    re3bc51c rbcd74f3  
    790790enable_distcc
    791791with_cfa_name
     792enable_static
    792793enable_shared
    793 enable_static
    794794with_pic
    795795enable_fast_install
     
    14521452  --disable-silent-rules  verbose build output (undo: "make V=0")
    14531453  --enable-distcc     whether or not to enable distributed compilation
     1454  --enable-static[=PKGS]  build static libraries [default=no]
    14541455  --enable-shared[=PKGS]  build shared libraries [default=yes]
    1455   --enable-static[=PKGS]  build static libraries [default=yes]
    14561456  --enable-fast-install[=PKGS]
    14571457                          optimize for fast installation [default=yes]
     
    19601960
    19611961} # ac_fn_cxx_try_link
     1962
     1963# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
     1964# -------------------------------------------------------
     1965# Tests whether HEADER exists, giving a warning if it cannot be compiled using
     1966# the include files in INCLUDES and setting the cache variable VAR
     1967# accordingly.
     1968ac_fn_c_check_header_mongrel ()
     1969{
     1970  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
     1971  if eval \${$3+:} false; then :
     1972  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
     1973$as_echo_n "checking for $2... " >&6; }
     1974if eval \${$3+:} false; then :
     1975  $as_echo_n "(cached) " >&6
     1976fi
     1977eval ac_res=\$$3
     1978               { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
     1979$as_echo "$ac_res" >&6; }
     1980else
     1981  # Is the header compilable?
     1982{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
     1983$as_echo_n "checking $2 usability... " >&6; }
     1984cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     1985/* end confdefs.h.  */
     1986$4
     1987#include <$2>
     1988_ACEOF
     1989if ac_fn_c_try_compile "$LINENO"; then :
     1990  ac_header_compiler=yes
     1991else
     1992  ac_header_compiler=no
     1993fi
     1994rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     1995{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
     1996$as_echo "$ac_header_compiler" >&6; }
     1997
     1998# Is the header present?
     1999{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
     2000$as_echo_n "checking $2 presence... " >&6; }
     2001cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     2002/* end confdefs.h.  */
     2003#include <$2>
     2004_ACEOF
     2005if ac_fn_c_try_cpp "$LINENO"; then :
     2006  ac_header_preproc=yes
     2007else
     2008  ac_header_preproc=no
     2009fi
     2010rm -f conftest.err conftest.i conftest.$ac_ext
     2011{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
     2012$as_echo "$ac_header_preproc" >&6; }
     2013
     2014# So?  What about this header?
     2015case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
     2016  yes:no: )
     2017    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
     2018$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
     2019    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
     2020$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
     2021    ;;
     2022  no:yes:* )
     2023    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
     2024$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
     2025    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
     2026$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
     2027    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
     2028$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
     2029    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
     2030$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
     2031    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
     2032$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
     2033( $as_echo "## --------------------------------------- ##
     2034## Report this to cforall@plg.uwaterloo.ca ##
     2035## --------------------------------------- ##"
     2036     ) | sed "s/^/$as_me: WARNING:     /" >&2
     2037    ;;
     2038esac
     2039  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
     2040$as_echo_n "checking for $2... " >&6; }
     2041if eval \${$3+:} false; then :
     2042  $as_echo_n "(cached) " >&6
     2043else
     2044  eval "$3=\$ac_header_compiler"
     2045fi
     2046eval ac_res=\$$3
     2047               { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
     2048$as_echo "$ac_res" >&6; }
     2049fi
     2050  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
     2051
     2052} # ac_fn_c_check_header_mongrel
    19622053cat >config.log <<_ACEOF
    19632054This file contains any messages produced by compilers while
     
    79398030
    79408031# Set options
     8032# Check whether --enable-static was given.
     8033if test "${enable_static+set}" = set; then :
     8034  enableval=$enable_static; p=${PACKAGE-default}
     8035    case $enableval in
     8036    yes) enable_static=yes ;;
     8037    no) enable_static=no ;;
     8038    *)
     8039     enable_static=no
     8040      # Look at the argument we got.  We use all the common list separators.
     8041      lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
     8042      for pkg in $enableval; do
     8043        IFS=$lt_save_ifs
     8044        if test "X$pkg" = "X$p"; then
     8045          enable_static=yes
     8046        fi
     8047      done
     8048      IFS=$lt_save_ifs
     8049      ;;
     8050    esac
     8051else
     8052  enable_static=no
     8053fi
     8054
     8055
     8056
     8057
     8058
     8059
     8060
    79418061
    79428062
     
    79718091fi
    79728092
    7973 
    7974 
    7975 
    7976 
    7977 
    7978 
    7979 
    7980 
    7981   # Check whether --enable-static was given.
    7982 if test "${enable_static+set}" = set; then :
    7983   enableval=$enable_static; p=${PACKAGE-default}
    7984     case $enableval in
    7985     yes) enable_static=yes ;;
    7986     no) enable_static=no ;;
    7987     *)
    7988      enable_static=no
    7989       # Look at the argument we got.  We use all the common list separators.
    7990       lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
    7991       for pkg in $enableval; do
    7992         IFS=$lt_save_ifs
    7993         if test "X$pkg" = "X$p"; then
    7994           enable_static=yes
    7995         fi
    7996       done
    7997       IFS=$lt_save_ifs
    7998       ;;
    7999     esac
    8000 else
    8001   enable_static=yes
    8002 fi
    80038093
    80048094
     
    1685916949
    1686016950
     16951for ac_header in linux/io_uring.h
     16952do :
     16953  ac_fn_c_check_header_mongrel "$LINENO" "linux/io_uring.h" "ac_cv_header_linux_io_uring_h" "$ac_includes_default"
     16954if test "x$ac_cv_header_linux_io_uring_h" = xyes; then :
     16955  cat >>confdefs.h <<_ACEOF
     16956#define HAVE_LINUX_IO_URING_H 1
     16957_ACEOF
     16958
     16959fi
     16960
     16961done
     16962
     16963for ac_func in preadv2 pwritev2
     16964do :
     16965  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
     16966ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
     16967if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
     16968  cat >>confdefs.h <<_ACEOF
     16969#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
     16970_ACEOF
     16971
     16972fi
     16973done
     16974
     16975
    1686116976ac_config_files="$ac_config_files Makefile src/Makefile prelude/Makefile"
     16977
     16978
     16979ac_config_headers="$ac_config_headers prelude/defines.hfa"
    1686216980
    1686316981
     
    1695217070test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
    1695317071
    16954 # Transform confdefs.h into DEFS.
    16955 # Protect against shell expansion while executing Makefile rules.
    16956 # Protect against Makefile macro expansion.
    16957 #
    16958 # If the first sed substitution is executed (which looks for macros that
    16959 # take arguments), then branch to the quote section.  Otherwise,
    16960 # look for a macro that doesn't take arguments.
    16961 ac_script='
    16962 :mline
    16963 /\\$/{
    16964  N
    16965  s,\\\n,,
    16966  b mline
    16967 }
    16968 t clear
    16969 :clear
    16970 s/^[     ]*#[    ]*define[       ][      ]*\([^  (][^    (]*([^)]*)\)[   ]*\(.*\)/-D\1=\2/g
    16971 t quote
    16972 s/^[     ]*#[    ]*define[       ][      ]*\([^  ][^     ]*\)[   ]*\(.*\)/-D\1=\2/g
    16973 t quote
    16974 b any
    16975 :quote
    16976 s/[      `~#$^&*(){}\\|;'\''"<>?]/\\&/g
    16977 s/\[/\\&/g
    16978 s/\]/\\&/g
    16979 s/\$/$$/g
    16980 H
    16981 :any
    16982 ${
    16983         g
    16984         s/^\n//
    16985         s/\n/ /g
    16986         p
    16987 }
    16988 '
    16989 DEFS=`sed -n "$ac_script" confdefs.h`
    16990 
     17072DEFS=-DHAVE_CONFIG_H
    1699117073
    1699217074ac_libobjs=
     
    1746617548esac
    1746717549
     17550case $ac_config_headers in *"
     17551"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
     17552esac
    1746817553
    1746917554
     
    1747117556# Files that config.status was made for.
    1747217557config_files="$ac_config_files"
     17558config_headers="$ac_config_headers"
    1747317559config_commands="$ac_config_commands"
    1747417560
     
    1749217578      --file=FILE[:TEMPLATE]
    1749317579                   instantiate the configuration file FILE
     17580      --header=FILE[:TEMPLATE]
     17581                   instantiate the configuration header FILE
    1749417582
    1749517583Configuration files:
    1749617584$config_files
     17585
     17586Configuration headers:
     17587$config_headers
    1749717588
    1749817589Configuration commands:
     
    1756217653    as_fn_append CONFIG_FILES " '$ac_optarg'"
    1756317654    ac_need_defaults=false;;
    17564   --he | --h |  --help | --hel | -h )
     17655  --header | --heade | --head | --hea )
     17656    $ac_shift
     17657    case $ac_optarg in
     17658    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
     17659    esac
     17660    as_fn_append CONFIG_HEADERS " '$ac_optarg'"
     17661    ac_need_defaults=false;;
     17662  --he | --h)
     17663    # Conflict between --help and --header
     17664    as_fn_error $? "ambiguous option: \`$1'
     17665Try \`$0 --help' for more information.";;
     17666  --help | --hel | -h )
    1756517667    $as_echo "$ac_cs_usage"; exit ;;
    1756617668  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
     
    1762517727macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`'
    1762617728macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`'
     17729enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
    1762717730enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
    17628 enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
    1762917731pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
    1763017732enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
     
    1800918111    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
    1801018112    "prelude/Makefile") CONFIG_FILES="$CONFIG_FILES prelude/Makefile" ;;
     18113    "prelude/defines.hfa") CONFIG_HEADERS="$CONFIG_HEADERS prelude/defines.hfa" ;;
    1801118114
    1801218115  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
     
    1802118124if $ac_need_defaults; then
    1802218125  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
     18126  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
    1802318127  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
    1802418128fi
     
    1820918313fi # test -n "$CONFIG_FILES"
    1821018314
    18211 
    18212 eval set X "  :F $CONFIG_FILES      :C $CONFIG_COMMANDS"
     18315# Set up the scripts for CONFIG_HEADERS section.
     18316# No need to generate them if there are no CONFIG_HEADERS.
     18317# This happens for instance with `./config.status Makefile'.
     18318if test -n "$CONFIG_HEADERS"; then
     18319cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
     18320BEGIN {
     18321_ACEOF
     18322
     18323# Transform confdefs.h into an awk script `defines.awk', embedded as
     18324# here-document in config.status, that substitutes the proper values into
     18325# config.h.in to produce config.h.
     18326
     18327# Create a delimiter string that does not exist in confdefs.h, to ease
     18328# handling of long lines.
     18329ac_delim='%!_!# '
     18330for ac_last_try in false false :; do
     18331  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
     18332  if test -z "$ac_tt"; then
     18333    break
     18334  elif $ac_last_try; then
     18335    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
     18336  else
     18337    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
     18338  fi
     18339done
     18340
     18341# For the awk script, D is an array of macro values keyed by name,
     18342# likewise P contains macro parameters if any.  Preserve backslash
     18343# newline sequences.
     18344
     18345ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
     18346sed -n '
     18347s/.\{148\}/&'"$ac_delim"'/g
     18348t rset
     18349:rset
     18350s/^[     ]*#[    ]*define[       ][      ]*/ /
     18351t def
     18352d
     18353:def
     18354s/\\$//
     18355t bsnl
     18356s/["\\]/\\&/g
     18357s/^ \('"$ac_word_re"'\)\(([^()]*)\)[     ]*\(.*\)/P["\1"]="\2"\
     18358D["\1"]=" \3"/p
     18359s/^ \('"$ac_word_re"'\)[         ]*\(.*\)/D["\1"]=" \2"/p
     18360d
     18361:bsnl
     18362s/["\\]/\\&/g
     18363s/^ \('"$ac_word_re"'\)\(([^()]*)\)[     ]*\(.*\)/P["\1"]="\2"\
     18364D["\1"]=" \3\\\\\\n"\\/p
     18365t cont
     18366s/^ \('"$ac_word_re"'\)[         ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
     18367t cont
     18368d
     18369:cont
     18370n
     18371s/.\{148\}/&'"$ac_delim"'/g
     18372t clear
     18373:clear
     18374s/\\$//
     18375t bsnlc
     18376s/["\\]/\\&/g; s/^/"/; s/$/"/p
     18377d
     18378:bsnlc
     18379s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
     18380b cont
     18381' <confdefs.h | sed '
     18382s/'"$ac_delim"'/"\\\
     18383"/g' >>$CONFIG_STATUS || ac_write_fail=1
     18384
     18385cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
     18386  for (key in D) D_is_set[key] = 1
     18387  FS = ""
     18388}
     18389/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
     18390  line = \$ 0
     18391  split(line, arg, " ")
     18392  if (arg[1] == "#") {
     18393    defundef = arg[2]
     18394    mac1 = arg[3]
     18395  } else {
     18396    defundef = substr(arg[1], 2)
     18397    mac1 = arg[2]
     18398  }
     18399  split(mac1, mac2, "(") #)
     18400  macro = mac2[1]
     18401  prefix = substr(line, 1, index(line, defundef) - 1)
     18402  if (D_is_set[macro]) {
     18403    # Preserve the white space surrounding the "#".
     18404    print prefix "define", macro P[macro] D[macro]
     18405    next
     18406  } else {
     18407    # Replace #undef with comments.  This is necessary, for example,
     18408    # in the case of _POSIX_SOURCE, which is predefined and required
     18409    # on some systems where configure will not decide to define it.
     18410    if (defundef == "undef") {
     18411      print "/*", prefix defundef, macro, "*/"
     18412      next
     18413    }
     18414  }
     18415}
     18416{ print }
     18417_ACAWK
     18418_ACEOF
     18419cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
     18420  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
     18421fi # test -n "$CONFIG_HEADERS"
     18422
     18423
     18424eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    :C $CONFIG_COMMANDS"
    1821318425shift
    1821418426for ac_tag
     
    1842918641  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
    1843018642 ;;
    18431 
     18643  :H)
     18644  #
     18645  # CONFIG_HEADER
     18646  #
     18647  if test x"$ac_file" != x-; then
     18648    {
     18649      $as_echo "/* $configure_input  */" \
     18650      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
     18651    } >"$ac_tmp/config.h" \
     18652      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
     18653    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
     18654      { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
     18655$as_echo "$as_me: $ac_file is unchanged" >&6;}
     18656    else
     18657      rm -f "$ac_file"
     18658      mv "$ac_tmp/config.h" "$ac_file" \
     18659        || as_fn_error $? "could not create $ac_file" "$LINENO" 5
     18660    fi
     18661  else
     18662    $as_echo "/* $configure_input  */" \
     18663      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
     18664      || as_fn_error $? "could not create -" "$LINENO" 5
     18665  fi
     18666# Compute "$ac_file"'s index in $config_headers.
     18667_am_arg="$ac_file"
     18668_am_stamp_count=1
     18669for _am_header in $config_headers :; do
     18670  case $_am_header in
     18671    $_am_arg | $_am_arg:* )
     18672      break ;;
     18673    * )
     18674      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
     18675  esac
     18676done
     18677echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
     18678$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
     18679         X"$_am_arg" : 'X\(//\)[^/]' \| \
     18680         X"$_am_arg" : 'X\(//\)$' \| \
     18681         X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
     18682$as_echo X"$_am_arg" |
     18683    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
     18684            s//\1/
     18685            q
     18686          }
     18687          /^X\(\/\/\)[^/].*/{
     18688            s//\1/
     18689            q
     18690          }
     18691          /^X\(\/\/\)$/{
     18692            s//\1/
     18693            q
     18694          }
     18695          /^X\(\/\).*/{
     18696            s//\1/
     18697            q
     18698          }
     18699          s/.*/./; q'`/stamp-h$_am_stamp_count
     18700 ;;
    1843218701
    1843318702  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
     
    1858718856macro_revision=$macro_revision
    1858818857
     18858# Whether or not to build static libraries.
     18859build_old_libs=$enable_static
     18860
    1858918861# Whether or not to build shared libraries.
    1859018862build_libtool_libs=$enable_shared
    18591 
    18592 # Whether or not to build static libraries.
    18593 build_old_libs=$enable_static
    1859418863
    1859518864# What type of objects to build.
  • libcfa/configure.ac

    re3bc51c rbcd74f3  
    109109
    110110# Checks for programs.
    111 LT_INIT
     111LT_INIT([disable-static])
    112112
    113113AC_PROG_CXX
     
    118118AC_PROG_MAKE_SET
    119119
     120AC_CHECK_HEADERS([linux/io_uring.h])
     121AC_CHECK_FUNCS([preadv2 pwritev2])
     122
    120123AC_CONFIG_FILES([
    121124        Makefile
     
    124127        ])
    125128
     129AC_CONFIG_HEADERS(prelude/defines.hfa)
     130
    126131AC_OUTPUT()
    127132
  • libcfa/prelude/Makefile.am

    re3bc51c rbcd74f3  
    2121# put into lib for now
    2222cfalibdir = ${CFA_LIBDIR}
    23 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c
     23cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
    2424
    2525CC = @LOCAL_CFACC@
  • libcfa/prelude/Makefile.in

    re3bc51c rbcd74f3  
    1 # Makefile.in generated by automake 1.16.1 from Makefile.am.
     1# Makefile.in generated by automake 1.15 from Makefile.am.
    22# @configure_input@
    33
    4 # Copyright (C) 1994-2018 Free Software Foundation, Inc.
     4# Copyright (C) 1994-2014 Free Software Foundation, Inc.
    55
    66# This Makefile.in is free software; the Free Software Foundation
     
    104104DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
    105105mkinstalldirs = $(install_sh) -d
     106CONFIG_HEADER = defines.hfa
    106107CONFIG_CLEAN_FILES =
    107108CONFIG_CLEAN_VPATH_FILES =
     
    154155am__installdirs = "$(DESTDIR)$(cfalibdir)"
    155156DATA = $(cfalib_DATA)
    156 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
    157 am__DIST_COMMON = $(srcdir)/Makefile.in
     157am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
     158        $(LISP)defines.hfa.in
     159# Read a list of newline-separated strings from the standard input,
     160# and print each of them once, without duplicates.  Input order is
     161# *not* preserved.
     162am__uniquify_input = $(AWK) '\
     163  BEGIN { nonempty = 0; } \
     164  { items[$$0] = 1; nonempty = 1; } \
     165  END { if (nonempty) { for (i in items) print i; }; } \
     166'
     167# Make sure the list of sources is unique.  This is necessary because,
     168# e.g., the same source file might be shared among _SOURCES variables
     169# for different programs/libraries.
     170am__define_uniq_tagged_files = \
     171  list='$(am__tagged_files)'; \
     172  unique=`for i in $$list; do \
     173    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
     174  done | $(am__uniquify_input)`
     175ETAGS = etags
     176CTAGS = ctags
     177am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/defines.hfa.in
    158178DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
    159179ACLOCAL = @ACLOCAL@
     
    306326# put into lib for now
    307327cfalibdir = ${CFA_LIBDIR}
    308 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c
     328cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
    309329AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@
    310330AM_CFAFLAGS = @CONFIG_CFAFLAGS@
    311331MOSTLYCLEANFILES = bootloader.c builtins.cf extras.cf gcc-builtins.c gcc-builtins.cf prelude.cfa
    312332MAINTAINERCLEANFILES = ${addprefix ${libdir}/,${cfalib_DATA}} ${addprefix ${libdir}/,${lib_LIBRARIES}}
    313 all: all-am
     333all: defines.hfa
     334        $(MAKE) $(AM_MAKEFLAGS) all-am
    314335
    315336.SUFFIXES:
     
    331352            cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
    332353          *) \
    333             echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
    334             cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
     354            echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
     355            cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
    335356        esac;
    336357
     
    343364        cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
    344365$(am__aclocal_m4_deps):
     366
     367defines.hfa: stamp-h1
     368        @test -f $@ || rm -f stamp-h1
     369        @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
     370
     371stamp-h1: $(srcdir)/defines.hfa.in $(top_builddir)/config.status
     372        @rm -f stamp-h1
     373        cd $(top_builddir) && $(SHELL) ./config.status prelude/defines.hfa
     374$(srcdir)/defines.hfa.in:  $(am__configure_deps)
     375        ($(am__cd) $(top_srcdir) && $(AUTOHEADER))
     376        rm -f stamp-h1
     377        touch $@
     378
     379distclean-hdr:
     380        -rm -f defines.hfa stamp-h1
    345381
    346382mostlyclean-libtool:
     
    370406        files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
    371407        dir='$(DESTDIR)$(cfalibdir)'; $(am__uninstall_files_from_dir)
    372 tags TAGS:
    373 
    374 ctags CTAGS:
    375 
    376 cscope cscopelist:
    377 
    378 
    379 distdir: $(BUILT_SOURCES)
    380         $(MAKE) $(AM_MAKEFLAGS) distdir-am
    381 
    382 distdir-am: $(DISTFILES)
     408
     409ID: $(am__tagged_files)
     410        $(am__define_uniq_tagged_files); mkid -fID $$unique
     411tags: tags-am
     412TAGS: tags
     413
     414tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
     415        set x; \
     416        here=`pwd`; \
     417        $(am__define_uniq_tagged_files); \
     418        shift; \
     419        if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
     420          test -n "$$unique" || unique=$$empty_fix; \
     421          if test $$# -gt 0; then \
     422            $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
     423              "$$@" $$unique; \
     424          else \
     425            $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
     426              $$unique; \
     427          fi; \
     428        fi
     429ctags: ctags-am
     430
     431CTAGS: ctags
     432ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
     433        $(am__define_uniq_tagged_files); \
     434        test -z "$(CTAGS_ARGS)$$unique" \
     435          || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
     436             $$unique
     437
     438GTAGS:
     439        here=`$(am__cd) $(top_builddir) && pwd` \
     440          && $(am__cd) $(top_srcdir) \
     441          && gtags -i $(GTAGS_ARGS) "$$here"
     442cscopelist: cscopelist-am
     443
     444cscopelist-am: $(am__tagged_files)
     445        list='$(am__tagged_files)'; \
     446        case "$(srcdir)" in \
     447          [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
     448          *) sdir=$(subdir)/$(srcdir) ;; \
     449        esac; \
     450        for i in $$list; do \
     451          if test -f "$$i"; then \
     452            echo "$(subdir)/$$i"; \
     453          else \
     454            echo "$$sdir/$$i"; \
     455          fi; \
     456        done >> $(top_builddir)/cscope.files
     457
     458distclean-tags:
     459        -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
     460
     461distdir: $(DISTFILES)
    383462        @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
    384463        topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
     
    412491check-am: all-am
    413492check: check-am
    414 all-am: Makefile $(DATA)
     493all-am: Makefile $(DATA) defines.hfa
    415494installdirs:
    416495        for dir in "$(DESTDIR)$(cfalibdir)"; do \
     
    455534distclean: distclean-am
    456535        -rm -f Makefile
    457 distclean-am: clean-am distclean-generic
     536distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
    458537
    459538dvi: dvi-am
     
    516595uninstall-am: uninstall-cfalibDATA
    517596
    518 .MAKE: install-am install-strip
    519 
    520 .PHONY: all all-am check check-am clean clean-generic clean-libtool \
    521         cscopelist-am ctags-am distclean distclean-generic \
    522         distclean-libtool distdir dvi dvi-am html html-am info info-am \
     597.MAKE: all install-am install-strip
     598
     599.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
     600        clean-libtool cscopelist-am ctags ctags-am distclean \
     601        distclean-generic distclean-hdr distclean-libtool \
     602        distclean-tags distdir dvi dvi-am html html-am info info-am \
    523603        install install-am install-cfalibDATA install-data \
    524604        install-data-am install-dvi install-dvi-am install-exec \
     
    529609        maintainer-clean-generic maintainer-clean-local mostlyclean \
    530610        mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
    531         tags-am uninstall uninstall-am uninstall-cfalibDATA
     611        tags tags-am uninstall uninstall-am uninstall-cfalibDATA
    532612
    533613.PRECIOUS: Makefile
  • libcfa/src/Makefile.am

    re3bc51c rbcd74f3  
    3333# The built sources must not depend on the installed headers
    3434AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    35 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
     35AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
    3636AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
    3737CFACC = @CFACC@
     
    3939#----------------------------------------------------------------------------------------------------------------
    4040if BUILDLIB
    41 headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa
     41headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
    4242headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
    4343          containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
     
    4848thread_headers_nosrc = concurrency/invoke.h
    4949thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
     50thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
    5151else
    5252headers =
  • libcfa/src/Makefile.in

    re3bc51c rbcd74f3  
    105105        $(am__nobase_cfa_include_HEADERS_DIST) $(am__DIST_COMMON)
    106106mkinstalldirs = $(install_sh) -d
     107CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa
    107108CONFIG_CLEAN_FILES =
    108109CONFIG_CLEAN_VPATH_FILES =
     
    164165am__libcfathread_la_SOURCES_DIST =  \
    165166        concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
    166         concurrency/invoke.c concurrency/preemption.cfa \
    167         concurrency/coroutine.cfa concurrency/thread.cfa \
    168         concurrency/kernel.cfa concurrency/monitor.cfa \
    169         concurrency/mutex.cfa
     167        concurrency/invoke.c concurrency/io.cfa \
     168        concurrency/preemption.cfa concurrency/coroutine.cfa \
     169        concurrency/thread.cfa concurrency/kernel.cfa \
     170        concurrency/monitor.cfa concurrency/mutex.cfa
    170171@BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
    171172@BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
     
    174175@BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \
    175176@BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
    176 @BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3)
     177@BUILDLIB_TRUE@ concurrency/io.lo concurrency/preemption.lo \
     178@BUILDLIB_TRUE@ $(am__objects_3)
    177179am_libcfathread_la_OBJECTS = $(am__objects_4)
    178180libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS)
     
    193195am__v_at_0 = @
    194196am__v_at_1 =
    195 DEFAULT_INCLUDES = -I.@am__isrc@
     197DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/prelude
    196198depcomp = $(SHELL) $(top_srcdir)/automake/depcomp
    197199am__depfiles_maybe = depfiles
     
    239241        containers/vector.hfa bitmanip.hfa math.hfa gmp.hfa time_t.hfa \
    240242        bits/align.hfa bits/containers.hfa bits/defs.hfa \
    241         bits/debug.hfa bits/locks.hfa concurrency/coroutine.hfa \
    242         concurrency/thread.hfa concurrency/kernel.hfa \
    243         concurrency/monitor.hfa concurrency/mutex.hfa \
    244         concurrency/invoke.h
     243        bits/debug.hfa bits/locks.hfa containers/list.hfa \
     244        concurrency/coroutine.hfa concurrency/thread.hfa \
     245        concurrency/kernel.hfa concurrency/monitor.hfa \
     246        concurrency/mutex.hfa concurrency/invoke.h
    245247HEADERS = $(nobase_cfa_include_HEADERS)
    246248am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
     
    456458# The built sources must not depend on the installed headers
    457459AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    458 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
     460AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
    459461AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
    460462@BUILDLIB_FALSE@headers_nosrc =
    461463
    462464#----------------------------------------------------------------------------------------------------------------
    463 @BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa
     465@BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
    464466@BUILDLIB_FALSE@headers =
    465467@BUILDLIB_TRUE@headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
     
    474476@BUILDLIB_FALSE@thread_headers =
    475477@BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    476 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
     478@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
    477479
    478480#----------------------------------------------------------------------------------------------------------------
     
    608610        concurrency/$(DEPDIR)/$(am__dirstamp)
    609611concurrency/invoke.lo: concurrency/$(am__dirstamp) \
     612        concurrency/$(DEPDIR)/$(am__dirstamp)
     613concurrency/io.lo: concurrency/$(am__dirstamp) \
    610614        concurrency/$(DEPDIR)/$(am__dirstamp)
    611615concurrency/preemption.lo: concurrency/$(am__dirstamp) \
  • libcfa/src/bitmanip.hfa

    re3bc51c rbcd74f3  
    1111// Created On       : Sat Mar 14 18:12:27 2020
    1212// Last Modified By : Peter A. Buhr
    13 // Last Modified On : Mon Mar 16 14:28:46 2020
    14 // Update Count     : 49
     13// Last Modified On : Sun Apr 19 22:29:58 2020
     14// Update Count     : 121
    1515//
    1616
     
    2121// Bits are numbered 1-N.
    2222
    23 #include <assert.h>
     23//#include <assert.h>
     24
     25#define __bitsizeof( n ) (sizeof(n) * __CHAR_BIT__)
    2426
    2527static inline {
    26     // Count leading 0 bits.
    27     unsigned int cl0( unsigned char n ) { return n != 0 ? __builtin_clz( n ) - (sizeof(unsigned int) * __CHAR_BIT__ - sizeof(n) * __CHAR_BIT__) : sizeof(n) * __CHAR_BIT__; }
    28     unsigned int cl0( unsigned short int n ) { return n != 0 ? __builtin_clz( n ) - (sizeof(unsigned int) * __CHAR_BIT__ - sizeof(n) * __CHAR_BIT__) : sizeof(n) * __CHAR_BIT__; }
    29     unsigned int cl0( unsigned int n ) { return n != 0 ? __builtin_clz( n ) : sizeof(n) * __CHAR_BIT__; }
    30     unsigned int cl0( unsigned long int n ) { return n != 0 ? __builtin_clzl( n ) : sizeof(n) * __CHAR_BIT__; }
    31     unsigned int cl0( unsigned long long int n ) { return n != 0 ? __builtin_clzll( n ) : sizeof(n) * __CHAR_BIT__; }
     28        // Count leading 0 bits.
     29        unsigned int leading0s( unsigned char n ) { return n != 0 ? __builtin_clz( n ) - (__bitsizeof(unsigned int) - __bitsizeof(n)) : __bitsizeof(n); }
     30        unsigned int leading0s( unsigned short int n ) { return n != 0 ? __builtin_clz( n ) - (__bitsizeof(unsigned int) - __bitsizeof(n)) : __bitsizeof(n); }
     31        unsigned int leading0s( unsigned int n ) { return n != 0 ? __builtin_clz( n ) : __bitsizeof(n); }
     32        unsigned int leading0s( unsigned long int n ) { return n != 0 ? __builtin_clzl( n ) : __bitsizeof(n); }
     33        unsigned int leading0s( unsigned long long int n ) { return n != 0 ? __builtin_clzll( n ) : __bitsizeof(n); }
    3234
    33     // Count trailing 0 bits.
    34     unsigned int ct0( unsigned char n ) { return n != 0 ? __builtin_ctz( n ) : sizeof(n) * __CHAR_BIT__; }
    35     unsigned int ct0( unsigned short int n ) { return n != 0 ? __builtin_ctz( n ) : sizeof(n) * __CHAR_BIT__; }
    36     unsigned int ct0( unsigned int n ) { return n != 0 ? __builtin_ctz( n ) : sizeof(n) * __CHAR_BIT__; }
    37     unsigned int ct0( unsigned long int n ) { return n != 0 ? __builtin_ctzl( n ) : sizeof(n) * __CHAR_BIT__; }
    38     unsigned int ct0( unsigned long long int n ) { return n != 0 ? __builtin_ctzll( n ) : sizeof(n) * __CHAR_BIT__; }
     35        // Count trailing 0 bits.
     36        unsigned int trailing0s( unsigned char n ) { return n != 0 ? __builtin_ctz( n ) : __bitsizeof(n); }
     37        unsigned int trailing0s( unsigned short int n ) { return n != 0 ? __builtin_ctz( n ) : __bitsizeof(n); }
     38        unsigned int trailing0s( unsigned int n ) { return n != 0 ? __builtin_ctz( n ) : __bitsizeof(n); }
     39        unsigned int trailing0s( unsigned long int n ) { return n != 0 ? __builtin_ctzl( n ) : __bitsizeof(n); }
     40        unsigned int trailing0s( unsigned long long int n ) { return n != 0 ? __builtin_ctzll( n ) : __bitsizeof(n); }
    3941
    40     // Count all 1 bits.
    41     unsigned int ca1( unsigned char n ) { return __builtin_popcount( n ); }
    42     unsigned int ca1( unsigned short int n ) { return __builtin_popcount( n ); }
    43     unsigned int ca1( unsigned int n ) { return __builtin_popcount( n ); }
    44     unsigned int ca1( unsigned long int n ) { return __builtin_popcountl( n ); }
    45     unsigned int ca1( unsigned long long int n ) { return __builtin_popcountll( n ); }
     42        // Count all 1 bits.
     43        unsigned int all1s( unsigned char n ) { return __builtin_popcount( n ); }
     44        unsigned int all1s( unsigned short int n ) { return __builtin_popcount( n ); }
     45        unsigned int all1s( unsigned int n ) { return __builtin_popcount( n ); }
     46        unsigned int all1s( unsigned long int n ) { return __builtin_popcountl( n ); }
     47        unsigned int all1s( unsigned long long int n ) { return __builtin_popcountll( n ); }
    4648
    47     // Count all 0 bits.
    48     unsigned int ca0( unsigned char n ) { return sizeof(n) * __CHAR_BIT__ - __builtin_popcount( n ); }
    49     unsigned int ca0( unsigned short int n ) { return sizeof(n) * __CHAR_BIT__ - __builtin_popcount( n ); }
    50     unsigned int ca0( unsigned int n ) { return sizeof(n) * __CHAR_BIT__ - __builtin_popcount( n ); }
    51     unsigned int ca0( unsigned long int n ) { return sizeof(n) * __CHAR_BIT__ - __builtin_popcountl( n ); }
    52     unsigned int ca0( unsigned long long int n ) { return sizeof(n) * __CHAR_BIT__ - __builtin_popcountll( n ); }
     49        // Count all 0 bits.
     50        unsigned int all0s( unsigned char n ) { return __bitsizeof(n) - __builtin_popcount( n ); }
     51        unsigned int all0s( unsigned short int n ) { return __bitsizeof(n) - __builtin_popcount( n ); }
     52        unsigned int all0s( unsigned int n ) { return __bitsizeof(n) - __builtin_popcount( n ); }
     53        unsigned int all0s( unsigned long int n ) { return __bitsizeof(n) - __builtin_popcountl( n ); }
     54        unsigned int all0s( unsigned long long int n ) { return __bitsizeof(n) - __builtin_popcountll( n ); }
    5355
    54     // Find least significiant set bit. (ffs)
    55     unsigned int fls( unsigned int n ) { return __builtin_ffs( n ); }
    56     unsigned int fls( unsigned long int n ) { return __builtin_ffsl( n ); }
    57     unsigned int fls( unsigned long long int n ) { return __builtin_ffsll( n ); }
     56        // Find least significiant zero bit. (ffs)
     57        unsigned int low0( unsigned char n ) { return __builtin_ffs( (typeof(n))~n ); }
     58        unsigned int low0( unsigned short int n ) { return __builtin_ffs( (typeof(n))~n ); }
     59        unsigned int low0( unsigned int n ) { return __builtin_ffs( ~n ); }
     60        unsigned int low0( unsigned long int n ) { return __builtin_ffsl( ~n ); }
     61        unsigned int low0( unsigned long long int n ) { return __builtin_ffsll( ~n ); }
    5862
    59     // Find most significiant set bit.
    60     unsigned int fms( unsigned char n ) { return n != 0 ? sizeof(unsigned int) * __CHAR_BIT__ - __builtin_clz( n ) : 0; }
    61     unsigned int fms( unsigned short int n ) { return n != 0 ? sizeof(unsigned int) * __CHAR_BIT__ - __builtin_clz( n ) : 0; }
    62     unsigned int fms( unsigned int n ) { return n != 0 ? sizeof(n) * __CHAR_BIT__ - __builtin_clz( n ) : 0; }
    63     unsigned int fms( unsigned long int n ) { return n != 0 ? sizeof(n) * __CHAR_BIT__ - __builtin_clzl( n ) : 0; }
    64     unsigned int fms( unsigned long long int n ) { return n != 0 ? sizeof(n) * __CHAR_BIT__ - __builtin_clzll( n ) : 0; }
     63        // Find least significiant one bit.
     64        unsigned int low1( unsigned int n ) { return __builtin_ffs( n ); }
     65        unsigned int low1( unsigned long int n ) { return __builtin_ffsl( n ); }
     66        unsigned int low1( unsigned long long int n ) { return __builtin_ffsll( n ); }
    6567
    66     // Check for power of 2
    67     bool pow2( unsigned long int value ) {
    68                 return (value & (value - 1)) == 0;                              // clears bits below value, rounding down to the next lower multiple of value
    69     } // pow2
     68        // Find most significiant zero bit.
     69        unsigned int high0( unsigned char n ) { return n == (typeof(n))-1 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( (typeof(n))~n ); }
     70        unsigned int high0( unsigned short int n ) { return n == (typeof(n))-1 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( (typeof(n))~n ); }
     71        unsigned int high0( unsigned int n ) { return n == -1 ? 0 : __bitsizeof(n) - __builtin_clz( ~n ); }
     72        unsigned int high0( unsigned long int n ) { return n == -1 ? 0 : __bitsizeof(n) - __builtin_clzl( ~n ); }
     73        unsigned int high0( unsigned long long int n ) { return n == -1 ? 0 : __bitsizeof(n) - __builtin_clzll( ~n ); }
    7074
    71     // Returns value aligned at the floor of align.
    72     unsigned long int floor( unsigned long int value, unsigned long int align ) {
    73                 assert( pow2( align ) );
    74                 return value & -align;                                                  // clear bits above or equal to align, giving value % align
    75     } // floor
     75        // Find most significiant one bit.
     76        unsigned int high1( unsigned char n ) { return n == 0 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( n ); }
     77        unsigned int high1( unsigned short int n ) { return n == 0 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( n ); }
     78        unsigned int high1( unsigned int n ) { return n == 0 ? 0 : __bitsizeof(n) - __builtin_clz( n ); }
     79        unsigned int high1( unsigned long int n ) { return n == 0 ? 0 : __bitsizeof(n) - __builtin_clzl( n ); }
     80        unsigned int high1( unsigned long long int n ) { return n == 0 ? 0 : __bitsizeof(n) - __builtin_clzll( n ); }
    7681
    77     // Returns value aligned at the ceiling of align.
    78     unsigned long int ceiling( unsigned long int value, unsigned long int align ) {
    79                 assert( pow2( align ) );
    80                 return -floor( -value, align );                                 // negate, round down, negate is the same as round up
    81     } // ceiling
    82 }
     82        // Check for power of 2, clears bits below n, rounding down to the next lower multiple of n.  0 is not a power of 2
     83        // but this computation returns true because of the two's complement, so it is a special case.
     84        bool is_pow2( unsigned char n ) { return n == 0 ? false : (n & (n - 1)) == 0; }
     85        bool is_pow2( unsigned short int n ) { return n == 0 ? false : (n & (n - 1)) == 0; }
     86        bool is_pow2( unsigned int n ) { return n == 0 ? false : (n & (n - 1)) == 0; }
     87        bool is_pow2( unsigned long int n ) { return n == 0 ? false : (n & (n - 1)) == 0; }
     88        bool is_pow2( unsigned long long int n ) { return n == 0 ? false : (n & (n - 1)) == 0; }
     89
     90        // Returns n aligned at the floor of align, clear bits above or equal to align, giving n % align.
     91        signed char floor2( signed char n, char align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     92        unsigned char floor2( unsigned char n, unsigned char align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     93        short int floor2( short int n, short int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     94        unsigned short int floor2( unsigned short int n, unsigned short int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     95        int floor2( int n, int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     96        unsigned int floor2( unsigned int n, unsigned int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     97        long int floor2( long int n, long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     98        unsigned long int floor2( unsigned long int n, unsigned long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     99        long long int floor2( long long int n, long long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     100        unsigned long long int floor2( unsigned long long int n, unsigned long long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; }
     101
     102        // forall( otype T | { T ?&?( T, T ); T -?( T ); } )
     103        // T floor2( T n, T align ) { /* assert( is_pow2( align ) ); */ return n & -align; }
     104
     105        signed char floor( signed char n, char align ) { return n / align * align; }
     106        unsigned char floor( unsigned char n, unsigned char align ) { return n / align * align; }
     107        short int floor( short int n, short int align ) { return n / align * align; }
     108        unsigned short int floor( unsigned short int n, unsigned short int align ) { return n / align * align; }
     109        int floor( int n, int align ) { return n / align * align; }
     110        unsigned int floor( unsigned int n, unsigned int align ) { return n / align * align; }
     111        long int floor( long int n, long int align ) { return n / align * align; }
     112        unsigned long int floor( unsigned long int n, unsigned long int align ) { return n / align * align; }
     113        long long int floor( long long int n, long long int align ) { return n / align * align; }
     114        unsigned long long int floor( unsigned long long int n, unsigned long long int align ) { return n / align * align; }
     115
     116        // forall( otype T | { T ?/?( T, T ); T ?*?( T, T ); } )
     117        // T floor( T n, T align ) { return n / align * align; }
     118
     119        // Returns n aligned at the ceiling of align, negate, round down, negate is the same as round up.
     120        signed char ceiling2( signed char n, char align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     121        unsigned char ceiling2( unsigned char n, unsigned char align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     122        short int ceiling2( short int n, short int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     123        unsigned short int ceiling2( unsigned short int n, unsigned short int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     124        int ceiling2( int n, int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     125        unsigned int ceiling2( unsigned int n, unsigned int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     126        long int ceiling2( long int n, long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     127        unsigned long int ceiling2( unsigned long int n, unsigned long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     128        long long int ceiling2( long long int n, long long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     129        unsigned long long int ceiling2( unsigned long long int n, unsigned long long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); }
     130
     131        // forall( otype T | { T floor2( T, T ); T -?( T ); } )
     132        // T ceiling2( T n, T align ) { /* assert( is_pow2( align ) ); */ return -floor2( -n, align ); }
     133
     134        signed char ceiling( signed char n, char align ) { return (n + (align - 1)) / align; }
     135        unsigned char ceiling( unsigned char n, unsigned char align ) { return (n + (align - 1)) / align; }
     136        short int ceiling( short int n, short int align ) { return (n + (align - 1)) / align; }
     137        unsigned short int ceiling( unsigned short int n, unsigned short int align ) { return (n + (align - 1)) / align; }
     138        int ceiling( int n, int align ) { return (n + (align - 1)) / align; }
     139        unsigned int ceiling( unsigned int n, unsigned int align ) { return (n + (align - 1)) / align; }
     140        long int ceiling( long int n, long int align ) { return (n + (align - 1)) / align; }
     141        unsigned long int ceiling( unsigned long int n, unsigned long int align ) { return (n + (align - 1)) / align; }
     142        long long int ceiling( long long int n, long long int align ) { return (n + (align - 1)) / align; }
     143        unsigned long long int ceiling( unsigned long long int n, unsigned long long int align ) { return (n + (align - 1)) / align; }
     144
     145        // forall( otype T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T ); T ?/?( T, T ); } )
     146        // T ceiling( T n, T align ) { return (n + (align - (T){1})) / align; }
     147} // distribution
    83148
    84149// Local Variables: //
  • libcfa/src/bits/debug.hfa

    re3bc51c rbcd74f3  
    99// Author           : Thierry Delisle
    1010// Created On       : Mon Nov 28 12:27:26 2016
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 12:29:21 2020
    13 // Update Count     : 9
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Mon Apr 27 10:15:00 2020
     13// Update Count     : 10
    1414//
    1515
     
    4040#endif
    4141        #include <stdarg.h>
    42         #include <stdio.h>
    4342
    4443        extern void __cfaabi_bits_write( int fd, const char buffer[], int len );
     
    4948        extern void __cfaabi_bits_print_vararg( int fd, const char fmt[], va_list arg );
    5049        extern void __cfaabi_bits_print_buffer( int fd, char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 4, 5) ));
     50
     51#if defined(__CFA_DEBUG_PRINT__) \
     52                || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__) \
     53                || defined(__CFA_DEBUG_PRINT_MONITOR__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__) \
     54                || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
     55        #include <stdio.h>
     56        #include <unistd.h>
     57#endif
    5158#ifdef __cforall
    5259}
    5360#endif
    5461
     62// Deprecated: Use the versions with the new module names.
    5563#ifdef __CFA_DEBUG_PRINT__
    5664        #define __cfaabi_dbg_write( buffer, len )         __cfaabi_bits_write( STDERR_FILENO, buffer, len )
    5765        #define __cfaabi_dbg_acquire()                    __cfaabi_bits_acquire()
    5866        #define __cfaabi_dbg_release()                    __cfaabi_bits_release()
    59         #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   (__VA_ARGS__)
    60         #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock (__VA_ARGS__)
    61         #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer (__VA_ARGS__)
    62         #define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len );
    63         #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( __dbg_text, __dbg_len );
     67        #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   ( STDERR_FILENO, __VA_ARGS__ )
     68        #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock ( STDERR_FILENO, __VA_ARGS__ )
     69        #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer ( STDERR_FILENO, __VA_ARGS__ )
     70        #define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
     71        #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( STDERR_FILENO, __dbg_text, __dbg_len );
    6472#else
    6573        #define __cfaabi_dbg_write(...)               ((void)0)
     
    7381#endif
    7482
     83// Debug print functions and statements:
     84// Most are wrappers around the bits printing function but are not always used.
     85// Whether they are used depends on whether the group (first argument) is active. The group must be one
      86// defined below. The other arguments depend on the wrapped function.
     87#define __cfadbg_write(group, buffer, len) \
     88        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_write(STDERR_FILENO, buffer, len))
     89#define __cfadbg_acquire(group) \
     90        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_acquire())
     91#define __cfadbg_release(group) \
     92        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_release())
     93#define __cfadbg_print_safe(group, ...) \
     94        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_safe(STDERR_FILENO, __VA_ARGS__))
     95#define __cfadbg_print_nolock(group, ...) \
     96        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_nolock(STDERR_FILENO, __VA_ARGS__))
     97#define __cfadbg_print_buffer(group, ...) \
     98        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_buffer(STDERR_FILENO, __VA_ARGS__))
     99#define __cfadbg_print_buffer_decl(group, ...) \
     100        __CFADBG_PRINT_GROUP_##group(char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len ))
     101#define __cfadbg_print_buffer_local(group, ...) \
     102        __CFADBG_PRINT_GROUP_##group(__dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write(STDERR_FILENO, __dbg_text, __dbg_len))
     103
     104// The debug print groups:
     105#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__)
     106#       define __CFADBG_PRINT_GROUP_io(...) __VA_ARGS__
     107#else
     108#       define __CFADBG_PRINT_GROUP_io(...) ((void)0)
     109#endif
     110#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__)
     111#       define __CFADBG_PRINT_GROUP_io_core(...) __VA_ARGS__
     112#else
     113#       define __CFADBG_PRINT_GROUP_io_core(...) ((void)0)
     114#endif
     115#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_MONITOR__)
     116#       define __CFADBG_PRINT_GROUP_monitor(...) __VA_ARGS__
     117#else
     118#       define __CFADBG_PRINT_GROUP_monitor(...) ((void)0)
     119#endif
     120#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__)
     121#       define __CFADBG_PRINT_GROUP_preemption(...) __VA_ARGS__
     122#else
     123#       define __CFADBG_PRINT_GROUP_preemption(...) ((void)0)
     124#endif
     125#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__)
     126#       define __CFADBG_PRINT_GROUP_runtime_core(...) __VA_ARGS__
     127#else
     128#       define __CFADBG_PRINT_GROUP_runtime_core(...) ((void)0)
     129#endif
     130#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
     131#       define __CFADBG_PRINT_GROUP_ready_queue(...) __VA_ARGS__
     132#else
     133#       define __CFADBG_PRINT_GROUP_ready_queue(...) ((void)0)
     134#endif
     135#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
     136#       define __CFADBG_PRINT_GROUP_exception(...) __VA_ARGS__
     137#else
     138#       define __CFADBG_PRINT_GROUP_exception(...) ((void)0)
     139#endif
     140
    75141// Local Variables: //
    76142// mode: c //
  • libcfa/src/bits/locks.hfa

    re3bc51c rbcd74f3  
    112112        #endif
    113113
     114        extern "C" {
     115                char * strerror(int);
     116        }
     117        #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); }
     118
    114119        struct __bin_sem_t {
    115                 bool                    signaled;
    116120                pthread_mutex_t         lock;
    117121                pthread_cond_t          cond;
     122                int                     val;
    118123        };
    119124
    120125        static inline void ?{}(__bin_sem_t & this) with( this ) {
    121                 signaled = false;
    122                 pthread_mutex_init(&lock, NULL);
    123                 pthread_cond_init (&cond, NULL);
     126                // Create the mutex with error checking
     127                pthread_mutexattr_t mattr;
     128                pthread_mutexattr_init( &mattr );
     129                pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP);
     130                pthread_mutex_init(&lock, &mattr);
     131
     132                pthread_cond_init (&cond, 0p);
     133                val = 0;
    124134        }
    125135
    126136        static inline void ^?{}(__bin_sem_t & this) with( this ) {
    127                 pthread_mutex_destroy(&lock);
    128                 pthread_cond_destroy (&cond);
     137                CHECKED( pthread_mutex_destroy(&lock) );
     138                CHECKED( pthread_cond_destroy (&cond) );
    129139        }
    130140
    131141        static inline void wait(__bin_sem_t & this) with( this ) {
    132142                verify(__cfaabi_dbg_in_kernel());
    133                 pthread_mutex_lock(&lock);
    134                         if(!signaled) {   // this must be a loop, not if!
     143                CHECKED( pthread_mutex_lock(&lock) );
     144                        while(val < 1) {
    135145                                pthread_cond_wait(&cond, &lock);
    136146                        }
    137                         signaled = false;
    138                 pthread_mutex_unlock(&lock);
     147                        val -= 1;
     148                CHECKED( pthread_mutex_unlock(&lock) );
    139149        }
    140150
    141151        static inline bool post(__bin_sem_t & this) with( this ) {
    142                 pthread_mutex_lock(&lock);
    143                         bool needs_signal = !signaled;
    144                         signaled = true;
    145                 pthread_mutex_unlock(&lock);
     152                bool needs_signal = false;
    146153
    147                 if (needs_signal) pthread_cond_signal(&cond);
     154                CHECKED( pthread_mutex_lock(&lock) );
     155                        if(val < 1) {
     156                                val += 1;
     157                                pthread_cond_signal(&cond);
     158                                needs_signal = true;
     159                        }
     160                CHECKED( pthread_mutex_unlock(&lock) );
    148161
    149162                return needs_signal;
    150163        }
     164
     165        #undef CHECKED
    151166#endif
  • libcfa/src/bits/signal.hfa

    re3bc51c rbcd74f3  
    5454                        sig, handler, flags, errno, strerror( errno )
    5555                );
    56                 _exit( EXIT_FAILURE );
     56                _Exit( EXIT_FAILURE );
    5757        } // if
    5858}
  • libcfa/src/concurrency/alarm.cfa

    re3bc51c rbcd74f3  
    5151        this.alarm = alarm;
    5252        this.period = period;
    53         next = 0;
    5453        set = false;
    5554        kernel_alarm = false;
     
    6059        this.alarm = alarm;
    6160        this.period = period;
    62         next = 0;
    6361        set = false;
    6462        kernel_alarm = true;
     
    7169}
    7270
    73 #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
    74 bool validate( alarm_list_t * this ) {
    75         alarm_node_t ** it = &this->head;
    76         while( (*it) ) {
    77                 it = &(*it)->next;
     71void insert( alarm_list_t * this, alarm_node_t * n ) {
     72        alarm_node_t * it = & (*this)`first;
     73        while( it && (n->alarm > it->alarm) ) {
     74                it = & (*it)`next;
     75        }
     76        if ( it ) {
     77                insert_before( *it, *n );
     78        } else {
     79                insert_last(*this, *n);
    7880        }
    7981
    80         return it == this->tail;
    81 }
    82 #endif
    83 
    84 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
    85         verify( !n->next );
    86         if( p == this->tail ) {
    87                 this->tail = &n->next;
    88         }
    89         else {
    90                 n->next = *p;
    91         }
    92         *p = n;
    93 
    94         verify( validate( this ) );
    95 }
    96 
    97 void insert( alarm_list_t * this, alarm_node_t * n ) {
    98         alarm_node_t ** it = &this->head;
    99         while( (*it) && (n->alarm > (*it)->alarm) ) {
    100                 it = &(*it)->next;
    101         }
    102 
    103         insert_at( this, n, it );
    104 
    105         verify( validate( this ) );
     82        verify( validate( *this ) );
    10683}
    10784
    10885alarm_node_t * pop( alarm_list_t * this ) {
    109         alarm_node_t * head = this->head;
     86        verify( validate( *this ) );
     87        alarm_node_t * head = & (*this)`first;
    11088        if( head ) {
    111                 this->head = head->next;
    112                 if( !head->next ) {
    113                         this->tail = &this->head;
    114                 }
    115                 head->next = 0p;
     89                remove(*head);
    11690        }
    117         verify( validate( this ) );
     91        verify( validate( *this ) );
    11892        return head;
    11993}
    12094
    121 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
    122         verify( it );
    123         verify( (*it) == n );
    124 
    125         (*it) = n->next;
    126         if( !n-> next ) {
    127                 this->tail = it;
    128         }
    129         n->next = 0p;
    130 
    131         verify( validate( this ) );
    132 }
    133 
    134 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
    135         alarm_node_t ** it = &this->head;
    136         while( (*it) && (*it) != n ) {
    137                 it = &(*it)->next;
    138         }
    139 
    140         verify( validate( this ) );
    141 
    142         if( *it ) { remove_at( this, n, it ); }
    143 
    144         verify( validate( this ) );
    145 }
    146 
    14795void register_self( alarm_node_t * this ) {
    148         alarm_list_t * alarms = &event_kernel->alarms;
     96        alarm_list_t & alarms = event_kernel->alarms;
    14997
    15098        disable_interrupts();
     
    152100        {
    153101                verify( validate( alarms ) );
    154                 bool first = !alarms->head;
     102                bool first = ! & alarms`first;
    155103
    156                 insert( alarms, this );
     104                insert( &alarms, this );
    157105                if( first ) {
    158                         __kernel_set_timer( alarms->head->alarm - __kernel_get_time() );
     106                        __kernel_set_timer( alarms`first.alarm - __kernel_get_time() );
    159107                }
    160108        }
     
    168116        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
    169117        {
    170                 verify( validate( &event_kernel->alarms ) );
    171                 remove( &event_kernel->alarms, this );
     118                verify( validate( event_kernel->alarms ) );
     119                remove( *this );
    172120        }
    173121        unlock( event_kernel->lock );
     
    176124}
    177125
     126//=============================================================================================
     127// Utilities
     128//=============================================================================================
     129
     130void sleep( Duration duration ) {
     131        alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s };
     132
     133        register_self( &node );
     134        park( __cfaabi_dbg_ctx );
     135
     136        /* paranoid */ verify( !node.set );
     137        /* paranoid */ verify( & node`next == 0p );
     138        /* paranoid */ verify( & node`prev == 0p );
     139}
     140
    178141// Local Variables: //
    179142// mode: c //
  • libcfa/src/concurrency/alarm.hfa

    re3bc51c rbcd74f3  
    2323#include "time.hfa"
    2424
     25#include <containers/list.hfa>
     26
    2527struct $thread;
    2628struct processor;
     
    4042        Time alarm;                             // time when alarm goes off
    4143        Duration period;                        // if > 0 => period of alarm
    42         alarm_node_t * next;            // intrusive link list field
     44
     45        DLISTED_MGD_IMPL_IN(alarm_node_t)
    4346
    4447        union {
     
    5053        bool kernel_alarm       :1;             // true if this is not a user defined alarm
    5154};
    52 
    53 typedef alarm_node_t ** __alarm_it_t;
     55DLISTED_MGD_IMPL_OUT(alarm_node_t)
    5456
    5557void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
     
    5759void ^?{}( alarm_node_t & this );
    5860
    59 struct alarm_list_t {
    60         alarm_node_t * head;
    61         __alarm_it_t tail;
    62 };
    63 
    64 static inline void ?{}( alarm_list_t & this ) with( this ) {
    65         head = 0;
    66         tail = &head;
    67 }
     61typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
    6862
    6963void insert( alarm_list_t * this, alarm_node_t * n );
  • libcfa/src/concurrency/kernel.cfa

    re3bc51c rbcd74f3  
    1515
    1616#define __cforall_thread__
     17// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    1718
    1819//C Includes
     
    4041#include "invoke.h"
    4142
     43
    4244//-----------------------------------------------------------------------------
    4345// Some assembly required
     
    230232        idle{};
    231233
    232         __cfaabi_dbg_print_safe("Kernel : Starting core %p\n", &this);
     234        __cfadbg_print_safe(runtime_core, "Kernel : Starting core %p\n", &this);
    233235
    234236        this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
    235237
    236         __cfaabi_dbg_print_safe("Kernel : core %p started\n", &this);
     238        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    237239}
    238240
    239241void ^?{}(processor & this) with( this ){
    240242        if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
    241                 __cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);
     243                __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this);
    242244
    243245                __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
     
    248250        }
    249251
    250         pthread_join( kernel_thread, 0p );
     252        int err = pthread_join( kernel_thread, 0p );
     253        if( err != 0 ) abort("KERNEL ERROR: joining processor %p caused error %s\n", &this, strerror(err));
     254
    251255        free( this.stack );
    252256}
    253257
    254 void ?{}(cluster & this, const char name[], Duration preemption_rate) with( this ) {
     258void ?{}(cluster & this, const char name[], Duration preemption_rate, int io_flags) with( this ) {
    255259        this.name = name;
    256260        this.preemption_rate = preemption_rate;
     
    258262        ready_queue_lock{};
    259263
     264        #if !defined(__CFA_NO_STATISTICS__)
     265                print_stats = false;
     266        #endif
     267
    260268        procs{ __get };
    261269        idles{ __get };
    262270        threads{ __get };
    263271
     272        __kernel_io_startup( this, io_flags, &this == mainCluster );
     273
    264274        doregister(this);
    265275}
    266276
    267277void ^?{}(cluster & this) {
     278        __kernel_io_shutdown( this, &this == mainCluster );
     279
    268280        unregister(this);
    269281}
     
    281293        verify(this);
    282294
    283         __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
     295        __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
    284296
    285297        doregister(this->cltr, this);
     
    289301                preemption_scope scope = { this };
    290302
    291                 __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
     303                __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
    292304
    293305                $thread * readyThread = 0p;
     
    315327                }
    316328
    317                 __cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
     329                __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
    318330        }
    319331
     
    322334        V( this->terminated );
    323335
    324         __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
     336        __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
    325337
    326338        // HACK : the coroutine context switch expects this_thread to be set
     
    467479
    468480        //We now have a proper context from which to schedule threads
    469         __cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
     481        __cfadbg_print_safe(runtime_core, "Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
    470482
    471483        // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
     
    478490
    479491        // Main routine of the core returned, the core is now fully terminated
    480         __cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
     492        __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner);
    481493
    482494        return 0p;
     
    611623}
    612624
    613 void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
    614         if( !thrd ) return;
    615 
    616         disable_interrupts();
     625// KERNEL ONLY unpark without disabling interrupts
     626void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
    617627        static_assert(sizeof(thrd->state) == sizeof(int));
    618628
     
    643653                        abort();
    644654        }
     655}
     656
     657void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
     658        if( !thrd ) return;
     659
     660        disable_interrupts();
     661        __unpark( thrd __cfaabi_dbg_ctx_fwd2 );
    645662        enable_interrupts( __cfaabi_dbg_ctx );
    646663}
     
    704721static void __kernel_startup(void) {
    705722        verify( ! kernelTLS.preemption_state.enabled );
    706         __cfaabi_dbg_print_safe("Kernel : Starting\n");
     723        __cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
    707724
    708725        __page_size = sysconf( _SC_PAGESIZE );
     
    715732        (*mainCluster){"Main Cluster"};
    716733
    717         __cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
     734        __cfadbg_print_safe(runtime_core, "Kernel : Main cluster ready\n");
    718735
    719736        // Start by initializing the main thread
     
    725742        (*mainThread){ &info };
    726743
    727         __cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
     744        __cfadbg_print_safe(runtime_core, "Kernel : Main thread ready\n");
    728745
    729746
     
    746763
    747764                runner{ &this };
    748                 __cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
     765                __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
    749766        }
    750767
     
    771788
    772789
    773 
    774790        // THE SYSTEM IS NOW COMPLETELY RUNNING
    775         __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
     791
     792
     793        // Now that the system is up, finish creating systems that need threading
     794        __kernel_io_finish_start( *mainCluster );
     795
     796
     797        __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
    776798
    777799        verify( ! kernelTLS.preemption_state.enabled );
     
    781803
    782804static void __kernel_shutdown(void) {
    783         __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
     805        //Before we start shutting things down, wait for systems that need threading to shutdown
     806        __kernel_io_prepare_stop( *mainCluster );
    784807
    785808        /* paranoid */ verify( TL_GET( preemption_state.enabled ) );
    786809        disable_interrupts();
    787810        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     811
     812        __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
    788813
    789814        // SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
     
    801826        // Destroy the main processor and its context in reverse order of construction
    802827        // These were manually constructed so we need manually destroy them
     828        void ^?{}(processor & this) with( this ){
     829                /* paranoid */ verify( this.do_terminate == true );
     830        }
     831
    803832        ^(*mainProcessor){};
    804833
     
    808837        ^(*mainThread){};
    809838
     839        ^(*mainCluster){};
     840
    810841        ^(__cfa_dbg_global_clusters.list){};
    811842        ^(__cfa_dbg_global_clusters.lock){};
    812843
    813         __cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
     844        __cfadbg_print_safe(runtime_core, "Kernel : Shutdown complete\n");
    814845}
    815846
     
    836867
    837868        // We are ready to sleep
    838         __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
     869        __cfadbg_print_safe(runtime_core, "Kernel : Processor %p ready to sleep\n", this);
    839870        wait( idle );
    840871
    841872        // We have woken up
    842         __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
     873        __cfadbg_print_safe(runtime_core, "Kernel : Processor %p woke up and ready to run\n", this);
    843874
    844875        // Get ourself off the idle list
     
    856887static bool __wake_one(cluster * this, __attribute__((unused)) bool force) {
    857888        // if we don't want to force check if we know it's false
    858         if( !this->idles.head && !force ) return false;
     889        // if( !this->idles.head && !force ) return false;
    859890
    860891        // First, lock the cluster idle
     
    869900
    870901        // Wake them up
     902        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this->idles.head);
     903        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    871904        post( this->idles.head->idle );
    872905
     
    878911// Unconditionnaly wake a thread
    879912static bool __wake_proc(processor * this) {
    880         return post( this->idle );
     913        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
     914
     915        disable_interrupts();
     916                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     917                bool ret = post( this->idle );
     918        enable_interrupts( __cfaabi_dbg_ctx );
     919
     920        return ret;
    881921}
    882922
     
    9601000void ^?{}(semaphore & this) {}
    9611001
    962 void P(semaphore & this) with( this ){
     1002bool P(semaphore & this) with( this ){
    9631003        lock( lock __cfaabi_dbg_ctx2 );
    9641004        count -= 1;
     
    9701010                unlock( lock );
    9711011                park( __cfaabi_dbg_ctx );
     1012                return true;
    9721013        }
    9731014        else {
    9741015            unlock( lock );
     1016            return false;
    9751017        }
    9761018}
     
    9891031        // make new owner
    9901032        unpark( thrd __cfaabi_dbg_ctx2 );
     1033
     1034        return thrd != 0p;
     1035}
     1036
     1037bool V(semaphore & this, unsigned diff) with( this ) {
     1038        $thread * thrd = 0p;
     1039        lock( lock __cfaabi_dbg_ctx2 );
     1040        int release = max(-count, (int)diff);
     1041        count += diff;
     1042        for(release) {
     1043                unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
     1044        }
     1045
     1046        unlock( lock );
    9911047
    9921048        return thrd != 0p;
  • libcfa/src/concurrency/kernel.hfa

    re3bc51c rbcd74f3  
    1717
    1818#include <stdbool.h>
     19#include <stdint.h>
    1920
    2021#include "invoke.h"
     
    3738void  ?{}(semaphore & this, int count = 1);
    3839void ^?{}(semaphore & this);
    39 void   P (semaphore & this);
     40bool   P (semaphore & this);
    4041bool   V (semaphore & this);
     42bool   V (semaphore & this, unsigned count);
    4143
    4244
     
    111113
    112114//-----------------------------------------------------------------------------
     115// I/O
     116struct __io_data;
     117
     118#define CFA_CLUSTER_IO_POLLER_USER_THREAD 1 << 0
     119// #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE 1 << 1
     120
     121//-----------------------------------------------------------------------------
    113122// Cluster
    114123struct cluster {
     
    141150                cluster * prev;
    142151        } node;
     152
     153        struct __io_data * io;
     154
     155        #if !defined(__CFA_NO_STATISTICS__)
     156                bool print_stats;
     157        #endif
    143158};
    144159extern Duration default_preemption();
    145160
    146 void ?{} (cluster & this, const char name[], Duration preemption_rate);
     161void ?{} (cluster & this, const char name[], Duration preemption_rate, int flags);
    147162void ^?{}(cluster & this);
    148163
    149 static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
    150 static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
    151 static inline void ?{} (cluster & this, const char name[])        { this{name, default_preemption()}; }
     164static inline void ?{} (cluster & this)                                      { this{"Anonymous Cluster", default_preemption(), 0}; }
     165static inline void ?{} (cluster & this, Duration preemption_rate)            { this{"Anonymous Cluster", preemption_rate, 0}; }
     166static inline void ?{} (cluster & this, const char name[])                   { this{name, default_preemption(), 0}; }
     167static inline void ?{} (cluster & this, int flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
     168static inline void ?{} (cluster & this, Duration preemption_rate, int flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
     169static inline void ?{} (cluster & this, const char name[], int flags)        { this{name, default_preemption(), flags}; }
    152170
    153171static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
     
    156174static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
    157175
     176#if !defined(__CFA_NO_STATISTICS__)
     177        static inline void print_stats_at_exit( cluster & this ) {
     178                this.print_stats = true;
     179        }
     180#endif
     181
    158182// Local Variables: //
    159183// mode: c //
  • libcfa/src/concurrency/kernel_private.hfa

    re3bc51c rbcd74f3  
    5959extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
    6060
     61extern cluster * mainCluster;
     62
    6163//-----------------------------------------------------------------------------
    6264// Threads
     
    6971        extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
    7072)
     73
     74// KERNEL ONLY unpark with out disabling interrupts
     75void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 );
     76
     77//-----------------------------------------------------------------------------
     78// I/O
     79void __kernel_io_startup     ( cluster &, int, bool );
     80void __kernel_io_finish_start( cluster & );
     81void __kernel_io_prepare_stop( cluster & );
     82void __kernel_io_shutdown    ( cluster &, bool );
    7183
    7284//-----------------------------------------------------------------------------
  • libcfa/src/concurrency/preemption.cfa

    re3bc51c rbcd74f3  
    4343// FwdDeclarations : Signal handlers
    4444static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
     45static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
    4546static void sigHandler_segv     ( __CFA_SIGPARMS__ );
    4647static void sigHandler_ill      ( __CFA_SIGPARMS__ );
     
    8384// Get next expired node
    8485static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    85         if( !alarms->head ) return 0p;                                          // If no alarms return null
    86         if( alarms->head->alarm >= currtime ) return 0p;        // If alarms head not expired return null
     86        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
     87        if( (*alarms)`first.alarm >= currtime ) return 0p;      // If alarms head not expired return null
    8788        return pop(alarms);                                                                     // Otherwise just pop head
    8889}
     
    9798        while( node = get_expired( alarms, currtime ) ) {
    9899                // __cfaabi_dbg_print_buffer_decl( " KERNEL: preemption tick.\n" );
     100                Duration period = node->period;
     101                if( period == 0) {
     102                        node->set = false;                  // Node is one-shot, just mark it as not pending
     103                }
    99104
    100105                // Check if this is a kernel
     
    107112
    108113                // Check if this is a periodic alarm
    109                 Duration period = node->period;
    110114                if( period > 0 ) {
    111115                        // __cfaabi_dbg_print_buffer_local( " KERNEL: alarm period is %lu.\n", period.tv );
     
    113117                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    114118                }
    115                 else {
    116                         node->set = false;                  // Node is one-shot, just mark it as not pending
    117                 }
    118119        }
    119120
    120121        // If there are still alarms pending, reset the timer
    121         if( alarms->head ) {
     122        if( & (*alarms)`first ) {
    122123                __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
    123                 Duration delta = alarms->head->alarm - currtime;
    124                 Duration caped = max(delta, 50`us);
     124                Duration delta = (*alarms)`first.alarm - currtime;
     125                Duration capped = max(delta, 50`us);
    125126                // itimerval tim  = { caped };
    126127                // __cfaabi_dbg_print_buffer_local( "    Values are %lu, %lu, %lu %lu.\n", delta.tv, caped.tv, tim.it_value.tv_sec, tim.it_value.tv_usec);
    127128
    128                 __kernel_set_timer( caped );
     129                __kernel_set_timer( capped );
    129130        }
    130131}
     
    256257
    257258        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
    258             abort( "internal error, pthread_sigmask" );
     259                abort( "internal error, pthread_sigmask" );
    259260        }
    260261}
     
    268269// reserved for future use
    269270static void timeout( $thread * this ) {
    270         //TODO : implement waking threads
     271        __unpark( this __cfaabi_dbg_ctx2 );
    271272}
    272273
     
    303304        // Setup proper signal handlers
    304305        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
     306        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
    305307
    306308        signal_block( SIGALRM );
     
    394396
    395397        force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
     398}
     399
     400static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
     401        abort("SIGALRM should never reach the signal handler");
    396402}
    397403
  • libcfa/src/concurrency/thread.hfa

    re3bc51c rbcd74f3  
    117117}
    118118
     119//----------
     120// sleep: force thread to block and be rescheduled after Duration duration
     121void sleep( Duration duration );
     122
    119123// Local Variables: //
    120124// mode: c //
  • libcfa/src/exception.c

    re3bc51c rbcd74f3  
    1010// Created On       : Mon Jun 26 15:13:00 2017
    1111// Last Modified By : Andrew Beach
    12 // Last Modified On : Fri Apr 03 11:57:00 2020
    13 // Update Count     : 14
     12// Last Modified On : Tue Apr 14 12:01:00 2020
     13// Update Count     : 18
    1414//
    1515
     
    2828#include <unwind.h>
    2929#include <bits/debug.hfa>
     30#include "stdhdr/assert.h"
    3031
    3132// FIX ME: temporary hack to keep ARM build working
     
    7576// RESUMPTION ================================================================
    7677
     78static void reset_top_resume(struct __cfaehm_try_resume_node ** store) {
     79        this_exception_context()->top_resume = *store;
     80}
     81
    7782void __cfaehm_throw_resume(exception_t * except) {
    7883        struct exception_context_t * context = this_exception_context();
    7984
    80         __cfaabi_dbg_print_safe("Throwing resumption exception\n");
    81 
     85        __cfadbg_print_safe(exception, "Throwing resumption exception\n");
     86
     87        __attribute__((cleanup(reset_top_resume)))
    8288        struct __cfaehm_try_resume_node * original_head = context->top_resume;
    8389        struct __cfaehm_try_resume_node * current = context->top_resume;
     
    8692                context->top_resume = current->next;
    8793                if (current->handler(except)) {
    88                         context->top_resume = original_head;
    8994                        return;
    9095                }
    9196        }
    9297
    93         __cfaabi_dbg_print_safe("Unhandled exception\n");
    94         context->top_resume = original_head;
     98        __cfadbg_print_safe(exception, "Unhandled exception\n");
    9599
    96100        // Fall back to termination:
     
    176180        struct exception_context_t * context = this_exception_context();
    177181
    178         __cfaabi_dbg_print_safe("Deleting Exception\n");
     182        __cfadbg_print_safe(exception, "Deleting Exception\n");
    179183
    180184        // Remove the exception from the list.
     
    213217                struct _Unwind_Context * unwind_context,
    214218                void * stop_param) {
    215         if ( actions & _UA_END_OF_STACK  ) exit(1);
    216         if ( actions & _UA_CLEANUP_PHASE ) return _URC_NO_REASON;
    217 
    218         return _URC_FATAL_PHASE2_ERROR;
     219        // Verify actions follow the rules we expect.
     220        verify((actions & _UA_CLEANUP_PHASE) && (actions & _UA_FORCE_UNWIND));
     221        verify(!(actions & (_UA_SEARCH_PHASE | _UA_HANDER_FRAME)));
     222
     223        if ( actions & _UA_END_OF_STACK ) {
     224                exit(1);
     225        } else {
     226                return _URC_NO_REASON;
     227        }
    219228}
    220229
     
    252261
    253262void __cfaehm_throw_terminate( exception_t * val ) {
    254         __cfaabi_dbg_print_safe("Throwing termination exception\n");
     263        __cfadbg_print_safe(exception, "Throwing termination exception\n");
    255264
    256265        __cfaehm_allocate_exception( val );
     
    259268
    260269void __cfaehm_rethrow_terminate(void) {
    261         __cfaabi_dbg_print_safe("Rethrowing termination exception\n");
     270        __cfadbg_print_safe(exception, "Rethrowing termination exception\n");
    262271
    263272        __cfaehm_begin_unwind();
     
    275284{
    276285
    277         //__cfaabi_dbg_print_safe("CFA: 0x%lx\n", _Unwind_GetCFA(context));
    278         __cfaabi_dbg_print_safe("Personality function (%d, %x, %llu, %p, %p):",
     286        //__cfadbg_print_safe(exception, "CFA: 0x%lx\n", _Unwind_GetCFA(context));
     287        __cfadbg_print_safe(exception, "Personality function (%d, %x, %llu, %p, %p):",
    279288                        version, actions, exception_class, unwind_exception, unwind_context);
    280289
    281         // If we've reached the end of the stack then there is nothing much we can do...
    282         if (actions & _UA_END_OF_STACK) return _URC_END_OF_STACK;
    283 
     290        // Verify that actions follow the rules we expect.
     291        // This function should never be called at the end of the stack.
     292        verify(!(actions & _UA_END_OF_STACK));
     293        // Either only the search phase flag is set or...
    284294        if (actions & _UA_SEARCH_PHASE) {
    285                 __cfaabi_dbg_print_safe(" lookup phase");
    286         }
    287         else if (actions & _UA_CLEANUP_PHASE) {
    288                 __cfaabi_dbg_print_safe(" cleanup phase");
    289         }
    290         // Just in case, probably can't actually happen
    291         else {
    292                 printf(" error\n");
    293                 return _URC_FATAL_PHASE1_ERROR;
     295                verify(actions == _UA_SEARCH_PHASE);
     296                __cfadbg_print_safe(exception, " lookup phase");
     297        // ... we are in clean-up phase.
     298        } else {
     299                verify(actions & _UA_CLEANUP_PHASE);
     300                __cfadbg_print_safe(exception, " cleanup phase");
     301                // We shouldn't be the handler frame during forced unwind.
     302                if (actions & _UA_HANDLER_FRAME) {
     303                        verify(!(actions & _UA_FORCE_UNWIND));
     304                        __cfadbg_print_safe(exception, " (handler frame)");
     305                } else if (actions & _UA_FORCE_UNWIND) {
     306                        __cfadbg_print_safe(exception, " (force unwind)");
     307                }
    294308        }
    295309
     
    331345                        void * ep = (void*)lsd_info.Start + callsite_start + callsite_len;
    332346                        void * ip = (void*)instruction_ptr;
    333                         __cfaabi_dbg_print_safe("\nfound %p - %p (%p, %p, %p), looking for %p\n",
     347                        __cfadbg_print_safe(exception, "\nfound %p - %p (%p, %p, %p), looking for %p\n",
    334348                                        bp, ep, ls, cs, cl, ip);
    335349#endif // __CFA_DEBUG_PRINT__
     
    346360                if ( 0 == callsite_landing_pad ) {
    347361                        // Nothing to do, move along
    348                         __cfaabi_dbg_print_safe(" no landing pad");
     362                        __cfadbg_print_safe(exception, " no landing pad");
    349363                } else if (actions & _UA_SEARCH_PHASE) {
    350364                        // In search phase, these means we found a potential handler we must check.
     
    384398                                // Based on the return value, check if we matched the exception
    385399                                if (ret == _URC_HANDLER_FOUND) {
    386                                         __cfaabi_dbg_print_safe(" handler found\n");
     400                                        __cfadbg_print_safe(exception, " handler found\n");
    387401                                } else {
    388                                         __cfaabi_dbg_print_safe(" no handler\n");
     402                                        __cfadbg_print_safe(exception, " no handler\n");
    389403                                }
    390404                                return ret;
     
    392406
    393407                        // This is only a cleanup handler, ignore it
    394                         __cfaabi_dbg_print_safe(" no action");
    395                 } else if (actions & _UA_CLEANUP_PHASE) {
     408                        __cfadbg_print_safe(exception, " no action");
     409                } else {
    396410                        // In clean-up phase, no destructors here but this could be the handler.
    397411
     
    414428                        _Unwind_SetIP( unwind_context, ((lsd_info.LPStart) + (callsite_landing_pad)) );
    415429
    416                         __cfaabi_dbg_print_safe(" action\n");
     430                        __cfadbg_print_safe(exception, " action\n");
    417431
    418432                        // Return have some action to run
     
    421435        }
    422436        // No handling found
    423         __cfaabi_dbg_print_safe(" table end reached\n");
     437        __cfadbg_print_safe(exception, " table end reached");
    424438
    425439        UNWIND:
    426         __cfaabi_dbg_print_safe(" unwind\n");
     440        __cfadbg_print_safe(exception, " unwind\n");
    427441
    428442        // Keep unwinding the stack
     
    431445
    432446#pragma GCC push_options
    433 #pragma GCC optimize("O0")
     447#pragma GCC optimize(0)
    434448
    435449// Try statements are hoisted out see comments for details. While this could probably be unique
     
    528542        "       .quad __gcfa_personality_v0\n"
    529543#else // then __i386
    530         "   .long __gcfa_personality_v0\n"
     544        "       .long __gcfa_personality_v0\n"
    531545#endif
    532546);
  • libcfa/src/heap.cfa

    re3bc51c rbcd74f3  
    1010// Created On       : Tue Dec 19 21:58:35 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Apr  1 15:59:53 2020
    13 // Update Count     : 692
     12// Last Modified On : Wed May  6 17:29:26 2020
     13// Update Count     : 727
    1414//
    1515
     
    1919#include <errno.h>                                                                              // errno
    2020#include <string.h>                                                                             // memset, memcpy
     21#include <limits.h>                                                                             // ULONG_MAX
    2122extern "C" {
    2223#include <sys/mman.h>                                                                   // mmap, munmap
    2324} // extern "C"
    2425
    25 // #comment TD : Many of these should be merged into math I believe
    2626#include "bits/align.hfa"                                                               // libPow2
    2727#include "bits/defs.hfa"                                                                // likely, unlikely
     
    3030//#include "stdlib.hfa"                                                                 // bsearchl
    3131#include "malloc.h"
     32#include "bitmanip.hfa"                                                                 // ceiling
    3233
    3334#define MIN(x, y) (y > x ? x : y)
     
    8182};
    8283
     84size_t default_heap_expansion() __attribute__(( weak )) {
     85        return __CFA_DEFAULT_HEAP_EXPANSION__;
     86} // default_heap_expansion
     87
    8388size_t default_mmap_start() __attribute__(( weak )) {
    8489        return __CFA_DEFAULT_MMAP_START__;
    8590} // default_mmap_start
    86 
    87 size_t default_heap_expansion() __attribute__(( weak )) {
    88         return __CFA_DEFAULT_HEAP_EXPANSION__;
    89 } // default_heap_expansion
    9091
    9192
     
    181182                                } fake; // FakeHeader
    182183                        } kind; // Kind
    183                         uint32_t dimension;                                                     // used by calloc-like to remember number of array elements
     184                        size_t size;                                                            // allocation size in bytes
    184185                } header; // Header
    185186                char pad[libAlign() - sizeof( Header )];
     
    265266static unsigned long long int free_storage;
    266267static unsigned int free_calls;
     268static unsigned long long int aalloc_storage;
     269static unsigned int aalloc_calls;
    267270static unsigned long long int calloc_storage;
    268271static unsigned int calloc_calls;
    269272static unsigned long long int memalign_storage;
    270273static unsigned int memalign_calls;
     274static unsigned long long int amemalign_storage;
     275static unsigned int amemalign_calls;
    271276static unsigned long long int cmemalign_storage;
    272277static unsigned int cmemalign_calls;
     
    280285// Use "write" because streams may be shutdown when calls are made.
    281286static void printStats() {
    282         char helpText[512];
     287        char helpText[1024];
    283288        __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
    284289                                                                        "\nHeap statistics:\n"
    285290                                                                        "  malloc: calls %u / storage %llu\n"
     291                                                                        "  aalloc: calls %u / storage %llu\n"
    286292                                                                        "  calloc: calls %u / storage %llu\n"
    287293                                                                        "  memalign: calls %u / storage %llu\n"
     294                                                                        "  amemalign: calls %u / storage %llu\n"
    288295                                                                        "  cmemalign: calls %u / storage %llu\n"
    289296                                                                        "  resize: calls %u / storage %llu\n"
     
    294301                                                                        "  sbrk: calls %u / storage %llu\n",
    295302                                                                        malloc_calls, malloc_storage,
     303                                                                        aalloc_calls, calloc_storage,
    296304                                                                        calloc_calls, calloc_storage,
    297305                                                                        memalign_calls, memalign_storage,
     306                                                                        amemalign_calls, amemalign_storage,
    298307                                                                        cmemalign_calls, cmemalign_storage,
    299308                                                                        resize_calls, resize_storage,
     
    307316
    308317static int printStatsXML( FILE * stream ) {                             // see malloc_info
    309         char helpText[512];
     318        char helpText[1024];
    310319        int len = snprintf( helpText, sizeof(helpText),
    311320                                                "<malloc version=\"1\">\n"
     
    314323                                                "</sizes>\n"
    315324                                                "<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"
     325                                                "<total type=\"aalloc\" count=\"%u\" size=\"%llu\"/>\n"
    316326                                                "<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"
    317327                                                "<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"
     328                                                "<total type=\"amemalign\" count=\"%u\" size=\"%llu\"/>\n"
    318329                                                "<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"
    319330                                                "<total type=\"resize\" count=\"%u\" size=\"%llu\"/>\n"
     
    325336                                                "</malloc>",
    326337                                                malloc_calls, malloc_storage,
     338                                                aalloc_calls, aalloc_storage,
    327339                                                calloc_calls, calloc_storage,
    328340                                                memalign_calls, memalign_storage,
     341                                                amemalign_calls, amemalign_storage,
    329342                                                cmemalign_calls, cmemalign_storage,
    330343                                                resize_calls, resize_storage,
     
    348361
    349362
    350 static inline bool setHeapExpand( size_t value ) {
    351   if ( heapExpand < pageSize ) return true;
    352         heapExpand = value;
    353         return false;
    354 } // setHeapExpand
    355 
    356 
    357363// thunk problem
    358364size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
     
    371377
    372378static inline bool setMmapStart( size_t value ) {               // true => mmapped, false => sbrk
    373   if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
     379  if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return false;
    374380        mmapStart = value;                                                                      // set global
    375381
     
    378384        assert( maxBucketsUsed < NoBucketSizes );                       // subscript failure ?
    379385        assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
    380         return false;
     386        return true;
    381387} // setMmapStart
    382388
     
    437443
    438444        #ifdef __CFA_DEBUG__
    439         checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
     445        checkHeader( addr < heapBegin, name, addr );            // bad low address ?
    440446        #endif // __CFA_DEBUG__
    441447
     
    496502        // along with the block and is a multiple of the alignment size.
    497503
    498   if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0p;
     504  if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p;
    499505        size_t tsize = size + sizeof(HeapManager.Storage);
    500506        if ( likely( tsize < mmapStart ) ) {                            // small size => sbrk
     
    548554                block->header.kind.real.home = freeElem;                // pointer back to free list of apropriate size
    549555        } else {                                                                                        // large size => mmap
    550   if ( unlikely( size > ~0ul - pageSize ) ) return 0p;
     556  if ( unlikely( size > ULONG_MAX - pageSize ) ) return 0p;
    551557                tsize = libCeiling( tsize, pageSize );                  // must be multiple of page size
    552558                #ifdef __STATISTICS__
     
    566572        } // if
    567573
     574        block->header.size = size;                                                      // store allocation size
    568575        void * addr = &(block->data);                                           // adjust off header to user bytes
    569576
     
    689696        #endif // FASTLOOKUP
    690697
    691         if ( setMmapStart( default_mmap_start() ) ) {
     698        if ( ! setMmapStart( default_mmap_start() ) ) {
    692699                abort( "HeapManager : internal error, mmap start initialization failure." );
    693700        } // if
     
    695702
    696703        char * end = (char *)sbrk( 0 );
    697         sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
    698         heapBegin = heapEnd = sbrk( 0 );                                        // get new start point
     704        heapBegin = heapEnd = sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
    699705} // HeapManager
    700706
     
    722728        //assert( heapManager.heapBegin != 0 );
    723729        //heapManager{};
    724         if ( heapManager.heapBegin == 0p ) heapManager{};
     730        if ( heapManager.heapBegin == 0p ) heapManager{};       // sanity check
    725731} // memory_startup
    726732
     
    734740        //assert( heapManager.heapBegin != 0 );
    735741        if ( unlikely( heapManager.heapBegin == 0p ) ) heapManager{}; // called before memory_startup ?
     742#if __SIZEOF_POINTER__ == 8
     743        verify( size < ((typeof(size_t))1 << 48) );
     744#endif // __SIZEOF_POINTER__ == 8
    736745        void * addr = doMalloc( size );
    737746        if ( unlikely( addr == 0p ) ) errno = ENOMEM;           // POSIX
     
    740749
    741750
    742 static inline void * callocNoStats( size_t noOfElems, size_t elemSize ) {
    743         size_t size = noOfElems * elemSize;
     751static inline void * callocNoStats( size_t dim, size_t elemSize ) {
     752        size_t size = dim * elemSize;
    744753        char * addr = (char *)mallocNoStats( size );
    745754  if ( unlikely( addr == 0p ) ) return 0p;
     
    758767                memset( addr, '\0', bsize - sizeof(HeapManager.Storage) ); // set to zeros
    759768
    760         assert( noOfElems <= UINT32_MAX );
    761         header->dimension = noOfElems;                                          // store number of array elements
    762769        header->kind.real.blockSize |= 2;                                       // mark as zero filled
    763770        return addr;
     
    801808
    802809
    803 static inline void * cmemalignNoStats( size_t alignment, size_t noOfElems, size_t elemSize ) {
    804         size_t size = noOfElems * elemSize;
     810static inline void * cmemalignNoStats( size_t alignment, size_t dim, size_t elemSize ) {
     811        size_t size = dim * elemSize;
    805812        char * addr = (char *)memalignNoStats( alignment, size );
    806813  if ( unlikely( addr == 0p ) ) return 0p;
     
    815822                memset( addr, '\0', dataStorage( bsize, addr, header ) ); // set to zeros
    816823
    817         assert( noOfElems <= UINT32_MAX );
    818         header->dimension = noOfElems;                                          // store initial array size
    819824        header->kind.real.blockSize |= 2;                                       // mark as zero filled
    820825        return addr;
     
    832837
    833838extern "C" {
    834         // Allocates size bytes and returns a pointer to the allocated memory. The memory is not initialized. If size is 0,
    835         // then malloc() returns either 0p, or a unique pointer value that can later be successfully passed to free().
     839        // Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
     840        // then malloc() returns a unique pointer value that can later be successfully passed to free().
    836841        void * malloc( size_t size ) {
    837842                #ifdef __STATISTICS__
     
    843848        } // malloc
    844849
    845         // Allocate memory for an array of nmemb elements of size bytes each and returns a pointer to the allocated
    846         // memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either 0p, or a unique pointer
    847         // value that can later be successfully passed to free().
    848         void * calloc( size_t noOfElems, size_t elemSize ) {
     850
     851        // Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
     852        void * aalloc( size_t dim, size_t elemSize ) {
     853                #ifdef __STATISTICS__
     854                __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST );
     855                __atomic_add_fetch( &aalloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     856                #endif // __STATISTICS__
     857
     858                return mallocNoStats( dim * elemSize );
     859        } // aalloc
     860
     861
     862        // Same as aalloc() with memory set to zero.
     863        void * calloc( size_t dim, size_t elemSize ) {
    849864                #ifdef __STATISTICS__
    850865                __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
    851                 __atomic_add_fetch( &calloc_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
    852                 #endif // __STATISTICS__
    853 
    854                 return callocNoStats( noOfElems, elemSize );
     866                __atomic_add_fetch( &calloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     867                #endif // __STATISTICS__
     868
     869                return callocNoStats( dim, elemSize );
    855870        } // calloc
    856871
    857         // Change the size of the memory block pointed to by ptr to size bytes. The contents are undefined.  If ptr is 0p,
    858         // then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not 0p,
    859         // then the call is equivalent to free(ptr). Unless ptr is 0p, it must have been returned by an earlier call to
    860         // malloc(), calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done.
    861 
     872        // Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
     873        // 0p, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
     874        // not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier
     875        // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
    862876        void * resize( void * oaddr, size_t size ) {
    863877                #ifdef __STATISTICS__
     
    874888                size_t bsize, oalign = 0;
    875889                headers( "resize", oaddr, header, freeElem, bsize, oalign );
     890
    876891                size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    877 
    878892                // same size, DO NOT preserve STICKY PROPERTIES.
    879                 if ( oalign == 0 && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
     893          if ( oalign == 0 && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
    880894                        header->kind.real.blockSize &= -2;                      // no alignment and turn off 0 fill
    881895                        return oaddr;
     
    883897       
    884898                // change size, DO NOT preserve STICKY PROPERTIES.
     899                free( oaddr );
    885900                void * naddr = mallocNoStats( size );                   // create new area
    886                 free( oaddr );
    887901                return naddr;
    888902        } // resize
    889903
    890904
    891         // Same as resize but the contents shall be unchanged in the range from the start of the region up to the minimum of
     905        // Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
    892906        // the old and new sizes.
    893907        void * realloc( void * oaddr, size_t size ) {
     
    939953        } // realloc
    940954
    941         // Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of
    942         // alignment, which must be a power of two. (obsolete)
     955        // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
    943956        void * memalign( size_t alignment, size_t size ) {
    944957                #ifdef __STATISTICS__
     
    951964
    952965
     966        // Same as aalloc() with memory alignment.
     967        void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
     968                #ifdef __STATISTICS__
     969                __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
     970                __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     971                #endif // __STATISTICS__
     972
     973                return memalignNoStats( alignment, dim * elemSize );
     974        } // amemalign
     975
     976
    953977        // Same as calloc() with memory alignment.
    954         void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
     978        void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
    955979                #ifdef __STATISTICS__
    956980                __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
    957                 __atomic_add_fetch( &cmemalign_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
    958                 #endif // __STATISTICS__
    959 
    960                 return cmemalignNoStats( alignment, noOfElems, elemSize );
     981                __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     982                #endif // __STATISTICS__
     983
     984                return cmemalignNoStats( alignment, dim, elemSize );
    961985        } // cmemalign
    962986
     
    9931017
    9941018        // Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
    995         // or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behavior occurs. If ptr is
     1019        // or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
    9961020        // 0p, no operation is performed.
    9971021        void free( void * addr ) {
     
    10151039
    10161040
    1017         // Returns the alignment of the allocation.
     1041        // Returns the alignment of an allocation.
    10181042        size_t malloc_alignment( void * addr ) {
    10191043          if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
     
    10261050        } // malloc_alignment
    10271051
    1028 
    1029         // Returns true if the allocation is zero filled, i.e., initially allocated by calloc().
     1052        // Set the alignment for an the allocation and return previous alignment or 0 if no alignment.
     1053        size_t $malloc_alignment_set( void * addr, size_t alignment ) {
     1054          if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
     1055                size_t ret;
     1056                HeapManager.Storage.Header * header = headerAddr( addr );
     1057                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1058                        ret = header->kind.fake.alignment & -2;         // remove flag from old value
     1059                        header->kind.fake.alignment = alignment | 1; // add flag to new value
     1060                } else {
     1061                        ret = 0;                                                                        // => no alignment to change
     1062                } // if
     1063                return ret;
     1064        } // $malloc_alignment_set
     1065
     1066
     1067        // Returns true if the allocation is zero filled, e.g., allocated by calloc().
    10301068        bool malloc_zero_fill( void * addr ) {
    10311069          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
     
    10341072                        header = realHeader( header );                          // backup from fake to real header
    10351073                } // if
    1036                 return (header->kind.real.blockSize & 2) != 0;  // zero filled (calloc/cmemalign) ?
     1074                return (header->kind.real.blockSize & 2) != 0;  // zero filled ?
    10371075        } // malloc_zero_fill
    10381076
    1039 
    1040         // Returns number of elements if the allocation is for an array, i.e., by calloc().
    1041         size_t malloc_dimension( void * addr ) {
     1077        // Set allocation is zero filled and return previous zero filled.
     1078        bool $malloc_zero_fill_set( void * addr ) {
    10421079          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
    10431080                HeapManager.Storage.Header * header = headerAddr( addr );
     
    10451082                        header = realHeader( header );                          // backup from fake to real header
    10461083                } // if
    1047                 return header->dimension;                                               // array (calloc/cmemalign)
    1048         } // malloc_zero_fill
     1084                bool ret = (header->kind.real.blockSize & 2) != 0; // zero filled ?
     1085                header->kind.real.blockSize |= 2;                               // mark as zero filled
     1086                return ret;
     1087        } // $malloc_zero_fill_set
     1088
     1089
     1090        // Returns original total allocation size (not bucket size) => array size is dimension * sizeif(T).
     1091        size_t malloc_size( void * addr ) {
     1092          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
     1093                HeapManager.Storage.Header * header = headerAddr( addr );
     1094                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1095                        header = realHeader( header );                          // backup from fake to real header
     1096                } // if
     1097                return header->size;
     1098        } // malloc_size
     1099
     1100        // Set allocation size and return previous size.
     1101        size_t $malloc_size_set( void * addr, size_t size ) {
     1102          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
     1103                HeapManager.Storage.Header * header = headerAddr( addr );
     1104                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1105                        header = realHeader( header );                          // backup from fake to real header
     1106                } // if
     1107                size_t ret = header->size;
     1108                header->size = size;
     1109                return ret;
     1110        } // $malloc_size_set
    10491111
    10501112
     
    10821144
    10831145
    1084         // Adjusts parameters that control the behavior of the memory-allocation functions (see malloc). The param argument
     1146        // Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
    10851147        // specifies the parameter to be modified, and value specifies the new value for that parameter.
    10861148        int mallopt( int option, int value ) {
    10871149                choose( option ) {
    10881150                  case M_TOP_PAD:
    1089                         if ( setHeapExpand( value ) ) return 1;
     1151                        heapExpand = ceiling( value, pageSize ); return 1;
    10901152                  case M_MMAP_THRESHOLD:
    10911153                        if ( setMmapStart( value ) ) return 1;
     1154                        break;
    10921155                } // switch
    10931156                return 0;                                                                               // error, unsupported
  • libcfa/src/interpose.cfa

    re3bc51c rbcd74f3  
    1515
    1616#include <stdarg.h>                                                                             // va_start, va_end
     17#include <stdio.h>
    1718#include <string.h>                                                                             // strlen
    1819#include <unistd.h>                                                                             // _exit, getpid
  • libcfa/src/iostream.cfa

    re3bc51c rbcd74f3  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Mar 11 14:35:35 2020
    13 // Update Count     : 860
     12// Last Modified On : Sat May  2 18:30:25 2020
     13// Update Count     : 1017
    1414//
    1515
     
    2929#include <complex.h>                                                                    // creal, cimag
    3030} // extern "C"
     31
     32#include <bitmanip.hfa>                                                                 // fms
    3133
    3234
     
    459461\
    460462                if ( f.base == 'b' || f.base == 'B' ) {                 /* bespoke binary format */ \
    461                         int bits;                                                                                                       \
    462                         if ( f.val == (T){0} ) bits = 1;                        /* force at least one bit to print */ \
    463                         else bits = sizeof(long long int) * 8 - __builtin_clzll( f.val ); /* position of most significant bit */ \
    464                         bits = bits > sizeof(f.val) * 8 ? sizeof(f.val) * 8 : bits; \
    465                         int spaces = f.wd - bits;                                       /* can be negative */ \
    466                         if ( ! f.flags.nobsdp ) { spaces -= 2; }        /* base prefix takes space */ \
    467                         /* printf( "%d %d\n", bits, spaces ); */ \
     463                        int bits = high1( f.val );                                      /* position of most significant bit */ \
     464                        if ( bits == 0 ) bits = 1;                                      /* 0 value => force one bit to print */ \
     465                        int spaces; \
    468466                        if ( ! f.flags.left ) {                                         /* right justified ? */ \
    469467                                /* Note, base prefix then zero padding or spacing then prefix. */ \
    470                                 if ( f.flags.pad0 || f.flags.pc ) { \
     468                                if ( f.flags.pc ) { \
     469                                        spaces = f.wd - f.pc; \
     470                                        if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     471                                        if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    471472                                        if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
    472                                         if ( f.flags.pc ) spaces = f.pc - bits; \
     473                                        spaces = f.pc - bits; \
    473474                                        if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
    474475                                } else { \
    475                                         if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    476                                         if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     476                                        spaces = f.wd - bits; \
     477                                        if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     478                                        if ( f.flags.pad0 ) { \
     479                                                if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     480                                                if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
     481                                        } else { \
     482                                                if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
     483                                                if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     484                                        } /* if */ \
    477485                                } /* if */ \
    478                         } else if ( ! f.flags.nobsdp ) { \
    479                                 fmt( os, "0%c", f.base ); \
     486                        } else { \
     487                                if ( ! f.flags.nobsdp ) fmt( os, "0%c", f.base ); \
     488                                if ( f.flags.pc ) { \
     489                                        spaces = f.pc - bits; \
     490                                        if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
     491                                        spaces = f.wd - f.pc; \
     492                                } else { /* pad0 flag ignored with left flag */ \
     493                                        spaces = f.wd - bits; \
     494                                } /* if */ \
     495                                if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
    480496                        } /* if */ \
    481                         int shift = (bits - 1) / 4 * 4; /* floor( bits - 1, 4 ) */ \
     497                        int shift = floor( bits - 1, 4 ); \
    482498                        typeof( f.val ) temp = f.val; \
    483499                        fmt( os, "%s", shortbin[(temp >> shift) & 0xf] ); \
     
    565581                                fmt2.flags.pad0 = fmt2.flags.nobsdp = true;     \
    566582                                if ( f.base == 'b' | f.base == 'B' ) { \
    567                                         if ( f.wd > 64 ) fmt.wd = f.wd - 64; \
    568                                         if ( f.flags.pc && f.pc > 64 ) fmt.pc = f.pc - 64; \
    569                                         fmt2.wd = 64; \
     583                                        if ( fmt.flags.pc && fmt.pc > 64 ) fmt.pc -= 64; else { fmt.flags.pc = false; fmt.pc = 0; } \
     584                                        if ( fmt.flags.left ) { \
     585                                                fmt.flags.left = false; \
     586                                                fmt.wd = 0; \
     587                                                /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     588                                                fmt2.flags.left = true; \
     589                                                int msigd = high1( msig ); \
     590                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     591                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0b base specifier */ \
     592                                                if ( (int)fmt2.wd < 64 ) fmt2.wd = 64; /* cast deals with negative value */ \
     593                                                fmt2.flags.pc = true; fmt2.pc = 64; \
     594                                        } else { \
     595                                                if ( fmt.wd > 64 ) fmt.wd -= 64; \
     596                                                else fmt.wd = 1; \
     597                                                /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     598                                                fmt2.wd = 64; \
     599                                        } /* if */ \
     600                                        /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    570601                                        (ostype &)(os | fmt | "" | fmt2); \
    571602                                } else if ( f.base == 'o' ) { \
     603                                        if ( fmt.flags.pc && fmt.pc > 22 ) fmt.pc -= 22; else { fmt.flags.pc = false; fmt.pc = 0; } \
    572604                                        fmt.val = (unsigned long long int)fmt.val >> 2; \
    573                                         if ( f.wd > 21 ) fmt.wd = f.wd - 21; \
    574                                         if ( f.flags.pc && f.pc > 21 ) fmt.pc = f.pc - 21; \
    575                                         fmt2.wd = 1; \
    576                                         fmt2.val = ((msig & 0x3) << 1) + 1; \
    577                                         (ostype &)(os | fmt | "" | fmt2); \
    578                                         sepOff( os ); \
    579                                         fmt2.wd = 21; \
    580                                         fmt2.val = lsig & 0x7fffffffffffffff; \
     605                                        fmt2.val = ((msig & 0x3) << 1) + ((lsig & 0x8000000000000000U) != 0); \
     606                                        if ( fmt.flags.left ) { \
     607                                                fmt.flags.left = false; \
     608                                                fmt.wd = 0; \
     609                                                /* printf( "L %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
     610                                                (ostype &)(os | fmt | "" | fmt2); \
     611                                                sepOff( os ); \
     612                                                fmt2.flags.left = true; \
     613                                                int msigd = ceiling( high1( fmt.val ), 3 ); \
     614                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     615                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 1; /* compensate for 0 base specifier */ \
     616                                                if ( (int)fmt2.wd < 21 ) fmt2.wd = 21; /* cast deals with negative value */ \
     617                                                fmt2.flags.pc = true; fmt2.pc = 21; \
     618                                        } else { \
     619                                                if ( fmt.wd > 22 ) fmt.wd -= 22; \
     620                                                else fmt.wd = 1; \
     621                                                /* printf( "R %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
     622                                                (ostype &)(os | fmt | "" | fmt2); \
     623                                                sepOff( os ); \
     624                                                fmt2.wd = 21; \
     625                                        } /* if */ \
     626                                        fmt2.val = lsig & 0x7fffffffffffffffU; \
     627                                        /* printf( "\nC %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    581628                                        (ostype &)(os | fmt2); \
    582                                 } else { \
    583                                         if ( f.flags.left ) { \
    584                                                 if ( f.wd > 16 ) fmt2.wd = f.wd - 16; \
    585                                                 fmt.wd = 16; \
     629                                } else { /* f.base == 'x'  | f.base == 'X' */ \
     630                                        if ( fmt.flags.pc && fmt.pc > 16 ) fmt.pc -= 16; else { fmt.flags.pc = false; fmt.pc = 0; } \
     631                                        if ( fmt.flags.left ) { \
     632                                                fmt.flags.left = false; \
     633                                                fmt.wd = 0; \
     634                                                /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     635                                                fmt2.flags.left = true; \
     636                                                int msigd = high1( msig ); \
     637                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     638                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0x base specifier */ \
     639                                                if ( (int)fmt2.wd < 16 ) fmt2.wd = 16; /* cast deals with negative value */ \
     640                                                fmt2.flags.pc = true; fmt2.pc = 16; \
    586641                                        } else { \
    587                                                 if ( f.wd > 16 ) fmt.wd = f.wd - 16; \
    588                                                 if ( f.flags.pc && f.pc > 16 ) fmt.pc = f.pc - 16; \
     642                                                if ( fmt.wd > 16 ) fmt.wd -= 16; \
     643                                                else fmt.wd = 1; \
     644                                                /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
    589645                                                fmt2.wd = 16; \
    590646                                        } /* if */ \
     647                                        /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    591648                                        (ostype &)(os | fmt | "" | fmt2); \
    592649                                } /* if */ \
  • libcfa/src/startup.cfa

    re3bc51c rbcd74f3  
    1414//
    1515
    16 #include <time.h>                                                                               // tzset
     16#include <time.h>                // tzset
     17#include <locale.h>        // setlocale
    1718#include "startup.hfa"
    1819
     
    2122    void __cfaabi_appready_startup( void ) {
    2223                tzset();                                                                                // initialize time global variables
     24                setlocale(LC_NUMERIC, "");
    2325                #ifdef __CFA_DEBUG__
    2426                extern void heapAppStart();
  • libcfa/src/stdhdr/malloc.h

    re3bc51c rbcd74f3  
    1010// Created On       : Thu Jul 20 15:58:16 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Mar  8 10:01:20 2020
    13 // Update Count     : 11
     12// Last Modified On : Thu Apr 16 22:44:06 2020
     13// Update Count     : 13
    1414//
    1515
     
    3131
    3232extern "C" {
     33void * aalloc( size_t noOfElems, size_t elemSize );
     34void * amemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    3335void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    3436size_t malloc_alignment( void * );
    3537bool malloc_zero_fill( void * );
    36 size_t malloc_dimension( void * );
     38size_t malloc_size( void * );
    3739int malloc_stats_fd( int fd );
    3840} // extern "C"
  • libcfa/src/stdlib.cfa

    re3bc51c rbcd74f3  
    1010// Created On       : Thu Jan 28 17:10:29 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Mar 31 13:26:46 2020
    13 // Update Count     : 495
     12// Last Modified On : Thu Apr 16 22:43:33 2020
     13// Update Count     : 498
    1414//
    1515
     
    3737        } // alloc_set
    3838
    39         T * alloc_set( T ptr[], size_t dim, T fill ) { // realloc array with fill
     39        T * alloc_set( T ptr[], size_t dim, T fill ) {          // realloc array with fill
    4040                size_t olen = malloc_usable_size( ptr );                // current allocation
    4141                void * nptr = (void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
    4242                size_t nlen = malloc_usable_size( nptr );               // new allocation
    4343                if ( nlen > olen ) {                                                    // larger ?
    44                         for ( i; dim ) { memcpy( &ptr[i], &fill, sizeof(T) ); } // initialize with fill value
     44                        for ( i; malloc_size( ptr ) / sizeof(T) ~ dim ) {
     45                                memcpy( &ptr[i], &fill, sizeof(T) );    // initialize with fill value
     46                        } // for
    4547                } // if
    4648                return (T *)nptr;
  • libcfa/src/stdlib.hfa

    re3bc51c rbcd74f3  
    1010// Created On       : Thu Jan 28 17:12:35 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Apr  1 18:38:41 2020
    13 // Update Count     : 429
     12// Last Modified On : Thu Apr 16 22:44:05 2020
     13// Update Count     : 432
    1414//
    1515
     
    2525        void * memalign( size_t align, size_t size );           // malloc.h
    2626        size_t malloc_usable_size( void * ptr );                        // malloc.h
     27        size_t malloc_size( void * addr );                                      // CFA heap
    2728        void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap
    2829        void * memset( void * dest, int fill, size_t size ); // string.h
  • src/CompilationState.cc

    re3bc51c rbcd74f3  
    2727        nopreludep = false,
    2828        genproto = false,
     29        deterministic_output = false,
    2930        nomainp = false,
    3031        parsep = false,
  • src/CompilationState.h

    re3bc51c rbcd74f3  
    2828        nopreludep,
    2929        genproto,
     30        deterministic_output,
    3031        nomainp,
    3132        parsep,
  • src/Parser/parser.yy

    re3bc51c rbcd74f3  
    1010// Created On       : Sat Sep  1 20:22:55 2001
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Mar  6 17:26:45 2020
    13 // Update Count     : 4474
     12// Last Modified On : Mon Apr 27 12:25:42 2020
     13// Update Count     : 4483
    1414//
    1515
     
    966966
    967967tuple_expression_list:
    968         assignment_expression_opt
    969         | tuple_expression_list ',' assignment_expression_opt
     968        assignment_expression
     969        | '@'                                                                                           // CFA
     970                { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; }
     971        | tuple_expression_list ',' assignment_expression
    970972                { $$ = (ExpressionNode *)($1->set_last( $3 )); }
     973        | tuple_expression_list ',' '@'
     974                { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; }
    971975        ;
    972976
  • src/ResolvExpr/TypeEnvironment.cc

    re3bc51c rbcd74f3  
    2020#include <utility>                     // for pair, move
    2121
     22#include "CompilationState.h"          // for deterministic_output
    2223#include "Common/utility.h"            // for maybeClone
    2324#include "SynTree/Type.h"              // for Type, FunctionType, Type::Fora...
     
    106107
    107108        void EqvClass::print( std::ostream &os, Indenter indent ) const {
    108                 os << "( ";
    109                 std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) );
    110                 os << ")";
     109                if( !deterministic_output ) {
     110                        os << "( ";
     111                        std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) );
     112                        os << ")";
     113                }
    111114                if ( type ) {
    112115                        os << " -> ";
     
    235238                // check safely bindable
    236239                if ( r.type && occursIn( r.type, s.vars.begin(), s.vars.end(), *this ) ) return false;
    237                
     240
    238241                // merge classes in
    239242                r.vars.insert( s.vars.begin(), s.vars.end() );
  • src/main.cc

    re3bc51c rbcd74f3  
    449449
    450450
    451 static const char optstring[] = ":c:ghlLmNnpP:S:twW:D:";
     451static const char optstring[] = ":c:ghlLmNnpdP:S:twW:D:";
    452452
    453453enum { PreludeDir = 128 };
     
    462462        { "no-prelude", no_argument, nullptr, 'n' },
    463463        { "prototypes", no_argument, nullptr, 'p' },
     464        { "deterministic-out", no_argument, nullptr, 'd' },
    464465        { "print", required_argument, nullptr, 'P' },
    465466        { "prelude-dir", required_argument, nullptr, PreludeDir },
     
    482483        "do not read prelude",                                // -n
    483484        "generate prototypes for prelude functions",            // -p
     485        "don't print output that isn't deterministic",        // -d
    484486        "print",                                              // -P
    485487        "<directory> prelude directory for debug/nodebug",      // no flag
     
    586588                        genproto = true;
    587589                        break;
     590                  case 'd':                                     // don't print non-deterministic output
     591                    deterministic_output = true;
     592                        break;
    588593                  case 'P':                                                                             // print options
    589594                        for ( int i = 0;; i += 1 ) {
  • tests/.expect/alloc.txt

    re3bc51c rbcd74f3  
    3535CFA realloc array alloc, fill
    36360xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede
    37 CFA realloc array alloc, 5
    38 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede
    39 CFA realloc array alloc, 5
    40 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
    41 CFA realloc array alloc, 5
    42 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff
    4337
    4438C   memalign 42 42.5
  • tests/Makefile.am

    re3bc51c rbcd74f3  
    4141        -quiet @CFA_FLAGS@ \
    4242        -DIN_DIR="${abs_srcdir}/.in/"
     43
     44AM_CFAFLAGS = -XCFA --deterministic-out
    4345
    4446# get the desired cfa to test
  • tests/Makefile.in

    re3bc51c rbcd74f3  
    408408        -DIN_DIR="${abs_srcdir}/.in/"
    409409
     410AM_CFAFLAGS = -XCFA --deterministic-out
    410411
    411412# get the desired cfa to test
  • tests/alloc.cfa

    re3bc51c rbcd74f3  
    1010// Created On       : Wed Feb  3 07:56:22 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed Apr  1 10:58:35 2020
    13 // Update Count     : 424
     12// Last Modified On : Mon Apr  6 21:08:23 2020
     13// Update Count     : 428
    1414//
    1515
     
    151151        ip = alloc_set( ip, 3 * dim, fill );                            // CFA realloc array alloc, fill
    152152        printf( "CFA realloc array alloc, fill\n" );
    153         for ( i; 3 * dim ) { printf( "%#x ", ip[i] );; }
    154         printf( "\n" );
    155         // do not free
    156 
    157         ip = alloc_set( ip, 3 * dim, 5 );                                       // CFA realloc array alloc, 5
     153        for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
     154        printf( "\n" );
     155        // do not free
     156#if 0 // FIX ME
     157        ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
    158158        printf( "CFA realloc array alloc, 5\n" );
    159         for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
     159        for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
    160160        printf( "\n" );
    161161        // do not free
     
    167167        // do not free
    168168
    169         ip = alloc_set( ip, 3 * dim, 5 );                                       // CFA realloc array alloc, 5
     169        ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
    170170        printf( "CFA realloc array alloc, 5\n" );
    171         for ( i; 3 * dim ) { printf( "%#x ", ip[i] );; }
    172         printf( "\n" );
    173         free( ip );
    174 
     171        for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
     172        printf( "\n" );
     173#endif // 0
     174        free( ip );
    175175
    176176        // resize, non-array types
  • tests/concurrent/.expect/monitor.txt

    re3bc51c rbcd74f3  
    1 4000000
     13000000
  • tests/concurrent/monitor.cfa

    re3bc51c rbcd74f3  
    2929
    3030void main( MyThread & this ) {
    31         for(int i = 0; i < 1_000_000; i++) {
     31        for(int i = 0; i < 750_000; i++) {
    3232                increment( global );
    3333        }
  • tests/errors/.expect/completeType.txt

    re3bc51c rbcd74f3  
    2727    void
    2828  )
    29   Environment:( _85_4_DT ) -> instance of struct A with body 0 (no widening)
     29  Environment: -> instance of struct A with body 0 (no widening)
    3030
    3131
     
    5050    void
    5151  )
    52   Environment:( _85_4_DT ) -> instance of struct B with body 1 (no widening)
     52  Environment: -> instance of struct B with body 1 (no widening)
    5353
    5454
     
    127127          void
    128128        )
    129         Environment:( _104_0_T ) -> instance of type T (not function type) (no widening)
     129        Environment: -> instance of type T (not function type) (no widening)
    130130
    131131      Could not satisfy assertion:
  • tests/exceptions/.expect/interact.txt

    re3bc51c rbcd74f3  
    1414resumption catch, will terminate
    1515inner termination catch
     16
     17throwing resume moon
     18resumption moon catch, will terminate
     19termination catch
     20throwing resume star
     21resumption star catch
  • tests/exceptions/.expect/resume.txt

    re3bc51c rbcd74f3  
    2525caught second exception
    2626recaught first exception
     27
     28inner catch
     29inner catch
     30outer catch
  • tests/exceptions/.expect/terminate.txt

    re3bc51c rbcd74f3  
    2424caught second exception
    2525recaught first exception
     26
     27inner catch
     28outer catch
  • tests/exceptions/conditional.cfa

    re3bc51c rbcd74f3  
    44// up the non-trivial exception is reasonable to do.
    55
    6 #include "except-mac.hfa"
     6#include <exception.hfa>
    77#include <stdio.h>
    88
    9 DECLARE_EXCEPT(num_error, BASE_EXCEPT,
     9VTABLE_DECLARATION(num_error)(
    1010        int (*code)(num_error *this);
    1111);
     
    3636    this.num = other.num;
    3737}
    38 void copy(num_error * this, num_error * other) {
    39         *this = *other;
    40 }
    4138void ^?{}(num_error & this) {
    4239    if( this.msg ) free( this.msg );
     
    4643}
    4744
    48 VTABLE_INSTANCE(num_error, BASE_EXCEPT, copy, ^?{},
    49         num_error_msg, num_error_code
     45VTABLE_INSTANCE(num_error)(
     46        num_error_msg,
     47        num_error_code,
    5048);
    5149
     
    5856
    5957        try {
    60                 THROW(&exc);
     58                throw &exc;
    6159        } catch (num_error * error ; 3 == error->virtual_table->code( error )) {
    6260                caught_num_error(3, error);
     
    6664
    6765        try {
    68                 THROW_RESUME(&exc);
     66                throwResume &exc;
    6967        } catchResume (num_error * error ; 3 == error->virtual_table->code( error )) {
    7068                caught_num_error(3, error);
  • tests/exceptions/finally.cfa

    re3bc51c rbcd74f3  
    11// Finally Clause Tests
    22
    3 #include "except-mac.hfa"
     3#include <exception.hfa>
    44#include "except-io.hfa"
    55
     
    1212                try {
    1313                        printf("termination throw\n");
    14                         THROW(&exc);
     14                        throw &exc;
    1515                } finally {
    1616                        loud_exit a = "termination inner finally";
     
    2828                try {
    2929                        printf("resumption throw\n");
    30                         THROW_RESUME(&exc);
     30                        throwResume &exc;
    3131                } finally {
    3232                        loud_exit a = "resumption inner finally";
  • tests/exceptions/interact.cfa

    re3bc51c rbcd74f3  
    11// Testing Interactions Between Termination and Resumption
    22
    3 #include "except-mac.hfa"
     3#include <exception.hfa>
    44#include "except-io.hfa"
    55
     
    1010        // Resume falls back to terminate.
    1111        try {
    12                 THROW_RESUME(&(star){});
     12                throwResume &(star){};
    1313        } catch (star *) {
    1414                printf("caught as termination\n");
     
    1717        try {
    1818                loud_region a = "try block with resume throw";
    19                 THROW_RESUME(&(star){});
     19                throwResume &(star){};
    2020        } catch (star *) {
    2121                printf("caught as termination\n");
     
    2929        try {
    3030                try {