Changeset bcd74f3
- Timestamp:
- May 8, 2020, 4:44:53 PM (4 years ago)
- Branches:
- ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children:
- 5c9b20c
- Parents:
- e3bc51c (diff), 0e7e3c17 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - Files:
-
- 23 added
- 4 deleted
- 64 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/bibliography/pl.bib
re3bc51c rbcd74f3 4524 4524 } 4525 4525 4526 @misc{libfibre, 4527 key = {libfibre}, 4528 author = {Martin Karsten}, 4529 title = {{libfibre:~User-Level Threading Runtime}}, 4530 howpublished= {\href{https://git.uwaterloo.ca/mkarsten/libfibre} 4531 {https://\-git.uwaterloo.ca/\-mkarsten/\-libfibre}}, 4532 note = {[Online; accessed 2020-04-15]}, 4533 } 4534 4526 4535 @article{Linda, 4527 4536 keywords = {Linda, concurrency}, … … 4555 4564 address = {Belmont}, 4556 4565 year = 1967, 4566 } 4567 4568 @inproceedings{Fang06, 4569 author = {Fang, Yi and McMillan, Kenneth L. and Pnueli, Amir and Zuck, Lenore D.}, 4570 editor = {Najm, Elie and Pradat-Peyre, Jean-Fran{\c{c}}ois and Donzeau-Gouge, V{\'e}ronique Vigui{\'e}}, 4571 title = {Liveness by Invisible Invariants}, 4572 booktitle = {Formal Techniques for Networked and Distributed Systems - FORTE 2006}, 4573 year = 2006, 4574 publisher = {Springer Berlin Heidelberg}, 4575 address = {Berlin, Heidelberg}, 4576 pages = {356--371}, 4557 4577 } 4558 4578 … … 4739 4759 contributer = {pabuhr@plg}, 4740 4760 author = {Gregory R. Andrews}, 4741 title = {A Method for Solving Syn ronization Problems},4761 title = {A Method for Solving Synchronization Problems}, 4742 4762 journal = scp, 4743 4763 volume = 13, … … 5064 5084 title = {Multiple Inheritance for {C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}}}, 5065 5085 booktitle = {Proceedings of the Spring '87 EUUG Conference}, 5066 month = may, year = 1987 5086 month = may, 5087 year = 1987, 5067 5088 } 5068 5089 … … 6646 6667 } 6647 6668 6669 @article{Aravind09, 6670 author = {Alex A. Aravind and Wim H. Hesselink}, 6671 title = {A Queue Based Mutual Exclusion Algorithm}, 6672 journal = acta, 6673 volume = 46, 6674 pages = {73--86}, 6675 year = 2009, 6676 } 6677 6648 6678 % R 6649 6679 … … 8030 8060 } 8031 8061 8062 @article{Karsten20, 8063 author = {Karsten, Martin and Barghi, Saman}, 8064 title = {{User-level Threading: Have Your Cake and Eat It Too}}, 8065 year = {2020}, 8066 issue_date = {March 2020}, 8067 publisher = {Association for Computing Machinery}, 8068 address = {New York, NY, USA}, 8069 volume = {4}, 8070 number = {1}, 8071 url = {https://doi.org/10.1145/3379483}, 8072 doi = {10.1145/3379483}, 8073 journal = {Proc. ACM Meas. Anal. Comput. Syst.}, 8074 month = mar, 8075 numpages = {30}, 8076 } 8077 8032 8078 @techreport{Harmony, 8033 8079 keywords = {messages, concurrency}, … … 8045 8091 contributer = {gjditchfield@plg}, 8046 8092 author = {Henry Lieverman}, 8047 title = {Using Prototypical Objects to Implement Shared Behavior in 8048 Object Oriented Systems}, 8093 title = {Using Prototypical Objects to Implement Shared Behavior in Object Oriented Systems}, 8049 8094 journal = sigplan, 8050 month = nov, year = 1986, 8051 volume = 21, number = 11, pages = {214-223} 8095 month = nov, 8096 year = 1986, 8097 volume = 21, 8098 number = 11, 8099 pages = {214-223} 8052 8100 } 8053 8101 -
doc/theses/thierry_delisle_PhD/comp_II/Makefile
re3bc51c rbcd74f3 2 2 3 3 Build = build 4 Figures = figures4 Figures = img 5 5 Macros = ../../../LaTeXmacros 6 6 TeXLIB = .:${Macros}:${Build}:../../../bibliography: -
doc/theses/thierry_delisle_PhD/comp_II/comp_II.tex
re3bc51c rbcd74f3 3 3 \usepackage[T1]{fontenc} 4 4 \usepackage[utf8]{inputenc} 5 \usepackage{listings} % for code listings6 5 \usepackage{xspace} 7 6 \usepackage{xcolor} 8 7 \usepackage{graphicx} 9 8 \usepackage{epic,eepic} 9 \usepackage{listings} % for code listings 10 10 \usepackage{glossaries} 11 11 \usepackage{textcomp} 12 % cfa macros used in the document 13 \input{common} 14 15 \setlist{topsep=6pt,parsep=0pt} % global reduce spacing between points 16 \newcommand{\uC}{$\mu$\CC} 12 17 \usepackage[hidelinks]{hyperref} 18 \setlength{\abovecaptionskip}{5pt plus 3pt minus 2pt} 19 \lstMakeShortInline$% % single-character for \lstinline 13 20 %\usepackage[margin=1in]{geometry} 14 21 %\usepackage{float} 15 22 16 % cfa macros used in the document17 \input{common}18 23 \input{glossary} 19 24 … … 27 32 28 33 \author{ 29 \huge Thierry Delisle \ \30 \Large \ vspace*{0.1in} \texttt{tdelisle@uwaterloo.ca} \\34 \huge Thierry Delisle \vspace*{5pt} \\ 35 \Large \texttt{tdelisle@uwaterloo.ca} \vspace*{5pt} \\ 31 36 \Large Cheriton School of Computer Science \\ 32 37 \Large University of Waterloo … … 42 47 43 48 \newcommand{\cit}{\textsuperscript{[Citation Needed]}\xspace} 44 \newcommand{\TODO}{ ~\newline{\large\bf\color{red} TODO :}\xspace}49 \newcommand{\TODO}{{\large\bf\color{red} TODO: }\xspace} 45 50 46 51 % =============================================================================== … … 54 59 \section{Introduction} 55 60 \subsection{\CFA and the \CFA concurrency package} 56 \CFA\cit is a modern, polymorphic, non-object-oriented, backwards-compatible extension of the C programming language. It aims to add high-productivity features while maintaning the predictible performance of C. As such, concurrency in \CFA\cit aims to offer simple and safe high-level tools while still allowing performant code. \CFA concurrrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing. As such, the \CFA \emph{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}. 57 58 Scheduling occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler. This scheduling is an indirect handoff, as opposed to generators and coroutines which explicitly switch to the next generator and coroutine respectively. The cost of switching between two threads for an indirect handoff has two components : the cost of actually context-switching, i.e., changing the relevant registers to move execution from one thread to the other, and the cost of scheduling, i.e., deciding which thread to run next among all the threads ready to run. The first cost is generally constant and fixed\footnote{Affecting the context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a fixed third-thread before scheduling.}, while the scheduling cost can vary based on the system state. Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, i.e. \textit{linearizability}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention. 59 60 The more threads switch, the more the administration cost of scheduling becomes noticeable. It is therefore important to build a scheduler with the lowest possible cost and latency. Another important consideration is \emph{fairness}. In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}. While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows to much unfairness. Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance. In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the the express cash register at a grocery store. 61 62 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance. Here understandability does not refer to the API but to how much scheduling concerns programmers need to take into account when writing a \CFA concurrent package. Therefore, the main goal of this proposal is : 61 \CFA\cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language. 62 It aims to add high-productivity features while maintaining the predictable performance of C. 63 As such, concurrency in \CFA\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code. 64 \CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing. 65 As such, the \CFA \newterm{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}. 66 67 \newterm{Scheduling} occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler. 68 This scheduling is an indirect handoff, as opposed to generators and coroutines which explicitly switch to the next generator and coroutine respectively. 69 The cost of switching between two threads for an indirect handoff has two components: 70 \begin{enumerate} 71 \item 72 the cost of actually context-switching, \ie changing the relevant registers to move execution from one thread to the other, 73 \item 74 and the cost of scheduling, \ie deciding which thread to run next among all the threads ready to run. 75 \end{enumerate} 76 The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a fixed third-thread before scheduling.}, while the scheduling cost can vary based on the system state. 77 Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention. 78 79 The more threads switch, the more the administration cost of scheduling becomes noticeable. 80 It is therefore important to build a scheduler with the lowest possible cost and latency. 81 Another important consideration is \newterm{fairness}. 82 In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}. 83 While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows too much unfairness. 84 Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance. 85 In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the the express cash-register at a grocery store. 86 87 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance. 88 Here understandability does not refer to the API but to how much scheduling concerns programmers need to take into account when writing a \CFA concurrent package. 89 Therefore, the main goal of this proposal is : 63 90 \begin{quote} 64 91 The \CFA scheduler should be \emph{viable} for \emph{any} workload. 65 92 \end{quote} 66 93 67 For a general purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads. As such, scheduling performance is generally either defined by the best case scenario, i.e., a workload to which the scheduler is tailored, or the worst case scenario, i.e., the scheduler behaves no worst than \emph{X}. For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance. Be cause there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler. As such, it is important that only programmers with exceptionally high performance requirements should need to write their own scheduler and replace the scheduler in this proposal. 68 69 Finally, the scheduling objective includes producing a scheduling strategy with sufficient fairness guarantees, creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily, scheduling blocking I/O operations, and writing sufficient library tools to allow developers to indirectly use the scheduler. 94 For a general purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads. 95 As such, scheduling performance is generally either defined by the best case scenario, \ie a workload to which the scheduler is tailored, or the worst case scenario, \ie the scheduler behaves no worst than \emph{X}. 96 For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance. 97 Because there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler. 98 As such, it is important that only programmers with exceptionally high performance requirements should need to write their own scheduler and replace the scheduler in this proposal. 99 100 To achieve the \CFA scheduling goal includes: 101 \begin{enumerate} 102 \item 103 producing a scheduling strategy with sufficient fairness guarantees, 104 \item 105 creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily, 106 \item 107 scheduling blocking I/O operations, 108 \item 109 and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler. 110 \end{enumerate} 70 111 71 112 % =============================================================================== … … 73 114 74 115 \section{\CFA Scheduling} 75 To scheduler user-level threads across all workloads, the scheduler has a number of requirements: 76 77 \paragraph{Correctness} As with any other concurrent data structure or algorithm, the correctness requirement is paramount. The scheduler cannot allow threads to be dropped from the ready-queue, i.e., scheduled but never run, or be executed multiple times when only being scheduled once. Since \CFA concurrency has no spurious wakeup, this definition of correctness also means the scheduler should have no spurious wakeup. The \CFA scheduler must be correct. 78 79 \paragraph{Performance} The performance of a scheduler can generally be mesured in terms of scheduling cost, scalability and latency. Scheduling cost is the cost to switch from one thread to another, as mentioned above. For simple applications where a single kernel thread does most of the scheduling, it is generally the dominating cost. When adding many kernel threads, scalability becomes an issue, effectively increasing the cost of context-switching when contention is high. Finally, a third axis of performance is tail latency. This measurement is related to fairness and mesures how long is needed for a thread to be run once scheduled and is evaluated in the worst cases. The \CFA scheduler should offer good performance in all three metrics. 80 81 \paragraph{Fairness} Like performance, this requirements has several aspect : eventual progress, predictability and performance reliablility. As a hard requirement, the \CFA scheduler must guarantee eventual progress, i.e., prevent starvation, otherwise the above mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about. Beyond this requirement, performance should be predictible and reliable, which means similar workloads achieve similar performance and programmer intuition is respected. An example of this is : a thread that yields agressively should not run more often then other tasks. While this is intuitive, it does not hold true for many work-stealing or feedback based schedulers. The \CFA scheduler must guarantee eventual progress and should be predictible and offer reliable performance. 82 83 \paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement. This issue is discussed more in depth towards the end of this proposal. It effectively refers to avoiding using CPU power when there are no threads to run, and conversely, use all CPUs available when the workload can benefit from it. Balancing these two states is where the complexity lies. The \CFA scheduler should be efficient with respect to the underlying (shared) computer. 116 To schedule user-level threads across all workloads, the scheduler has a number of requirements: 117 118 \paragraph{Correctness} As with any other concurrent data structure or algorithm, the correctness requirement is paramount. 119 The scheduler cannot allow threads to be dropped from the ready queue, \ie scheduled but never run, or be executed multiple times when only being scheduled once. 120 Since \CFA concurrency has no spurious wakeup, this definition of correctness also means the scheduler should have no spurious wakeup. 121 The \CFA scheduler must be correct. 122 123 \paragraph{Performance} The performance of a scheduler can generally be measured in terms of scheduling cost, scalability and latency. 124 \newterm{Scheduling cost} is the cost to switch from one thread to another, as mentioned above. 125 For simple applications, where a single kernel thread does most of the scheduling, it is generally the dominating cost. 126 \newterm{Scalability} is the cost of adding multiple kernel threads because it increases the time for context switching because of contention by multiple threads accessing shared resources, \eg the ready queue. 127 Finally, \newterm{tail latency} is service delay and relates to thread fairness. 128 Specifically, latency measures how long a thread waits to run once scheduled and is evaluated in the worst case. 129 The \CFA scheduler should offer good performance for all three metrics. 130 131 \paragraph{Fairness} Like performance, this requirement has several aspect : eventual progress, predictability and performance reliability. 132 \newterm{Eventual progress} guarantees every scheduled thread is eventually run, \ie prevent starvation. 133 As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about. 134 \newterm{Predictability} and \newterm{reliability} means similar workloads achieve similar performance and programmer execution intuition is respected. 135 For example, a thread that yields aggressively should not run more often then other tasks. 136 While this is intuitive, it does not hold true for many work-stealing or feedback based schedulers. 137 The \CFA scheduler must guarantee eventual progress and should be predictable and offer reliable performance. 138 139 \paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement and is discussed in depth towards the end of the proposal. 140 \newterm{Efficiency} means avoiding using CPU cycles when there are no threads to run, and conversely, use all CPUs available when the workload can benefit from it. 141 Balancing these two states is where the complexity lies. 142 The \CFA scheduler should be efficient with respect to the underlying (shared) computer. 84 143 85 144 \bigskip To achieve these requirements, I can reject two broad types of scheduling strategies : feedback-based and priority schedulers. 86 145 87 146 \subsection{Feedback-Based Schedulers} 88 Many operating systems use schedulers based on feedback in some form, e.g., measuring how much CPU a particular thread has used\footnote{Different metrics can measured here but it is not relevant to the discussion.} and schedule threads based on this metric. These strategies are sensible for operating systems but rely on two assumptions on the workload : 147 Many operating systems use schedulers based on feedback in some form, \eg measuring how much CPU a particular thread has used\footnote{Different metrics can be measured but it is not relevant to the discussion.} and schedule threads based on this metric. 148 These strategies are sensible for operating systems but rely on two assumptions for the workload: 89 149 90 150 \begin{enumerate} … … 93 153 \end{enumerate} 94 154 95 While these two assumptions generally hold for operating systems, they may not for user-level threading. Since \CFA has the explicit goal of allowing many smaller threads, this can naturally lead to threads with much shorter lifetime, which are only scheduled a few times. Scheduling strategies based on feedback cannot be effective in these cases because they do not have the opportunity to measure the metrics that underlie the algorithm. Note that the problem of feedback convergence (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling, e.g., threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly. 96 97 In the context of operating systems, these concerns can be overshadowed by a more pressing concern : security. When multiple users are involved, it is possible that some users are malevolent and try to exploit the scheduling strategy in order to achieve some nefarious objective. Security concerns mean that more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process. In the case of the \CFA scheduler, every thread runs in the same user-space and is controlled by the same user. Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent. This approach allows for a much simpler fairness metric and in this proposal ``fairness'' is considered as follows : when multiple threads are cycling through the system, the total ordering of threads being scheduled, i.e., pushed onto the ready-queue, should not differ much from the total ordering of threads being executed, i.e., popped from the ready-queue. 98 99 Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-threads feedback. Feedback in general is not rejected for secondary concerns like idle sleep for kernel threads, but no feedback is used to decide which thread to run next. 155 While these two assumptions generally hold for operating systems, they may not for user-level threading. 156 Since \CFA has the explicit goal of allowing many smaller threads, this can naturally lead to threads with much shorter lifetimes that are only scheduled a few times. 157 Scheduling strategies based on feedback cannot be effective in these cases because there is no opportunity to measure the metrics that underlie the algorithm. 158 Note, the problem of \newterm{feedback convergence} (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling, \eg threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly. 159 160 In the context of operating systems, these concerns can be overshadowed by a more pressing concern : security. 161 When multiple users are involved, it is possible some users are malevolent and try to exploit the scheduling strategy to achieve some nefarious objective. 162 Security concerns mean more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process. 163 In the case of the \CFA scheduler, every thread runs in the same user space and is controlled by the same user. 164 Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent. 165 This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready-queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready-queue. 166 167 Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-threads feedback. 168 Feedback in general is not rejected for secondary concerns like idle sleep for kernel threads, but no feedback is used to decide which thread to run next. 100 169 101 170 \subsection{Priority Schedulers} 102 Another broad category of schedulers are priority schedulers. In these scheduling strategies, threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads. Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO. These priority mean that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run. This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems. 103 104 An important observation to make is that threads do not need to have explicit priorities for problems to occur. Indeed, any system with multiple ready-queues and attempts to exhaust one queue before accessing the other queues, can encounter starvation problems. A popular scheduling strategy that suffers from implicit priorities is work-stealing. Work-stealing is generally presented as follows, each processor has a list of ready threads. 105 \begin{enumerate} 106 \item Run threads from ``this'' processor's list first. 107 \item If ``this'' processor's list is empty, run threads from some other processor's list. 108 \end{enumerate} 109 110 In a loaded system\footnote{A loaded system is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block or preempt for an extended period of time, threads on the same processor's list starve if no other processors exhaust their list. 111 112 Since priorities can be complex for programmers to handle, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities. 171 Another broad category of schedulers are priority schedulers. 172 In these scheduling strategies, threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads. 173 Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO. 174 A consequence of priority is that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run. 175 This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems. 176 177 An important observation is that threads do not need to have explicit priorities for problems to occur. 178 Indeed, any system with multiple ready-queues that attempts to exhaust one queue before accessing the other queues, essentially provide implicit priority, which can encounter starvation problems. 179 For example, a popular scheduling strategy that suffers from implicit priorities is work stealing. 180 \newterm{Work stealing} is generally presented as follows: 181 \begin{enumerate} 182 \item Each processor has a list of ready threads. 183 \item Each processor runs threads from its ready queue first. 184 \item If a processor's ready queue is empty, attempt to run threads from some other processor's ready queue. 185 \end{enumerate} 186 187 In a loaded system\footnote{A \newterm{loaded system} is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block, or preempt for an extended period of time, threads on the same processor's list starve if no other processors exhaust their list. 188 189 Since priorities can be complex for programmers to incorporate into their execution intuition, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities. 113 190 114 191 \subsection{Schedulers without feedback or priorities} 115 This proposal conjectures that is is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about. The simplest fairness guarantee is FIFO ordering, i.e., threads scheduled first run first. However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additionnal synchronization. Thankfully, strict FIFO is not needed for sufficient fairness. Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run. This relaxation is possible because the non-determinism means that programmers must already handle ordering problems in order to produce correct code and already must rely on weak guarantees, for example that a specific thread will \emph{eventually} run. Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems. For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in total ordering. 116 117 Scheduling is defined as follows : 192 This proposal conjectures that is is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about. 193 The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first run first. 194 However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization. 195 Thankfully, strict FIFO is not needed for sufficient fairness. 196 Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run. 197 Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a specific thread will \emph{eventually} run. 198 Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems. 199 For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in total ordering. 200 201 The \CFA scheduler fairness is defined as follows: 118 202 \begin{itemize} 119 203 \item Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regard to $N$. 120 204 \end{itemize} 121 122 205 While this is not a bounded guarantee, the probability that unfairness persist for long periods of times decreases exponentially, making persisting unfairness virtually impossible. 123 206 124 207 % =============================================================================== 125 208 % =============================================================================== 126 \section{Proposal} 127 128 \subsection{Ready-Queue} \label{sec:queue} 129 A simple ready-queue can be built from a FIFO queue, where user-threads are pushed onto the queue when they are ready to run, and processors (kernel-threads acting as virtual processors) pop the user-threads from the queue and execute them. Using the paper\cite{alistarh2018relaxed} as a basis, it is simple to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems. The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant. Section~\ref{sec:resize} discusses resizing the array.}. Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue. Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp. A higher number of underlying queues leads to less contention on each queue and therefore better performance. In a loaded system, it is highly likely the queues are non-empty, i.e., several tasks are on each of the underlying queues. This means that selecting a queue at random to pop from is highly likely to yield a queue with available items. In Figure~\ref{fig:base}, ignoring the ellipsis, the chances of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10. 209 \section{Proposal Details} 210 211 \subsection{Central Ready Queue} \label{sec:queue} 212 A central ready queue can be built from a FIFO queue, where user threads are pushed onto the queue when they are ready to run, and processors (kernel-threads acting as virtual processors) pop the user threads from the queue and execute them. 213 Alistarh \etal~\cite{alistarh2018relaxed} show it is straightforward to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems. 214 The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant. 215 Section~\ref{sec:resize} discusses resizing the array.}. 216 Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue. 217 Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp. 218 A higher number of underlying queues leads to less contention on each queue and therefore better performance. 219 In a loaded system, it is highly likely the queues are non-empty, \ie several tasks are on each of the underlying queues. 220 This means that selecting a queue at random to pop from is highly likely to yield a queue with available items. 221 In Figure~\ref{fig:base}, ignoring the ellipsis, the chances of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10. 130 222 131 223 \begin{figure} … … 133 225 \input{base} 134 226 \end{center} 135 \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists. The timestamp is in all nodes and cell arrays.} 227 \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists. 228 The timestamp is in all nodes and cell arrays.} 136 229 \label{fig:base} 137 230 \end{figure} … … 145 238 \end{figure} 146 239 147 When the ready queue is \emph{more empty}, i.e., several of the queues are empty, selecting a random queue for popping is less likely to yield a valid selection and more attempts need to be made, resulting in a performance degradation. Figure~\ref{fig:empty} shows an example with fewer elements where the chances of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time. Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry. Overall performance is therefore influenced by the contention on the underlying queues and pop performance is influenced by the item density. This leads to four performance cases, as depicted in Table~\ref{tab:perfcases}. 240 When the ready queue is \emph{more empty}, \ie several of the queues are empty, selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation. 241 Figure~\ref{fig:empty} shows an example with fewer elements, where the chances of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time. 242 Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry. 243 Note, the popping kernel thread has no work to do, but CPU cycles are wasted both for available user and kernel threads during the pop operation as the popping thread is using a CPU. 244 Overall performance is therefore influenced by the contention on the underlying queues and pop performance is influenced by the item density. 245 246 This leads to four performance cases for the centralized ready-queue, as depicted in Table~\ref{tab:perfcases}. 247 The number of processors (many or few) refers to the number of kernel threads \emph{actively} attempting to pop user threads from the queues, not the total number of kernel threads. 248 The number of threads (many or few) refers to the number of user threads ready to be run. 249 Many threads means they outnumber processors significantly and most underlying queues have items, few threads mean there are barely more threads than processors and most underlying queues are empty. 250 Cases with fewer threads than processors are discussed in Section~\ref{sec:sleep}. 148 251 149 252 \begin{table} … … 155 258 Many Threads & A: good performance & B: good performance \\ 156 259 \hline 157 Few Threads & C: poorperformance & D: poor performance \\260 Few Threads & C: worst performance & D: poor performance \\ 158 261 \hline 159 262 \end{tabular} 160 263 \end{center} 161 \caption{ Performance of the relaxed FIFO list in different cases. The number of processors (many or few) refers to the number of kernel-threads \emph{actively} attempting to pop user-threads from the queues, not the total number of kernel-threads. The number of threads (many or few) refers to the number of user-threads ready to be run. Many threads means they outnumber processors significantly and most underlying queues have items, few threads mean there are barely more threads than processors and most underlying queues are empty. Cases with fewer threads than processors are discussed in Section~\ref{sec:sleep}.}264 \caption{Expected performance of the relaxed FIFO list in different cases.} 162 265 \label{tab:perfcases} 163 266 \end{table} 164 267 165 Table~\ref{tab:perfcases} 166 167 Performance can be improved in case~D (Table~\ref{tab:perfcases}) by adding information to help processors find which inner queues are used. This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations. The approach used to encode this information can vary in density and be either global or local, where density means the information is either packed in few cachelines or spread across several cachelines, and local information means each thread uses an independent copy instead of a single global, i.e., common, source of information. 168 169 For example, bitmask can be used to identify which inner queues are currently in use, as shown in Figure~\ref{fig:emptybit}. This means that processors can often find user-threads in constant time, regardless of how many underlying queues are empty. Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) which allow using the bitmask with very little overhead compared to the randomized selection approach for a filled readyqueue, offerring decent performance even in cases with many empty inner queues. However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomicly.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word. With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomicly. 170 171 Finally, a dense bitmap, either single or multi-word, causes additional problems 172 in case C (Table 1), because many processors are continuously scanning the 173 bitmask to find the few available threads. This increased contention on the 174 bitmask(s) reduces performance because of cache misses and the bitmask is 175 updated more frequently by the scanning processors racing to read and/or update 176 that information. This increased update frequency means the information in the 177 bitmask will more often be stale before a processor can use it to find an item. 268 Performance can be improved in case~D (Table~\ref{tab:perfcases}) by adding information to help processors find which inner queues are used. 269 This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations. 270 The approach used to encode this information can vary in density and be either global or local. 271 \newterm{Density} means the information is either packed in a few cachelines or spread across several cachelines, and \newterm{local information} means each thread uses an independent copy instead of a single global, \ie common, source of information. 272 273 For example, Figure~\ref{fig:emptybit} shows a dense bitmask to identify which inner queues are currently in use. 274 This approach means processors can often find user threads in constant time, regardless of how many underlying queues are empty. 275 Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow using the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues. 276 However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word. 277 With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomically. 278 279 Finally, a dense bitmap, either single or multi-word, causes additional problems in case C (Table 1), because many processors are continuously scanning the bitmask to find the few available threads. 280 This increased contention on the bitmask(s) reduces performance because of cache misses after updates and the bitmask is updated more frequently by the scanning processors racing to read and/or update that information. 281 This increased update frequency means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available user threads but none on queue. 178 282 179 283 \begin{figure} … … 185 289 \end{figure} 186 290 187 Another approach is to use a hiearchical data structure, for example Figure~\ref{fig:emptytree}. Creating a tree of nodes to reduce contention has been shown to work in similar cases\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US. How does that affect \CFA? Can I use it in my work?}. However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case. 291 Figure~\ref{fig:emptytree} shows another approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US. 292 How does that affect \CFA? Can I use it in my work?}. 293 However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case. 188 294 189 295 \begin{figure} … … 195 301 \end{figure} 196 302 197 Finally, a third approach is to use dense information, similar to the bitmap, but have each thread keep its own independant copies of it. While this approach can offer good scalability \emph{and} low latency, the livelyness of the information can become a problem. In the simple cases, local copies of which underlying queues are empty can become stale and end-up not being useful for the pop operation. A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct. As mentionned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation. Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct. 198 199 \begin{figure} 200 \begin{center} 201 {\resizebox{0.8\textwidth}{!}{\input{emptytls}}} 303 Finally, a third approach is to use dense information, similar to the bitmap, but have each thread keep its own independent copy of it. 304 While this approach can offer good scalability \emph{and} low latency, the liveliness of the information can become a problem. 305 In the simple cases, local copies of which underlying queues are empty can become stale and end-up not being useful for the pop operation. 306 A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct. 307 As mentioned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation. 308 Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct. 309 310 \begin{figure} 311 \begin{center} 312 \input{emptytls} 202 313 \end{center} 203 314 \caption{``More empty'' queue with added per processor bitmask to indicate which array cells have items.} … … 205 316 \end{figure} 206 317 207 There is a fundamental tradeoff among these approach. Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention case. Sparse global information helps high-contention cases but increases latency in zero-contention-cases, to read and ``aggregate'' the information\footnote{Hiearchical structures, e.g., binary search tree, effectively aggregate information but following pointer chains, learning information for each node. Similarly, other sparse schemes would need to read multiple cachelines to acquire all the information needed.}. Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases, however the information can become stale making it difficult to use to ensure correctness. The fact that these solutions have these fundamental limits suggest to me that a better solution combines these properties in an interesting ways. The lock discussed in Section~\ref{sec:resize} also allows for solutions that adapt to the number of processors, which could also prove useful. 318 There is a fundamental tradeoff among these approach. 319 Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention case. 320 Sparse global information helps high-contention cases but increases latency in zero-contention-cases, to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node. 321 Similarly, other sparse schemes need to read multiple cachelines to acquire all the information needed.}. 322 Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases, however the information can become stale making it difficult to use to ensure correctness. 323 The fact that these solutions have these fundamental limits suggest to me a better solution that attempts to combine these properties in an interesting ways. 324 Also, the lock discussed in Section~\ref{sec:resize} allows for solutions that adapt to the number of processors, which could also prove useful. 208 325 209 326 \paragraph{Objectives and Existing Work} 210 327 211 How much scalability is actually needed is highly debatable \emph{libfibre}\cit has compared favorably to other schedulers in webserver tests\cit and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask. As such, the single atomic instruction on a shared cacheline may be sufficiently performant. 212 213 I have built a prototype of this ready-queue in the shape of a data-queue, i.e., nodes on the queue are structures with a single int and the intrusive data fields. Using this prototype I ran preliminary performance experiments which confirm the expected performance in Table~\ref{tab:perfcases}. However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, e.g., threads are not independant of each other, when a thread blocks some other thread must intervene to wake it. 214 215 I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results. As creating one-to-one comparisons with the prototype will be complex. 328 How much scalability is actually needed is highly debatable. 329 \emph{libfibre}\cite{libfibre} has compared favorably to other schedulers in webserver tests\cite{karstenuser} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask. 330 As such, the single atomic instruction on a shared cacheline may be sufficiently performant. 331 332 I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single int representing a thread and intrusive data fields. 333 Using this prototype I ran preliminary performance experiments that confirm the expected performance in Table~\ref{tab:perfcases}. 334 However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, \eg threads are not independent of each other, when a thread blocks some other thread must intervene to wake it. 335 336 I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results, as creating one-to-one comparisons between the prototype and the \CFA runtime will be complex. 216 337 217 338 \subsection{Dynamic Resizing} \label{sec:resize} 339 218 340 \begin{figure} 219 341 \begin{center} … … 224 346 \end{figure} 225 347 226 The \CFA runtime system groups processors together as clusters. Threads on a cluster are always scheduled on one of the processors of the cluster. Currently, the runtime handles dynamically adding and removing processors from clusters at any time. Since this is part of the existing design, the proposed scheduler must also support this behaviour. However, dynamicaly resizing a cluster is considered a rare event associated with setup, teardown and major configuration changes. This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system. As such, the proposed scheduler must honor the correctness of these behaviour but does not have any performance objectives with regard to resizing a cluster. How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long period of times. However, as mentionned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance. The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks. Since the underlying queues are stored in a dense array, changing the number of queues requires resizing the array and expanding the array requires moving it. This can introduce memory reclamation problems if not done correctly. 348 The \CFA runtime system groups processors together as \newterm{clusters}, as shown in Figure~\ref{fig:system}. 349 Threads on a cluster are always scheduled on one of the processors of the cluster. 350 Currently, the runtime handles dynamically adding and removing processors from clusters at any time. 351 Since this is part of the existing design, the proposed scheduler must also support this behaviour. 352 However, dynamically resizing a cluster is considered a rare event associated with setup, tear down and major configuration changes. 353 This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system. 354 As such, the proposed scheduler must honour the correctness of this behaviour but does not have any performance objectives with regard to resizing a cluster. 355 How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long period of times. 356 However, as mentioned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance. 357 The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks. 358 Since the underlying queues are stored in a dense array, changing the number of queues requires resizing the array and expanding the array requires moving it, which can introduce memory reclamation problems if not done correctly. 227 359 228 360 \begin{figure} … … 230 362 \input{resize} 231 363 \end{center} 232 \caption{Copy of data structure shown in Figure~\ref{fig:base}. 364 \caption{Copy of data structure shown in Figure~\ref{fig:base}.} 233 365 \label{fig:base2} 234 366 \end{figure} 235 367 236 It is important to note how the array is used in this case. While the array cells are modified by every push and pop operation, the array itself, i.e., the pointer that would change when resized, is only read during these operations. Therefore the use of this pointer can be described as frequent reads and infrequent writes. This description effectively matches with the description of a Reader-Writer lock, infrequent but invasive updates among frequent read operations. In the case of the Ready-Queue described above, read operations are operations that push or pop from the ready-queue but do not invalidate any references to the ready queue data structures. Writes on the other-hand would add or remove inner queues, invalidating references to the array of inner queues in the process. Therefore, the current proposed approach to this problem is to add a per-cluster Reader Writer lock around the ready queue to prevent restructuring of the ready-queue data structure while threads are being pushed or popped. 237 238 There are possible alternatives to the Reader Writer lock solution. This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject\cit. However, the RWlock solution is simple and can be leveraged to solve other problems (e.g., processor ordering and memory reclamation of threads), which makes it an attractive solution. 368 It is important to note how the array is used in this case. 369 While the array cells are modified by every push and pop operation, the array itself, \ie the pointer that would change when resized, is only read during these operations. 370 Therefore the use of this pointer can be described as frequent reads and infrequent writes. 371 This description effectively matches with the description of a reader-writer lock, infrequent but invasive updates among frequent read operations. 372 In the case of the ready queue described above, read operations are operations that push or pop from the ready queue but do not invalidate any references to the ready queue data structures. 373 Writes on the other hand would add or remove inner queues, invalidating references to the array of inner queues in a process. 374 Therefore, the current proposed approach to this problem is to add a per-cluster reader-writer lock around the ready queue to prevent restructuring of the ready-queue data-structure while threads are being pushed or popped. 375 376 There are possible alternatives to the reader-writer lock solution. 377 This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject\cite{michael2004hazard, brown2015reclaiming}. 378 However, the reader-write lock-solution is simple and can be leveraged to solve other problems (\eg processor ordering and memory reclamation of threads), which makes it an attractive solution. 239 379 240 380 \paragraph{Objectives and Existing Work} 241 The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck. I have already built a lock that fits the desired requirements and preliminary testing show scalability and performance that exceed the target. As such, I do not consider this lock to be a risk on this project. 381 The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck. 382 I have already built a lock that fits the desired requirements and preliminary testing show scalability and performance that exceed the target. 383 As such, I do not consider this lock to be a risk for this project. 242 384 243 385 \subsection{Idle Sleep} \label{sec:sleep} 244 As mentioned, idle sleep is the process of putting processors to sleep when they have no threads to execute. In this context, processors are kernel-threads and sleeping refers to asking the kernel to block a thread. This benefit can be achieved with either thread synchronization operations like pthread\_cond\_wait or using signal operations like sigsuspend. The goal of putting idle processors to sleep is two-fold, it reduces energy consumption in cases where more idle kernel-threads translate to idle hardware threads, and reduces contention on the ready queue, since the otherwise idle processors generally contend trying to pop items from the queue. 245 246 Support for idle sleep broadly involves calling the operating system to block the kernel thread and handling the race between a blocking thread and the waking thread, and handling which kernel thread should sleep or wake-up. 247 248 When a processor decides to sleep, there is a race that occurs between it signalling that is going to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread. This is equivalent to the classic problem of missing signals when using condition variables: the ``sleepy'' processor indicates that it will sleep but has not yet gone to sleep, when another processor attempts to wake it up, the waking-up operation may claim nothing needs to be done and the signal is missed. In cases where threads are scheduled from processors on the current cluster, loosing signals is not necessarily critical, because at least some processors on the cluster are awake and may check for more processors eventually. Individual processors always finish scheduling threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready-queue is not linearizable). However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a thread migrating from a different cluster. In this case, missed signals can lead to the cluster deadlocking where it should not\footnote{Clusters ``should'' never deadlock, but for this proposal, cases where \CFA users \emph{actually} write \CFA code that leads to a deadlock is considered as a deadlock that ``should'' happen. }. Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed. For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock. To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleepy processor notices that a thread is scheduled after it signalled its intent to block or code scheduling threads sees the intent to sleep before scheduling and be able to wake-up the processor. This matter is complicated by the fact that pthreads offers few tools to implement this solution and offers no guarantee of ordering of threads waking up for most of these tools. 249 250 Another issues is trying to avoid kernel threads sleeping and waking frequently. A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up. This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer. Note that while this generally means organising the processors in a stack, I believe that the unique index provided by the ReaderWriter lock can be reused to strictly order the waking order of processors, causing a LIFO like waking order. While a strict LIFO stack is probably better, using the processor index could prove useful and offer a sufficiently LIFO ordering. 251 252 Finally, another important aspect of Idle Sleep is when should processors make the decision to sleep and when is it appropriate for sleeping processors to be woken up. Processors that are unnecessarily unblocked lead to unnecessary contention and power consumption, while too many sleeping processors can lead to sub-optimal throughput. Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency. There is already a wealth of research on the subject and I may use an existing approach for the Idle Sleep heuristic in this project. 386 387 \newterm{Idle sleep} is the process of putting processors to sleep when they have no threads to execute. 388 In this context, processors are kernel threads and sleeping refers to asking the kernel to block a thread. 389 This operation can be achieved with either thread synchronization operations like $pthread_cond_wait$ or using signal operations like $sigsuspend$. 390 The goal of putting idle processors to sleep is: 391 \begin{enumerate} 392 \item 393 reduce contention on the ready queue, since the otherwise idle processors generally contend trying to pop items from the queue, 394 \item 395 give back unneeded CPU time associated with a process to other user processors executing on the computer, 396 \item 397 and reduce energy consumption in cases where more idle kernel-threads translate to idle CPUs, which can cycle down. 398 \end{enumerate} 399 Support for idle sleep broadly involves calling the operating system to block the kernel thread and handling the race between a blocking thread and the waking thread, and handling which kernel thread should sleep or wake up. 400 401 When a processor decides to sleep, there is a race that occurs between it signalling that is going to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread. 402 This operation is equivalent to the classic problem of missing signals when using condition variables: the ``sleepy'' processor indicates its intention to block but has not yet gone to sleep when another processor attempts to wake it up. 403 The waking-up operation sees the blocked process and signals it, but the blocking process is racing to sleep so the signal is missed. 404 In cases where kernel threads are managed as processors on the current cluster, loosing signals is not necessarily critical, because at least some processors on the cluster are awake and may check for more processors eventually. 405 Individual processors always finish scheduling user threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready queue is not linearizable). 406 However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a user (or kernel) thread migrating from a different cluster. 407 In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} write \CFA code that leads to a deadlock.}. 408 Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed. 409 For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock. 410 To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleeping processor notices that a user thread is scheduled after the sleeping processor signalled its intent to block or code scheduling threads sees the intent to sleep before scheduling and be able to wake-up the processor. 411 This matter is complicated by the fact that pthreads and Linux offer few tools to implement this solution and no guarantee of ordering of threads waking up for most of these tools. 412 413 Another important issue is avoiding kernel threads sleeping and waking frequently because there is a significant operating-system cost. 414 This scenario happens when a program oscillates between high and low activity, needing most and then less processors. 415 A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up. 416 This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer. 417 Note that while this generally means organizing the processors in a stack, I believe that the unique index provided in my reader-writer lock can be reused to strictly order the waking processors, causing a mostly LIFO order. 418 While a strict LIFO stack is probably better, the processor index could prove useful for other reasons, while still offering a sufficiently LIFO ordering. 419 420 A final important aspect of idle sleep is when should processors make the decision to sleep and when is it appropriate for sleeping processors to be woken up. 421 Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to sub-optimal throughput. 422 Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency. 423 There is already a wealth of research on the subject\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg\cite{karstenuser}. 253 424 254 425 \subsection{Asynchronous I/O} 255 The final aspect of this proposal is asynchronous I/O. Without it, user threads that execute I/O operations block the underlying kernel thread, which leads to poor throughput. It would be preferrable to block the user-thread and reuse the underlying kernel-thread to run other ready threads. This approach requires intercepting the user-threads' calls to I/O operations, redirecting them to an asynchronous I/O interface and handling the multiplexing between the synchronous and asynchronous API. As such, these are the three components needed to implemented support for asynchronous I/O : an OS abstraction layer over the asynchronous interface, an event-engine to (de)multiplex the operations and a synchronous interface for users to use. None of these components currently exist in \CFA and I will need to build all three for this project. 426 427 The final aspect of this proposal is asynchronous I/O. 428 Without it, user threads that execute I/O operations block the underlying kernel thread, which leads to poor throughput. 429 It is preferable to block the user thread performing the I/O and reuse the underlying kernel-thread to run other ready user threads. 430 This approach requires intercepting user-thread calls to I/O operations, redirecting them to an asynchronous I/O interface, and handling the multiplexing/demultiplexing between the synchronous and asynchronous API. 431 As such, there are three components needed to implemented support for asynchronous I/O: 432 \begin{enumerate} 433 \item 434 an OS abstraction layer over the asynchronous interface, 435 \item 436 an event-engine to (de)multiplex the operations, 437 \item 438 and a synchronous interface for users to use. 439 \end{enumerate} 440 None of these components currently exist in \CFA and I will need to build all three for this project. 256 441 257 442 \paragraph{OS Abstraction} 258 One fundamental part for converting blocking I/O operations into non-blocking ones is having an underlying asynchronous I/O interface to direct the I/O operations. While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API. I plan to use an existing one that is sufficient. uC++ uses the \texttt{select} as its interface, which handles ttys, pipes and sockets, but not disk. It entails significant complexity and is being replaced, which make it a less interesting alternative. Another popular interface is \texttt{epoll}\cit, which is supposed to be cheaper than \texttt{select}. However, epoll also does not handle the file system and seems to have problem to linux pipes and \texttt{TTY}s\cit. A very recent alternative that must still be investigated is \texttt{io\_uring}. It claims to address some of the issues with \texttt{epoll} but is too recent to be confident that it does. Finally, a popular cross-platform alternative is \texttt{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features). However, as a full-featured library it includes much more than what is needed and could conflict with other features of \CFA unless significant effort is made to merge them together. 259 260 \paragraph{Event-Engine} 261 Laying on top of the asynchronous interface layer is the event-engine. This engine is responsible for multiplexing (batching) the synchronous I/O requests into an asynchronous I/O request and demultiplexing the results onto appropriate blocked threads. This step can be straightforward for the simple cases, but can become quite complex. Decisions that need to be made include : whether to poll from a seperate kernel thread or a regularly scheduled user thread, what should be the ordering used when results satisfy many requests, how to handle threads waiting for multiple operations, etc. 443 One fundamental part for converting blocking I/O operations into non-blocking ones is having an underlying asynchronous I/O interface to direct the I/O operations. 444 While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API. 445 It is sufficient to make one work in the complex context of the \CFA runtime. 446 \uC uses the $select$\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk. 447 $select$ entails significant complexity and is being replaced in UNIX operating-systems, which make it a less interesting alternative. 448 Another popular interface is $epoll$\cite{epoll}, which is supposed to be cheaper than $select$. 449 However, $epoll$ also does not handle the file system and anectodal evidence suggest it has problem with linux pipes and $TTY$s. 450 A popular cross-platform alternative is $libuv$\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features). 451 However, as a full-featured library it includes much more than I need and could conflict with other features of \CFA unless significant effort is made to merge them together. 452 A very recent alternative that I am investigating is $io_uring$\cite{io_uring}. 453 It claims to address some of the issues with $epoll$ and my early investigating suggest that the claim is accurate. 454 $io_uring$ uses a much more general approach where system calls are register to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors. 455 I believe this approach allows for fewer problems, \eg the manpage for $open$\cite{open} states: 456 \begin{quote} 457 Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices; 458 that is, I/O operations will (briefly) block when device activity is required, regardless of whether $O_NONBLOCK$ is set. 459 Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behavior when specifying this flag for regular files and block devices. 460 \end{quote} 461 This makes approach based on $epoll$/$select$ less reliable since they may not work for every file descriptors. 462 For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime, unless further work shows problems I haven't encountered yet. 463 However, only a small subset of the features are available in Ubuntu as of April 2020\cite{wiki:ubuntu-linux}, which will limit performance comparisons. 464 I do not believe this will affect the comparison result. 465 466 \paragraph{Event Engine} 467 Laying on top of the asynchronous interface layer is the event engine. 468 This engine is responsible for multiplexing (batching) the synchronous I/O requests into asynchronous I/O requests and demultiplexing the results to appropriate blocked user threads. 469 This step can be straightforward for simple cases, but becomes quite complex when there are thousands of user threads performing both reads and writes, possibly on overlapping file descriptors. 470 Decisions that need to be made include: 471 \begin{enumerate} 472 \item 473 whether to poll from a separate kernel thread or a regularly scheduled user thread, 474 \item 475 what should be the ordering used when results satisfy many requests, 476 \item 477 how to handle threads waiting for multiple operations, etc. 478 \end{enumerate} 262 479 263 480 \paragraph{Interface} 264 Finally, for these components to be available, it is necessary to expose them through a synchronous interface. The interface can be novel but it is preferrable to match the existing POSIX interface in order to be compatible with existing code. Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effeort. Where this is not applicable, a novel interface will be created to fill the gaps. 481 Finally, for these non-blocking I/O components to be available, it is necessary to expose them through a synchronous interface because that is the \CFA concurrent programming style. 482 The interface can be novel but it is preferable to match the existing POSIX interface when possible to be compatible with existing code. 483 Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effort. 484 Where new functionality is needed, I will create a novel interface to fill gaps and provide advanced features. 265 485 266 486 … … 268 488 % =============================================================================== 269 489 \section{Discussion} 270 490 I believe that runtime system and scheduling are still open topics. 491 Many ``state of the art'' production frameworks still use single threaded event-loops because of performance considerations, \eg \cite{nginx-design}, and, to my knowledge, no wideyl available system language offers modern threading facilities. 492 I believe the proposed work offers a novel runtime and scheduling package, where existing work only offers fragments that users must assemble themselves when possible. 271 493 272 494 % =============================================================================== 273 495 % =============================================================================== 274 496 \section{Timeline} 275 497 \begin{center} 498 \begin{tabular}{ | r @{--} l | p{4in} | } 499 \hline May 2020 & October 2020 & Creation of the performance benchmark. \\ 500 \hline November 2020 & March 2021 & Completion of the implementation. \\ 501 \hline March 2021 & April 2021 & Final Performance experiments. \\ 502 \hline May 2021 & August 2021 & Thesis writing and defense. \\ 503 \hline 504 \end{tabular} 505 \end{center} 276 506 277 507 % B I B L I O G R A P H Y -
doc/theses/thierry_delisle_PhD/comp_II/img/base.fig
re3bc51c rbcd74f3 1 #FIG 3.2 Produced by xfig version 3.2. 7a1 #FIG 3.2 Produced by xfig version 3.2.5c 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single 8 8 -2 9 9 1200 2 10 6 2400 3075 3000 420011 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 712 3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 359513 3000 333514 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 215 1 1 1.00 45.00 90.0016 2700 4200 2700 360017 -618 6 2400 2175 3000 337519 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 720 3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 269521 3000 243522 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 223 1 1 1.00 45.00 90.0024 2700 3375 2700 270025 -626 6 3600 2175 4200 337527 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 728 4200 2435 4050 2175 3750 2175 3600 2435 3750 2695 4050 269529 4200 243530 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 231 1 1 1.00 45.00 90.0032 3900 3375 3900 270033 -634 6 3600 3075 4200 420035 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 736 4200 3335 4050 3075 3750 3075 3600 3335 3750 3595 4050 359537 4200 333538 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 239 1 1 1.00 45.00 90.0040 3900 4200 3900 360041 -642 6 4200 3075 4800 420043 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 744 4800 3335 4650 3075 4350 3075 4200 3335 4350 3595 4650 359545 4800 333546 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 247 1 1 1.00 45.00 90.0048 4500 4200 4500 360049 -650 6 4800 3075 5400 420051 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 752 5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 359553 5400 333554 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 255 1 1 1.00 45.00 90.0056 5100 4200 5100 360057 -658 6 4800 2175 5400 337559 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 760 5400 2435 5250 2175 4950 2175 4800 2435 4950 2695 5250 269561 5400 243562 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 263 1 1 1.00 45.00 90.0064 5100 3375 5100 270065 -666 6 4800 1275 5400 247567 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 768 5400 1535 5250 1275 4950 1275 4800 1535 4950 1795 5250 179569 5400 153570 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 271 1 1 1.00 45.00 90.0072 5100 2475 5100 180073 -674 6 6000 2175 6600 337575 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 776 6600 2435 6450 2175 6150 2175 6000 2435 6150 2695 6450 269577 6600 243578 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 279 1 1 1.00 45.00 90.0080 6300 3375 6300 270081 -682 6 6000 3075 6600 420083 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 784 6600 3335 6450 3075 6150 3075 6000 3335 6150 3595 6450 359585 6600 333586 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 287 1 1 1.00 45.00 90.0088 6300 4200 6300 360089 -690 10 6 6750 4125 7050 4275 91 11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200 … … 93 13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200 94 14 -6 15 6 2400 2100 3000 2700 16 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400 17 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 18 2400 2475 3000 2475 19 4 1 0 50 -1 0 11 0.0000 2 120 210 2700 2650 TS\001 20 -6 21 6 2400 3000 3000 3600 22 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300 23 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 24 2400 3375 3000 3375 25 4 1 0 50 -1 0 11 0.0000 2 120 210 2700 3550 TS\001 26 -6 27 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 2400 300 300 3900 2400 4200 2400 28 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 3300 300 300 3900 3300 4200 3300 29 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 1500 300 300 5100 1500 5400 1500 30 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 2400 300 300 5100 2400 5400 2400 31 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300 32 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 2400 300 300 6300 2400 6600 2400 33 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 3300 300 300 6300 3300 6600 3300 34 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4509 3302 300 300 4509 3302 4809 3302 95 35 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 96 36 3000 3900 3000 4500 … … 109 49 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 110 50 2400 3900 7200 3900 7200 4500 2400 4500 2400 3900 51 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 52 1 1 1.00 45.00 90.00 53 2700 3300 2700 2700 54 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 55 1 1 1.00 45.00 90.00 56 3900 3300 3900 2700 57 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 58 1 1 1.00 45.00 90.00 59 3900 4200 3900 3600 60 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 61 1 1 1.00 45.00 90.00 62 5100 2475 5100 1800 63 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 64 1 1 1.00 45.00 90.00 65 5100 3300 5100 2700 66 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 67 1 1 1.00 45.00 90.00 68 5100 4200 5100 3600 69 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 70 1 1 1.00 45.00 90.00 71 6300 3300 6300 2700 72 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 73 1 1 1.00 45.00 90.00 74 6300 4200 6300 3600 75 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 76 1 1 1.00 45.00 90.00 77 2700 4200 2700 3600 111 78 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 112 2454 2520 2957 2520 113 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 114 2454 3417 2957 3417 115 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 116 2400 4350 3000 4350 117 4 2 0 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001 118 4 2 0 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001 119 4 2 0 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001 120 4 2 0 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001 121 4 0 0 50 -1 0 11 0.0000 2 135 180 2595 3561 TS\001 122 4 0 0 50 -1 0 11 0.0000 2 135 180 2595 2665 TS\001 123 4 0 0 50 -1 0 11 0.0000 2 135 180 2595 4479 TS\001 79 2400 4275 3000 4275 80 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 81 1 1 1.00 45.00 90.00 82 4500 4200 4500 3600 83 4 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001 84 4 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001 85 4 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001 86 4 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001 87 4 1 0 50 -1 0 11 0.0000 2 120 210 2700 4450 TS\001 -
doc/theses/thierry_delisle_PhD/comp_II/img/empty.fig
re3bc51c rbcd74f3 8 8 -2 9 9 1200 2 10 6 4800 3075 5400 420011 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 212 1 1 1.00 45.00 90.0013 5100 4200 5100 360014 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 715 5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 359516 5400 333517 -618 6 2400 2175 3000 337519 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 720 3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 269521 3000 243522 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 223 1 1 1.00 45.00 90.0024 2700 3375 2700 270025 -626 6 2400 3075 3000 420027 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 728 3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 359529 3000 333530 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 231 1 1 1.00 45.00 90.0032 2700 4200 2700 360033 -634 10 6 6750 4125 7050 4275 35 11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200 … … 37 13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200 38 14 -6 15 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300 16 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400 17 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300 39 18 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 40 19 3000 3900 3000 4500 … … 53 32 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 54 33 2400 3900 7200 3900 7200 4500 2400 4500 2400 3900 34 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 35 1 1 1.00 45.00 90.00 36 2700 3300 2700 2700 37 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 38 1 1 1.00 45.00 90.00 39 5100 4200 5100 3600 40 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 41 1 1 1.00 45.00 90.00 42 2700 4200 2700 3600 55 43 4 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001 56 44 4 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001 -
doc/theses/thierry_delisle_PhD/comp_II/img/emptybit.fig
re3bc51c rbcd74f3 8 8 -2 9 9 1200 2 10 6 4800 3075 5400 420011 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 212 1 1 1.00 45.00 90.0013 5100 4200 5100 360014 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 715 5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 359516 5400 333517 -618 6 2400 2175 3000 337519 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 720 3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 269521 3000 243522 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 223 1 1 1.00 45.00 90.0024 2700 3375 2700 270025 -626 6 2400 3075 3000 420027 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 728 3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 359529 3000 333530 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 231 1 1 1.00 45.00 90.0032 2700 4200 2700 360033 -634 10 6 6750 4125 7050 4275 35 11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200 … … 37 13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200 38 14 -6 15 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300 16 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400 17 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300 39 18 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 40 19 3000 3900 3000 4500 … … 53 32 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 54 33 2400 3900 7200 3900 7200 4500 2400 4500 2400 3900 34 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 35 1 1 1.00 45.00 90.00 36 2700 3300 2700 2700 37 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 38 1 1 1.00 45.00 90.00 39 5100 4200 5100 3600 40 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 41 1 1 1.00 45.00 90.00 42 2700 4200 2700 3600 43 4 0 0 50 -1 5 14 0.0000 2 180 1800 3750 4800 [1000100...]\001 55 44 4 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001 56 45 4 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001 57 46 4 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001 58 47 4 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001 59 4 0 0 50 -1 5 14 0.0000 2 180 1800 3750 4800 [1000100...]\001 -
doc/theses/thierry_delisle_PhD/comp_II/img/emptytls.fig
re3bc51c rbcd74f3 1 #FIG 3.2 Produced by xfig version 3.2. 7a1 #FIG 3.2 Produced by xfig version 3.2.5c 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single 8 8 -2 9 9 1200 2 10 6 4800 3075 5400 420011 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 212 1 1 1.00 45.00 90.0013 5100 4200 5100 360014 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 715 5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 359516 5400 333517 -618 6 2400 2175 3000 337519 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 720 3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 269521 3000 243522 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 223 1 1 1.00 45.00 90.0024 2700 3375 2700 270025 -626 6 2400 3075 3000 420027 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 728 3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 359529 3000 333530 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 231 1 1 1.00 45.00 90.0032 2700 4200 2700 360033 -634 10 6 6750 4125 7050 4275 35 11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200 … … 37 13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200 38 14 -6 15 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300 16 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400 17 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300 39 18 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 40 19 3000 3900 3000 4500 … … 53 32 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 54 33 2400 3900 7200 3900 7200 4500 2400 4500 2400 3900 55 4 2 0 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001 56 4 2 0 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001 57 4 2 0 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001 58 4 2 0 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001 59 4 0 0 50 -1 5 14 0.0000 2 165 1080 2400 5100 [1000100...]\001 60 4 0 0 50 -1 5 14 0.0000 2 165 1080 4425 5100 [1000100...]\001 61 4 0 0 50 -1 5 14 0.0000 2 165 1080 6450 5100 [1000100...]\001 62 4 0 0 50 -1 0 13 0.0000 2 135 630 1500 5175 Bitmask\001 63 4 0 0 50 -1 0 13 0.0000 2 135 1080 1050 4950 Thread-Local\001 34 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 35 1 1 1.00 45.00 90.00 36 2700 3300 2700 2700 37 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 38 1 1 1.00 45.00 90.00 39 5100 4200 5100 3600 40 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 41 1 1 1.00 45.00 90.00 42 2700 4200 2700 3600 43 4 0 0 50 -1 5 14 0.0000 2 180 1800 2400 5100 [1000100...]\001 44 4 0 0 50 -1 5 14 0.0000 2 180 1800 4425 5100 [1000100...]\001 45 4 0 0 50 -1 5 14 0.0000 2 180 1800 6450 5100 [1000100...]\001 46 4 0 0 50 -1 0 13 0.0000 2 135 690 1500 5175 Bitmask\001 47 4 0 0 50 -1 0 13 0.0000 2 150 1155 1050 4950 Thread-Local\001 48 4 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001 49 4 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001 50 4 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001 51 4 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001 -
doc/theses/thierry_delisle_PhD/comp_II/img/emptytree.fig
re3bc51c rbcd74f3 8 8 -2 9 9 1200 2 10 6 4800 3075 5400 420011 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 212 1 1 1.00 45.00 90.0013 5100 4200 5100 360014 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 715 5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 359516 5400 333517 -618 6 2400 2175 3000 337519 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 720 3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 269521 3000 243522 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 223 1 1 1.00 45.00 90.0024 2700 3375 2700 270025 -626 6 2400 3075 3000 420027 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 728 3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 359529 3000 333530 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 231 1 1 1.00 45.00 90.0032 2700 4200 2700 360033 -634 10 6 6750 4125 7050 4275 35 11 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6825 4200 20 20 6825 4200 6845 4200 … … 37 13 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200 38 14 -6 39 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 40 3000 3900 3000 4500 41 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 42 3600 3900 3600 4500 43 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 44 4200 3900 4200 4500 45 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 46 4800 3900 4800 4500 47 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 48 5400 3900 5400 4500 49 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 50 6000 3900 6000 4500 51 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 52 6600 3900 6600 4500 53 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 54 2400 3900 7200 3900 7200 4500 2400 4500 2400 3900 15 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300 16 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400 17 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300 55 18 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 56 19 1 1 1.00 45.00 90.00 … … 83 46 1 1 1.00 45.00 90.00 84 47 5850 5400 6300 5100 48 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 49 3000 3900 3000 4500 50 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 51 3600 3900 3600 4500 52 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 53 4200 3900 4200 4500 54 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 55 4800 3900 4800 4500 56 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 57 5400 3900 5400 4500 58 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 59 6000 3900 6000 4500 60 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 61 6600 3900 6600 4500 62 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 63 2400 3900 7200 3900 7200 4500 2400 4500 2400 3900 64 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 65 1 1 1.00 45.00 90.00 66 2700 3300 2700 2700 67 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 68 1 1 1.00 45.00 90.00 69 5100 4200 5100 3600 70 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 71 1 1 1.00 45.00 90.00 72 2700 4200 2700 3600 73 4 1 0 50 -1 0 12 0.0000 2 135 135 3300 4725 X\001 74 4 1 0 50 -1 0 12 0.0000 2 135 135 3900 5025 X\001 75 4 1 0 50 -1 0 12 0.0000 2 135 135 5700 4725 X\001 76 4 1 0 50 -1 0 12 0.0000 2 135 135 6300 5025 X\001 85 77 4 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001 86 78 4 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001 87 79 4 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001 88 80 4 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001 89 4 1 0 50 -1 0 12 0.0000 2 135 135 3300 4725 X\00190 4 1 0 50 -1 0 12 0.0000 2 135 135 3900 5025 X\00191 4 1 0 50 -1 0 12 0.0000 2 135 135 5700 4725 X\00192 4 1 0 50 -1 0 12 0.0000 2 135 135 6300 5025 X\001 -
doc/theses/thierry_delisle_PhD/comp_II/img/resize.fig
re3bc51c rbcd74f3 1 #FIG 3.2 Produced by xfig version 3.2. 7a1 #FIG 3.2 Produced by xfig version 3.2.5c 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single 8 8 -2 9 9 1200 2 10 6 2400 3075 3000 4200 11 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 12 3000 3335 2850 3075 2550 3075 2400 3335 2550 3595 2850 3595 13 3000 3335 14 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 10 6 7500 3675 8475 4500 11 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 15 12 1 1 1.00 45.00 90.00 16 2700 4200 2700 360017 -618 6 2400 2175 3000 337519 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 720 3000 2435 2850 2175 2550 2175 2400 2435 2550 2695 2850 269521 3000 243522 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 223 13 1 1 1.00 45.00 90.00 24 2700 3375 2700 2700 25 -6 26 6 3600 2175 4200 3375 27 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 28 4200 2435 4050 2175 3750 2175 3600 2435 3750 2695 4050 2695 29 4200 2435 30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 31 1 1 1.00 45.00 90.00 32 3900 3375 3900 2700 33 -6 34 6 3600 3075 4200 4200 35 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 36 4200 3335 4050 3075 3750 3075 3600 3335 3750 3595 4050 3595 37 4200 3335 38 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 39 1 1 1.00 45.00 90.00 40 3900 4200 3900 3600 41 -6 42 6 4200 3075 4800 4200 43 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 44 4800 3335 4650 3075 4350 3075 4200 3335 4350 3595 4650 3595 45 4800 3335 46 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 47 1 1 1.00 45.00 90.00 48 4500 4200 4500 3600 49 -6 50 6 4800 3075 5400 4200 51 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 52 5400 3335 5250 3075 4950 3075 4800 3335 4950 3595 5250 3595 53 5400 3335 54 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 55 1 1 1.00 45.00 90.00 56 5100 4200 5100 3600 57 -6 58 6 4800 2175 5400 3375 59 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 60 5400 2435 5250 2175 4950 2175 4800 2435 4950 2695 5250 2695 61 5400 2435 62 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 63 1 1 1.00 45.00 90.00 64 5100 3375 5100 2700 65 -6 66 6 4800 1275 5400 2475 67 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 68 5400 1535 5250 1275 4950 1275 4800 1535 4950 1795 5250 1795 69 5400 1535 70 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 71 1 1 1.00 45.00 90.00 72 5100 2475 5100 1800 73 -6 74 6 6000 2175 6600 3375 75 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 76 6600 2435 6450 2175 6150 2175 6000 2435 6150 2695 6450 2695 77 6600 2435 78 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 79 1 1 1.00 45.00 90.00 80 6300 3375 6300 2700 81 -6 82 6 6000 3075 6600 4200 83 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7 84 6600 3335 6450 3075 6150 3075 6000 3335 6150 3595 6450 3595 85 6600 3335 86 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 87 1 1 1.00 45.00 90.00 88 6300 4200 6300 3600 14 7500 4200 7950 4200 15 4 0 0 50 -1 0 12 0.0000 2 135 915 7500 3825 Grows with\001 16 4 0 0 50 -1 0 12 0.0000 2 135 840 7500 4050 additional\001 17 4 0 0 50 -1 0 12 0.0000 2 135 840 7500 4425 processors\001 89 18 -6 90 19 6 6750 4125 7050 4275 … … 93 22 1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200 94 23 -6 95 6 7500 3675 8475 4500 96 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 97 1 1 1.00 45.00 90.00 98 1 1 1.00 45.00 90.00 99 7500 4200 7950 4200 100 4 0 0 50 -1 0 12 0.0000 2 135 900 7500 3825 Grows with\001 101 4 0 0 50 -1 0 12 0.0000 2 135 900 7500 4050 additional\001 102 4 0 0 50 -1 0 12 0.0000 2 120 900 7500 4425 processors\001 103 -6 24 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 2400 300 300 3900 2400 4200 2400 25 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 3300 300 300 3900 3300 4200 3300 26 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 1500 300 300 5100 1500 5400 1500 27 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 2400 300 300 5100 2400 5400 2400 28 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 5100 3300 300 300 5100 3300 5400 3300 29 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 2400 300 300 6300 2400 6600 2400 30 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 3300 300 300 6300 3300 6600 3300 31 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4509 3302 300 300 4509 3302 4809 3302 32 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400 33 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300 34 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3 35 1 1 1.00 60.00 120.00 36 3750 5550 2400 5550 2400 4500 37 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 38 3600 5100 4500 5100 4500 6000 3600 6000 3600 5100 104 39 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 105 40 3000 3900 3000 4500 … … 118 53 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 119 54 2400 3900 7200 3900 7200 4500 2400 4500 2400 3900 120 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 121 3600 5100 4500 5100 4500 6300 3600 6300 3600 5100 122 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3 123 1 1 1.00 60.00 120.00 124 3750 5550 2400 5550 2400 4500 125 4 2 0 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001 126 4 2 0 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001 127 4 2 0 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001 128 4 2 0 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001 129 4 0 0 50 -1 0 13 0.0000 2 135 1980 3600 5025 Cluster Data Strcuture\001 130 4 0 0 50 -1 0 13 0.0000 2 165 1170 2400 5775 Array Pointer\001 55 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 56 1 1 1.00 45.00 90.00 57 2700 3300 2700 2700 58 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 59 1 1 1.00 45.00 90.00 60 3900 3300 3900 2700 61 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 62 1 1 1.00 45.00 90.00 63 3900 4200 3900 3600 64 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 65 1 1 1.00 45.00 90.00 66 5100 2475 5100 1800 67 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 68 1 1 1.00 45.00 90.00 69 5100 3300 5100 2700 70 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 71 1 1 1.00 45.00 90.00 72 5100 4200 5100 3600 73 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 74 1 1 1.00 45.00 90.00 75 6300 3300 6300 2700 76 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 77 1 1 1.00 45.00 90.00 78 6300 4200 6300 3600 79 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 80 1 1 1.00 45.00 90.00 81 2700 4200 2700 3600 82 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 83 1 1 1.00 45.00 90.00 84 4500 4200 4500 3600 85 4 0 0 50 -1 0 13 0.0000 2 180 1170 2400 5775 Array Pointer\001 86 4 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001 87 4 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001 88 4 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001 89 4 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001 90 4 1 0 50 -1 0 13 0.0000 2 150 1890 4050 5025 Cluster Data Structure\001 -
doc/theses/thierry_delisle_PhD/comp_II/local.bib
re3bc51c rbcd74f3 76 76 77 77 @article{finkel1987dib, 78 title={DIB —a distributed implementation of backtracking},78 title={DIB-a distributed implementation of backtracking}, 79 79 author={Finkel, Raphael and Manber, Udi}, 80 80 journal={ACM Transactions on Programming Languages and Systems (TOPLAS)}, … … 223 223 224 224 % =============================================================================== 225 % MISC 226 % =============================================================================== 225 % Algorithms 226 % =============================================================================== 227 @article{michael2004hazard, 228 title={Hazard pointers: Safe memory reclamation for lock-free objects}, 229 author={Michael, Maged M}, 230 journal={IEEE Transactions on Parallel and Distributed Systems}, 231 volume={15}, 232 number={6}, 233 pages={491--504}, 234 year={2004}, 235 publisher={IEEE} 236 } 237 238 @inproceedings{brown2015reclaiming, 239 title={Reclaiming memory for lock-free data structures: There has to be a better way}, 240 author={Brown, Trevor Alexander}, 241 booktitle={Proceedings of the 2015 ACM Symposium on Principles of Distributed Computing}, 242 pages={261--270}, 243 year={2015} 244 } 245 227 246 % Trevor's relaxed FIFO list 228 247 @inproceedings{alistarh2018relaxed, … … 242 261 year={2007} 243 262 } 263 264 % =============================================================================== 265 % Linux Man Pages 266 % =============================================================================== 267 @manual{open, 268 key = "open", 269 title = "open(2) Linux User's Manual", 270 year = "2020", 271 month = "February", 272 } 273 274 @manual{epoll, 275 key = "epoll", 276 title = "epoll(7) Linux User's Manual", 277 year = "2019", 278 month = "March", 279 } 280 281 @manual{select, 282 key = "select", 283 title = "select(7) Linux User's Manual", 284 year = "2019", 285 month = "March", 286 } 287 288 @misc{io_uring, 289 title = {Efficient IO with io\_uring}, 290 author = {Axboe, Jens}, 291 year = "2019", 292 month = "March", 293 version = {0,4}, 294 howpublished = {\url{https://kernel.dk/io_uring.pdf}} 295 } 296 297 @misc{libuv, 298 key = "libuv", 299 title = {libuv}, 300 howpublished = {\url{https://github.com/libuv/libuv}} 301 } 302 303 % =============================================================================== 304 % MISC 305 % =============================================================================== 306 307 @misc{nginx-design, 308 key = "nginx", 309 title={Inside {NGINX}: How We Designed for Performance \& Scale}, 310 howpublished= {\href{https://www.nginx.com/blog/inside-nginx-how-we-designed-for-performance-scale} 311 {https://\-www.nginx.com/\-blog/\-inside\--nginx\--how\--we\--designed\--for\--performance\--scale}}, 312 } 313 314 @article{schillings1996engineering, 315 title={Be engineering insights: Benaphores}, 316 author={Schillings, Benoit}, 317 journal={Be Newsletters}, 318 volume={1}, 319 number={26}, 320 year={1996} 321 } 322 323 @misc{wiki:thunderherd, 324 author = "{Wikipedia contributors}", 325 title = "Thundering herd problem --- {W}ikipedia{,} The Free Encyclopedia", 326 year = "2020", 327 howpublished = {\href{https://en.wikipedia.org/wiki/Thundering_herd_problem} 328 {https://\-en.wikipedia.org/\-wiki/\-Thundering\_herd\_problem}},}, 329 note = "[Online; accessed 14-April-2020]" 330 } 331 332 @misc{wiki:ubuntu-linux, 333 author = "{Wikipedia contributors}", 334 title = "Ubuntu version history : Table of versions --- {W}ikipedia{,} The Free Encyclopedia", 335 year = "2020", 336 howpublished = {\href{https://en.wikipedia.org/wiki/Ubuntu_version_history\#Table_of_versions} 337 {https://\-en.wikipedia.org/\-wiki/\-Ubuntu\_version\_history\#Table\_of\_versions}}, 338 note = "[Online; accessed 15-April-2020]" 339 } -
driver/cfa.cc
re3bc51c rbcd74f3 385 385 } // if 386 386 387 string preludedir; 387 388 switch(path) { 388 case Installed : Putenv( argv, "--prelude-dir=" + libdir ); break;389 case BuildTree : Putenv( argv, "--prelude-dir=" + libdir + "/prelude" ); break;390 case Distributed : Putenv( argv, "--prelude-dir=" + dir(argv[0])); break;389 case Installed : preludedir = libdir; break; 390 case BuildTree : preludedir = libdir + "/prelude"; break; 391 case Distributed : preludedir = dir(argv[0]); break; 391 392 } 393 394 Putenv( argv, "--prelude-dir=" + preludedir ); 395 args[nargs++] = "-include"; 396 args[nargs++] = (*new string(preludedir + "/defines.hfa")).c_str(); 392 397 393 398 for ( int i = 0; i < nlibs; i += 1 ) { // copy non-user libraries after all user libraries -
libcfa/Makefile.in
re3bc51c rbcd74f3 106 106 configure.lineno config.status.lineno 107 107 mkinstalldirs = $(install_sh) -d 108 CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa 108 109 CONFIG_CLEAN_FILES = 109 110 CONFIG_CLEAN_VPATH_FILES = -
libcfa/configure
re3bc51c rbcd74f3 790 790 enable_distcc 791 791 with_cfa_name 792 enable_static 792 793 enable_shared 793 enable_static794 794 with_pic 795 795 enable_fast_install … … 1452 1452 --disable-silent-rules verbose build output (undo: "make V=0") 1453 1453 --enable-distcc whether or not to enable distributed compilation 1454 --enable-static[=PKGS] build static libraries [default=no] 1454 1455 --enable-shared[=PKGS] build shared libraries [default=yes] 1455 --enable-static[=PKGS] build static libraries [default=yes]1456 1456 --enable-fast-install[=PKGS] 1457 1457 optimize for fast installation [default=yes] … … 1960 1960 1961 1961 } # ac_fn_cxx_try_link 1962 1963 # ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES 1964 # ------------------------------------------------------- 1965 # Tests whether HEADER exists, giving a warning if it cannot be compiled using 1966 # the include files in INCLUDES and setting the cache variable VAR 1967 # accordingly. 1968 ac_fn_c_check_header_mongrel () 1969 { 1970 as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack 1971 if eval \${$3+:} false; then : 1972 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 1973 $as_echo_n "checking for $2... " >&6; } 1974 if eval \${$3+:} false; then : 1975 $as_echo_n "(cached) " >&6 1976 fi 1977 eval ac_res=\$$3 1978 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 1979 $as_echo "$ac_res" >&6; } 1980 else 1981 # Is the header compilable? 1982 { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 1983 $as_echo_n "checking $2 usability... " >&6; } 1984 cat confdefs.h - <<_ACEOF >conftest.$ac_ext 1985 /* end confdefs.h. */ 1986 $4 1987 #include <$2> 1988 _ACEOF 1989 if ac_fn_c_try_compile "$LINENO"; then : 1990 ac_header_compiler=yes 1991 else 1992 ac_header_compiler=no 1993 fi 1994 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext 1995 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 1996 $as_echo "$ac_header_compiler" >&6; } 1997 1998 # Is the header present? 1999 { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 2000 $as_echo_n "checking $2 presence... " >&6; } 2001 cat confdefs.h - <<_ACEOF >conftest.$ac_ext 2002 /* end confdefs.h. */ 2003 #include <$2> 2004 _ACEOF 2005 if ac_fn_c_try_cpp "$LINENO"; then : 2006 ac_header_preproc=yes 2007 else 2008 ac_header_preproc=no 2009 fi 2010 rm -f conftest.err conftest.i conftest.$ac_ext 2011 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 2012 $as_echo "$ac_header_preproc" >&6; } 2013 2014 # So? What about this header? 2015 case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( 2016 yes:no: ) 2017 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 2018 $as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} 2019 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 2020 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} 2021 ;; 2022 no:yes:* ) 2023 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 2024 $as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} 2025 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 2026 $as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} 2027 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 2028 $as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} 2029 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 2030 $as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} 2031 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 2032 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} 2033 ( $as_echo "## --------------------------------------- ## 2034 ## Report this to cforall@plg.uwaterloo.ca ## 2035 ## --------------------------------------- ##" 2036 ) | sed "s/^/$as_me: WARNING: /" >&2 2037 ;; 2038 esac 2039 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 2040 $as_echo_n "checking for $2... " >&6; } 2041 if eval \${$3+:} false; then : 2042 $as_echo_n "(cached) " >&6 2043 else 2044 eval "$3=\$ac_header_compiler" 2045 fi 2046 eval ac_res=\$$3 2047 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 2048 $as_echo "$ac_res" >&6; } 2049 fi 2050 eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno 2051 2052 } # ac_fn_c_check_header_mongrel 1962 2053 cat >config.log <<_ACEOF 1963 2054 This file contains any messages produced by compilers while … … 7939 8030 7940 8031 # Set options 8032 # Check whether --enable-static was given. 8033 if test "${enable_static+set}" = set; then : 8034 enableval=$enable_static; p=${PACKAGE-default} 8035 case $enableval in 8036 yes) enable_static=yes ;; 8037 no) enable_static=no ;; 8038 *) 8039 enable_static=no 8040 # Look at the argument we got. We use all the common list separators. 8041 lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, 8042 for pkg in $enableval; do 8043 IFS=$lt_save_ifs 8044 if test "X$pkg" = "X$p"; then 8045 enable_static=yes 8046 fi 8047 done 8048 IFS=$lt_save_ifs 8049 ;; 8050 esac 8051 else 8052 enable_static=no 8053 fi 8054 8055 8056 8057 8058 8059 8060 7941 8061 7942 8062 … … 7971 8091 fi 7972 8092 7973 7974 7975 7976 7977 7978 7979 7980 7981 # Check whether --enable-static was given.7982 if test "${enable_static+set}" = set; then :7983 enableval=$enable_static; p=${PACKAGE-default}7984 case $enableval in7985 yes) enable_static=yes ;;7986 no) enable_static=no ;;7987 *)7988 enable_static=no7989 # Look at the argument we got. We use all the common list separators.7990 lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,7991 for pkg in $enableval; do7992 IFS=$lt_save_ifs7993 if test "X$pkg" = "X$p"; then7994 enable_static=yes7995 fi7996 done7997 IFS=$lt_save_ifs7998 ;;7999 esac8000 else8001 enable_static=yes8002 fi8003 8093 8004 8094 … … 16859 16949 16860 16950 16951 for ac_header in linux/io_uring.h 16952 do : 16953 ac_fn_c_check_header_mongrel "$LINENO" "linux/io_uring.h" "ac_cv_header_linux_io_uring_h" "$ac_includes_default" 16954 if test "x$ac_cv_header_linux_io_uring_h" = xyes; then : 16955 cat >>confdefs.h <<_ACEOF 16956 #define HAVE_LINUX_IO_URING_H 1 16957 _ACEOF 16958 16959 fi 16960 16961 done 16962 16963 for ac_func in preadv2 pwritev2 16964 do : 16965 as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` 16966 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" 16967 if eval test \"x\$"$as_ac_var"\" = x"yes"; then : 16968 cat >>confdefs.h <<_ACEOF 16969 #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 16970 _ACEOF 16971 16972 fi 16973 done 16974 16975 16861 16976 ac_config_files="$ac_config_files Makefile src/Makefile prelude/Makefile" 16977 16978 16979 ac_config_headers="$ac_config_headers prelude/defines.hfa" 16862 16980 16863 16981 … … 16952 17070 test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' 16953 17071 16954 # Transform confdefs.h into DEFS. 16955 # Protect against shell expansion while executing Makefile rules. 16956 # Protect against Makefile macro expansion. 16957 # 16958 # If the first sed substitution is executed (which looks for macros that 16959 # take arguments), then branch to the quote section. Otherwise, 16960 # look for a macro that doesn't take arguments. 16961 ac_script=' 16962 :mline 16963 /\\$/{ 16964 N 16965 s,\\\n,, 16966 b mline 16967 } 16968 t clear 16969 :clear 16970 s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g 16971 t quote 16972 s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g 16973 t quote 16974 b any 16975 :quote 16976 s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g 16977 s/\[/\\&/g 16978 s/\]/\\&/g 16979 s/\$/$$/g 16980 H 16981 :any 16982 ${ 16983 g 16984 s/^\n// 16985 s/\n/ /g 16986 p 16987 } 16988 ' 16989 DEFS=`sed -n "$ac_script" confdefs.h` 16990 17072 DEFS=-DHAVE_CONFIG_H 16991 17073 16992 17074 ac_libobjs= … … 17466 17548 esac 17467 17549 17550 case $ac_config_headers in *" 17551 "*) set x $ac_config_headers; shift; ac_config_headers=$*;; 17552 esac 17468 17553 17469 17554 … … 17471 17556 # Files that config.status was made for. 17472 17557 config_files="$ac_config_files" 17558 config_headers="$ac_config_headers" 17473 17559 config_commands="$ac_config_commands" 17474 17560 … … 17492 17578 --file=FILE[:TEMPLATE] 17493 17579 instantiate the configuration file FILE 17580 --header=FILE[:TEMPLATE] 17581 instantiate the configuration header FILE 17494 17582 17495 17583 Configuration files: 17496 17584 $config_files 17585 17586 Configuration headers: 17587 $config_headers 17497 17588 17498 17589 Configuration commands: … … 17562 17653 as_fn_append CONFIG_FILES " '$ac_optarg'" 17563 17654 ac_need_defaults=false;; 17564 --he | --h | --help | --hel | -h ) 17655 --header | --heade | --head | --hea ) 17656 $ac_shift 17657 case $ac_optarg in 17658 *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; 17659 esac 17660 as_fn_append CONFIG_HEADERS " '$ac_optarg'" 17661 ac_need_defaults=false;; 17662 --he | --h) 17663 # Conflict between --help and --header 17664 as_fn_error $? "ambiguous option: \`$1' 17665 Try \`$0 --help' for more information.";; 17666 --help | --hel | -h ) 17565 17667 $as_echo "$ac_cs_usage"; exit ;; 17566 17668 -q | -quiet | --quiet | --quie | --qui | --qu | --q \ … … 17625 17727 macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' 17626 17728 macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' 17729 enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' 17627 17730 enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' 17628 enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'17629 17731 pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' 17630 17732 enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' … … 18009 18111 "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; 18010 18112 "prelude/Makefile") CONFIG_FILES="$CONFIG_FILES prelude/Makefile" ;; 18113 "prelude/defines.hfa") CONFIG_HEADERS="$CONFIG_HEADERS prelude/defines.hfa" ;; 18011 18114 18012 18115 *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; … … 18021 18124 if $ac_need_defaults; then 18022 18125 test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files 18126 test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers 18023 18127 test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands 18024 18128 fi … … 18209 18313 fi # test -n "$CONFIG_FILES" 18210 18314 18211 18212 eval set X " :F $CONFIG_FILES :C $CONFIG_COMMANDS" 18315 # Set up the scripts for CONFIG_HEADERS section. 18316 # No need to generate them if there are no CONFIG_HEADERS. 18317 # This happens for instance with `./config.status Makefile'. 18318 if test -n "$CONFIG_HEADERS"; then 18319 cat >"$ac_tmp/defines.awk" <<\_ACAWK || 18320 BEGIN { 18321 _ACEOF 18322 18323 # Transform confdefs.h into an awk script `defines.awk', embedded as 18324 # here-document in config.status, that substitutes the proper values into 18325 # config.h.in to produce config.h. 18326 18327 # Create a delimiter string that does not exist in confdefs.h, to ease 18328 # handling of long lines. 18329 ac_delim='%!_!# ' 18330 for ac_last_try in false false :; do 18331 ac_tt=`sed -n "/$ac_delim/p" confdefs.h` 18332 if test -z "$ac_tt"; then 18333 break 18334 elif $ac_last_try; then 18335 as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 18336 else 18337 ac_delim="$ac_delim!$ac_delim _$ac_delim!! " 18338 fi 18339 done 18340 18341 # For the awk script, D is an array of macro values keyed by name, 18342 # likewise P contains macro parameters if any. Preserve backslash 18343 # newline sequences. 18344 18345 ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* 18346 sed -n ' 18347 s/.\{148\}/&'"$ac_delim"'/g 18348 t rset 18349 :rset 18350 s/^[ ]*#[ ]*define[ ][ ]*/ / 18351 t def 18352 d 18353 :def 18354 s/\\$// 18355 t bsnl 18356 s/["\\]/\\&/g 18357 s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ 18358 D["\1"]=" \3"/p 18359 s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p 18360 d 18361 :bsnl 18362 s/["\\]/\\&/g 18363 s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ 18364 D["\1"]=" \3\\\\\\n"\\/p 18365 t cont 18366 s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p 18367 t cont 18368 d 18369 :cont 18370 n 18371 s/.\{148\}/&'"$ac_delim"'/g 18372 t clear 18373 :clear 18374 s/\\$// 18375 t bsnlc 18376 s/["\\]/\\&/g; s/^/"/; s/$/"/p 18377 d 18378 :bsnlc 18379 s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p 18380 b cont 18381 ' <confdefs.h | sed ' 18382 s/'"$ac_delim"'/"\\\ 18383 "/g' >>$CONFIG_STATUS || ac_write_fail=1 18384 18385 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 18386 for (key in D) D_is_set[key] = 1 18387 FS = "" 18388 } 18389 /^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { 18390 line = \$ 0 18391 split(line, arg, " ") 18392 if (arg[1] == "#") { 18393 defundef = arg[2] 18394 mac1 = arg[3] 18395 } else { 18396 defundef = substr(arg[1], 2) 18397 mac1 = arg[2] 18398 } 18399 split(mac1, mac2, "(") #) 18400 macro = mac2[1] 18401 prefix = substr(line, 1, index(line, defundef) - 1) 18402 if (D_is_set[macro]) { 18403 # Preserve the white space surrounding the "#". 18404 print prefix "define", macro P[macro] D[macro] 18405 next 18406 } else { 18407 # Replace #undef with comments. This is necessary, for example, 18408 # in the case of _POSIX_SOURCE, which is predefined and required 18409 # on some systems where configure will not decide to define it. 18410 if (defundef == "undef") { 18411 print "/*", prefix defundef, macro, "*/" 18412 next 18413 } 18414 } 18415 } 18416 { print } 18417 _ACAWK 18418 _ACEOF 18419 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 18420 as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 18421 fi # test -n "$CONFIG_HEADERS" 18422 18423 18424 eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" 18213 18425 shift 18214 18426 for ac_tag … … 18429 18641 || as_fn_error $? "could not create $ac_file" "$LINENO" 5 18430 18642 ;; 18431 18643 :H) 18644 # 18645 # CONFIG_HEADER 18646 # 18647 if test x"$ac_file" != x-; then 18648 { 18649 $as_echo "/* $configure_input */" \ 18650 && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" 18651 } >"$ac_tmp/config.h" \ 18652 || as_fn_error $? "could not create $ac_file" "$LINENO" 5 18653 if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then 18654 { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 18655 $as_echo "$as_me: $ac_file is unchanged" >&6;} 18656 else 18657 rm -f "$ac_file" 18658 mv "$ac_tmp/config.h" "$ac_file" \ 18659 || as_fn_error $? "could not create $ac_file" "$LINENO" 5 18660 fi 18661 else 18662 $as_echo "/* $configure_input */" \ 18663 && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ 18664 || as_fn_error $? "could not create -" "$LINENO" 5 18665 fi 18666 # Compute "$ac_file"'s index in $config_headers. 18667 _am_arg="$ac_file" 18668 _am_stamp_count=1 18669 for _am_header in $config_headers :; do 18670 case $_am_header in 18671 $_am_arg | $_am_arg:* ) 18672 break ;; 18673 * ) 18674 _am_stamp_count=`expr $_am_stamp_count + 1` ;; 18675 esac 18676 done 18677 echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || 18678 $as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ 18679 X"$_am_arg" : 'X\(//\)[^/]' \| \ 18680 X"$_am_arg" : 'X\(//\)$' \| \ 18681 X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || 18682 $as_echo X"$_am_arg" | 18683 sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ 18684 s//\1/ 18685 q 18686 } 18687 /^X\(\/\/\)[^/].*/{ 18688 s//\1/ 18689 q 18690 } 18691 /^X\(\/\/\)$/{ 18692 s//\1/ 18693 q 18694 } 18695 /^X\(\/\).*/{ 18696 s//\1/ 18697 q 18698 } 18699 s/.*/./; q'`/stamp-h$_am_stamp_count 18700 ;; 18432 18701 18433 18702 :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 … … 18587 18856 macro_revision=$macro_revision 18588 18857 18858 # Whether or not to build static libraries. 18859 build_old_libs=$enable_static 18860 18589 18861 # Whether or not to build shared libraries. 18590 18862 build_libtool_libs=$enable_shared 18591 18592 # Whether or not to build static libraries.18593 build_old_libs=$enable_static18594 18863 18595 18864 # What type of objects to build. -
libcfa/configure.ac
re3bc51c rbcd74f3 109 109 110 110 # Checks for programs. 111 LT_INIT 111 LT_INIT([disable-static]) 112 112 113 113 AC_PROG_CXX … … 118 118 AC_PROG_MAKE_SET 119 119 120 AC_CHECK_HEADERS([linux/io_uring.h]) 121 AC_CHECK_FUNCS([preadv2 pwritev2]) 122 120 123 AC_CONFIG_FILES([ 121 124 Makefile … … 124 127 ]) 125 128 129 AC_CONFIG_HEADERS(prelude/defines.hfa) 130 126 131 AC_OUTPUT() 127 132 -
libcfa/prelude/Makefile.am
re3bc51c rbcd74f3 21 21 # put into lib for now 22 22 cfalibdir = ${CFA_LIBDIR} 23 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c 23 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa 24 24 25 25 CC = @LOCAL_CFACC@ -
libcfa/prelude/Makefile.in
re3bc51c rbcd74f3 1 # Makefile.in generated by automake 1.1 6.1from Makefile.am.1 # Makefile.in generated by automake 1.15 from Makefile.am. 2 2 # @configure_input@ 3 3 4 # Copyright (C) 1994-201 8Free Software Foundation, Inc.4 # Copyright (C) 1994-2014 Free Software Foundation, Inc. 5 5 6 6 # This Makefile.in is free software; the Free Software Foundation … … 104 104 DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) 105 105 mkinstalldirs = $(install_sh) -d 106 CONFIG_HEADER = defines.hfa 106 107 CONFIG_CLEAN_FILES = 107 108 CONFIG_CLEAN_VPATH_FILES = … … 154 155 am__installdirs = "$(DESTDIR)$(cfalibdir)" 155 156 DATA = $(cfalib_DATA) 156 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) 157 am__DIST_COMMON = $(srcdir)/Makefile.in 157 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \ 158 $(LISP)defines.hfa.in 159 # Read a list of newline-separated strings from the standard input, 160 # and print each of them once, without duplicates. Input order is 161 # *not* preserved. 162 am__uniquify_input = $(AWK) '\ 163 BEGIN { nonempty = 0; } \ 164 { items[$$0] = 1; nonempty = 1; } \ 165 END { if (nonempty) { for (i in items) print i; }; } \ 166 ' 167 # Make sure the list of sources is unique. This is necessary because, 168 # e.g., the same source file might be shared among _SOURCES variables 169 # for different programs/libraries. 170 am__define_uniq_tagged_files = \ 171 list='$(am__tagged_files)'; \ 172 unique=`for i in $$list; do \ 173 if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ 174 done | $(am__uniquify_input)` 175 ETAGS = etags 176 CTAGS = ctags 177 am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/defines.hfa.in 158 178 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) 159 179 ACLOCAL = @ACLOCAL@ … … 306 326 # put into lib for now 307 327 cfalibdir = ${CFA_LIBDIR} 308 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c 328 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa 309 329 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@ 310 330 AM_CFAFLAGS = @CONFIG_CFAFLAGS@ 311 331 MOSTLYCLEANFILES = bootloader.c builtins.cf extras.cf gcc-builtins.c gcc-builtins.cf prelude.cfa 312 332 MAINTAINERCLEANFILES = ${addprefix ${libdir}/,${cfalib_DATA}} ${addprefix ${libdir}/,${lib_LIBRARIES}} 313 all: all-am 333 all: defines.hfa 334 $(MAKE) $(AM_MAKEFLAGS) all-am 314 335 315 336 .SUFFIXES: … … 331 352 cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ 332 353 *) \ 333 echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__ maybe_remake_depfiles)'; \334 cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__ maybe_remake_depfiles);; \354 echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ 355 cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ 335 356 esac; 336 357 … … 343 364 cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh 344 365 $(am__aclocal_m4_deps): 366 367 defines.hfa: stamp-h1 368 @test -f $@ || rm -f stamp-h1 369 @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1 370 371 stamp-h1: $(srcdir)/defines.hfa.in $(top_builddir)/config.status 372 @rm -f stamp-h1 373 cd $(top_builddir) && $(SHELL) ./config.status prelude/defines.hfa 374 $(srcdir)/defines.hfa.in: $(am__configure_deps) 375 ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) 376 rm -f stamp-h1 377 touch $@ 378 379 distclean-hdr: 380 -rm -f defines.hfa stamp-h1 345 381 346 382 mostlyclean-libtool: … … 370 406 files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ 371 407 dir='$(DESTDIR)$(cfalibdir)'; $(am__uninstall_files_from_dir) 372 tags TAGS: 373 374 ctags CTAGS: 375 376 cscope cscopelist: 377 378 379 distdir: $(BUILT_SOURCES) 380 $(MAKE) $(AM_MAKEFLAGS) distdir-am 381 382 distdir-am: $(DISTFILES) 408 409 ID: $(am__tagged_files) 410 $(am__define_uniq_tagged_files); mkid -fID $$unique 411 tags: tags-am 412 TAGS: tags 413 414 tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) 415 set x; \ 416 here=`pwd`; \ 417 $(am__define_uniq_tagged_files); \ 418 shift; \ 419 if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ 420 test -n "$$unique" || unique=$$empty_fix; \ 421 if test $$# -gt 0; then \ 422 $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ 423 "$$@" $$unique; \ 424 else \ 425 $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ 426 $$unique; \ 427 fi; \ 428 fi 429 ctags: ctags-am 430 431 CTAGS: ctags 432 ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) 433 $(am__define_uniq_tagged_files); \ 434 test -z "$(CTAGS_ARGS)$$unique" \ 435 || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ 436 $$unique 437 438 GTAGS: 439 here=`$(am__cd) $(top_builddir) && pwd` \ 440 && $(am__cd) $(top_srcdir) \ 441 && gtags -i $(GTAGS_ARGS) "$$here" 442 cscopelist: cscopelist-am 443 444 cscopelist-am: $(am__tagged_files) 445 list='$(am__tagged_files)'; \ 446 case "$(srcdir)" in \ 447 [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ 448 *) sdir=$(subdir)/$(srcdir) ;; \ 449 esac; \ 450 for i in $$list; do \ 451 if test -f "$$i"; then \ 452 echo "$(subdir)/$$i"; \ 453 else \ 454 echo "$$sdir/$$i"; \ 455 fi; \ 456 done >> $(top_builddir)/cscope.files 457 458 distclean-tags: 459 -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags 460 461 distdir: $(DISTFILES) 383 462 @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ 384 463 topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ … … 412 491 check-am: all-am 413 492 check: check-am 414 all-am: Makefile $(DATA) 493 all-am: Makefile $(DATA) defines.hfa 415 494 installdirs: 416 495 for dir in "$(DESTDIR)$(cfalibdir)"; do \ … … 455 534 distclean: distclean-am 456 535 -rm -f Makefile 457 distclean-am: clean-am distclean-generic 536 distclean-am: clean-am distclean-generic distclean-hdr distclean-tags 458 537 459 538 dvi: dvi-am … … 516 595 uninstall-am: uninstall-cfalibDATA 517 596 518 .MAKE: install-am install-strip 519 520 .PHONY: all all-am check check-am clean clean-generic clean-libtool \ 521 cscopelist-am ctags-am distclean distclean-generic \ 522 distclean-libtool distdir dvi dvi-am html html-am info info-am \ 597 .MAKE: all install-am install-strip 598 599 .PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ 600 clean-libtool cscopelist-am ctags ctags-am distclean \ 601 distclean-generic distclean-hdr distclean-libtool \ 602 distclean-tags distdir dvi dvi-am html html-am info info-am \ 523 603 install install-am install-cfalibDATA install-data \ 524 604 install-data-am install-dvi install-dvi-am install-exec \ … … 529 609 maintainer-clean-generic maintainer-clean-local mostlyclean \ 530 610 mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ 531 tags -am uninstall uninstall-am uninstall-cfalibDATA611 tags tags-am uninstall uninstall-am uninstall-cfalibDATA 532 612 533 613 .PRECIOUS: Makefile -
libcfa/src/Makefile.am
re3bc51c rbcd74f3 33 33 # The built sources must not depend on the installed headers 34 34 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@ 35 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC - pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@35 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@ 36 36 AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@ 37 37 CFACC = @CFACC@ … … 39 39 #---------------------------------------------------------------------------------------------------------------- 40 40 if BUILDLIB 41 headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa 41 headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa 42 42 headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \ 43 43 containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa … … 48 48 thread_headers_nosrc = concurrency/invoke.h 49 49 thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa 50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/ preemption.cfa ${thread_headers:.hfa=.cfa}50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa ${thread_headers:.hfa=.cfa} 51 51 else 52 52 headers = -
libcfa/src/Makefile.in
re3bc51c rbcd74f3 105 105 $(am__nobase_cfa_include_HEADERS_DIST) $(am__DIST_COMMON) 106 106 mkinstalldirs = $(install_sh) -d 107 CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa 107 108 CONFIG_CLEAN_FILES = 108 109 CONFIG_CLEAN_VPATH_FILES = … … 164 165 am__libcfathread_la_SOURCES_DIST = \ 165 166 concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \ 166 concurrency/invoke.c concurrency/ preemption.cfa \167 concurrency/ coroutine.cfa concurrency/thread.cfa \168 concurrency/ kernel.cfa concurrency/monitor.cfa \169 concurrency/m utex.cfa167 concurrency/invoke.c concurrency/io.cfa \ 168 concurrency/preemption.cfa concurrency/coroutine.cfa \ 169 concurrency/thread.cfa concurrency/kernel.cfa \ 170 concurrency/monitor.cfa concurrency/mutex.cfa 170 171 @BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \ 171 172 @BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \ … … 174 175 @BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \ 175 176 @BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \ 176 @BUILDLIB_TRUE@ concurrency/preemption.lo $(am__objects_3) 177 @BUILDLIB_TRUE@ concurrency/io.lo concurrency/preemption.lo \ 178 @BUILDLIB_TRUE@ $(am__objects_3) 177 179 am_libcfathread_la_OBJECTS = $(am__objects_4) 178 180 libcfathread_la_OBJECTS = $(am_libcfathread_la_OBJECTS) … … 193 195 am__v_at_0 = @ 194 196 am__v_at_1 = 195 DEFAULT_INCLUDES = -I.@am__isrc@ 197 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/prelude 196 198 depcomp = $(SHELL) $(top_srcdir)/automake/depcomp 197 199 am__depfiles_maybe = depfiles … … 239 241 containers/vector.hfa bitmanip.hfa math.hfa gmp.hfa time_t.hfa \ 240 242 bits/align.hfa bits/containers.hfa bits/defs.hfa \ 241 bits/debug.hfa bits/locks.hfa con currency/coroutine.hfa \242 concurrency/ thread.hfa concurrency/kernel.hfa \243 concurrency/ monitor.hfa concurrency/mutex.hfa \244 concurrency/ invoke.h243 bits/debug.hfa bits/locks.hfa containers/list.hfa \ 244 concurrency/coroutine.hfa concurrency/thread.hfa \ 245 concurrency/kernel.hfa concurrency/monitor.hfa \ 246 concurrency/mutex.hfa concurrency/invoke.h 245 247 HEADERS = $(nobase_cfa_include_HEADERS) 246 248 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) … … 456 458 # The built sources must not depend on the installed headers 457 459 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@ 458 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC - pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@460 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@ 459 461 AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@ 460 462 @BUILDLIB_FALSE@headers_nosrc = 461 463 462 464 #---------------------------------------------------------------------------------------------------------------- 463 @BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa 465 @BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa 464 466 @BUILDLIB_FALSE@headers = 465 467 @BUILDLIB_TRUE@headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \ … … 474 476 @BUILDLIB_FALSE@thread_headers = 475 477 @BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa 476 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/ preemption.cfa ${thread_headers:.hfa=.cfa}478 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa ${thread_headers:.hfa=.cfa} 477 479 478 480 #---------------------------------------------------------------------------------------------------------------- … … 608 610 concurrency/$(DEPDIR)/$(am__dirstamp) 609 611 concurrency/invoke.lo: concurrency/$(am__dirstamp) \ 612 concurrency/$(DEPDIR)/$(am__dirstamp) 613 concurrency/io.lo: concurrency/$(am__dirstamp) \ 610 614 concurrency/$(DEPDIR)/$(am__dirstamp) 611 615 concurrency/preemption.lo: concurrency/$(am__dirstamp) \ -
libcfa/src/bitmanip.hfa
re3bc51c rbcd74f3 11 11 // Created On : Sat Mar 14 18:12:27 2020 12 12 // Last Modified By : Peter A. Buhr 13 // Last Modified On : Mon Mar 16 14:28:46202014 // Update Count : 4913 // Last Modified On : Sun Apr 19 22:29:58 2020 14 // Update Count : 121 15 15 // 16 16 … … 21 21 // Bits are numbered 1-N. 22 22 23 #include <assert.h> 23 //#include <assert.h> 24 25 #define __bitsizeof( n ) (sizeof(n) * __CHAR_BIT__) 24 26 25 27 static inline { 26 27 unsigned int cl0( unsigned char n ) { return n != 0 ? __builtin_clz( n ) - (sizeof(unsigned int) * __CHAR_BIT__ - sizeof(n) * __CHAR_BIT__) : sizeof(n) * __CHAR_BIT__; }28 unsigned int cl0( unsigned short int n ) { return n != 0 ? __builtin_clz( n ) - (sizeof(unsigned int) * __CHAR_BIT__ - sizeof(n) * __CHAR_BIT__) : sizeof(n) * __CHAR_BIT__; }29 unsigned int cl0( unsigned int n ) { return n != 0 ? __builtin_clz( n ) : sizeof(n) * __CHAR_BIT__; }30 unsigned int cl0( unsigned long int n ) { return n != 0 ? __builtin_clzl( n ) : sizeof(n) * __CHAR_BIT__; }31 unsigned int cl0( unsigned long long int n ) { return n != 0 ? __builtin_clzll( n ) : sizeof(n) * __CHAR_BIT__; }28 // Count leading 0 bits. 29 unsigned int leading0s( unsigned char n ) { return n != 0 ? __builtin_clz( n ) - (__bitsizeof(unsigned int) - __bitsizeof(n)) : __bitsizeof(n); } 30 unsigned int leading0s( unsigned short int n ) { return n != 0 ? __builtin_clz( n ) - (__bitsizeof(unsigned int) - __bitsizeof(n)) : __bitsizeof(n); } 31 unsigned int leading0s( unsigned int n ) { return n != 0 ? __builtin_clz( n ) : __bitsizeof(n); } 32 unsigned int leading0s( unsigned long int n ) { return n != 0 ? __builtin_clzl( n ) : __bitsizeof(n); } 33 unsigned int leading0s( unsigned long long int n ) { return n != 0 ? __builtin_clzll( n ) : __bitsizeof(n); } 32 34 33 34 unsigned int ct0( unsigned char n ) { return n != 0 ? __builtin_ctz( n ) : sizeof(n) * __CHAR_BIT__; }35 unsigned int ct0( unsigned short int n ) { return n != 0 ? __builtin_ctz( n ) : sizeof(n) * __CHAR_BIT__; }36 unsigned int ct0( unsigned int n ) { return n != 0 ? __builtin_ctz( n ) : sizeof(n) * __CHAR_BIT__; }37 unsigned int ct0( unsigned long int n ) { return n != 0 ? __builtin_ctzl( n ) : sizeof(n) * __CHAR_BIT__; }38 unsigned int ct0( unsigned long long int n ) { return n != 0 ? __builtin_ctzll( n ) : sizeof(n) * __CHAR_BIT__; }35 // Count trailing 0 bits. 36 unsigned int trailing0s( unsigned char n ) { return n != 0 ? __builtin_ctz( n ) : __bitsizeof(n); } 37 unsigned int trailing0s( unsigned short int n ) { return n != 0 ? __builtin_ctz( n ) : __bitsizeof(n); } 38 unsigned int trailing0s( unsigned int n ) { return n != 0 ? __builtin_ctz( n ) : __bitsizeof(n); } 39 unsigned int trailing0s( unsigned long int n ) { return n != 0 ? __builtin_ctzl( n ) : __bitsizeof(n); } 40 unsigned int trailing0s( unsigned long long int n ) { return n != 0 ? __builtin_ctzll( n ) : __bitsizeof(n); } 39 41 40 41 unsigned int ca1( unsigned char n ) { return __builtin_popcount( n ); }42 unsigned int ca1( unsigned short int n ) { return __builtin_popcount( n ); }43 unsigned int ca1( unsigned int n ) { return __builtin_popcount( n ); }44 unsigned int ca1( unsigned long int n ) { return __builtin_popcountl( n ); }45 unsigned int ca1( unsigned long long int n ) { return __builtin_popcountll( n ); }42 // Count all 1 bits. 43 unsigned int all1s( unsigned char n ) { return __builtin_popcount( n ); } 44 unsigned int all1s( unsigned short int n ) { return __builtin_popcount( n ); } 45 unsigned int all1s( unsigned int n ) { return __builtin_popcount( n ); } 46 unsigned int all1s( unsigned long int n ) { return __builtin_popcountl( n ); } 47 unsigned int all1s( unsigned long long int n ) { return __builtin_popcountll( n ); } 46 48 47 48 unsigned int ca0( unsigned char n ) { return sizeof(n) * __CHAR_BIT__- __builtin_popcount( n ); }49 unsigned int ca0( unsigned short int n ) { return sizeof(n) * __CHAR_BIT__- __builtin_popcount( n ); }50 unsigned int ca0( unsigned int n ) { return sizeof(n) * __CHAR_BIT__- __builtin_popcount( n ); }51 unsigned int ca0( unsigned long int n ) { return sizeof(n) * __CHAR_BIT__- __builtin_popcountl( n ); }52 unsigned int ca0( unsigned long long int n ) { return sizeof(n) * __CHAR_BIT__- __builtin_popcountll( n ); }49 // Count all 0 bits. 50 unsigned int all0s( unsigned char n ) { return __bitsizeof(n) - __builtin_popcount( n ); } 51 unsigned int all0s( unsigned short int n ) { return __bitsizeof(n) - __builtin_popcount( n ); } 52 unsigned int all0s( unsigned int n ) { return __bitsizeof(n) - __builtin_popcount( n ); } 53 unsigned int all0s( unsigned long int n ) { return __bitsizeof(n) - __builtin_popcountl( n ); } 54 unsigned int all0s( unsigned long long int n ) { return __bitsizeof(n) - __builtin_popcountll( n ); } 53 55 54 // Find least significiant set bit. (ffs) 55 unsigned int fls( unsigned int n ) { return __builtin_ffs( n ); } 56 unsigned int fls( unsigned long int n ) { return __builtin_ffsl( n ); } 57 unsigned int fls( unsigned long long int n ) { return __builtin_ffsll( n ); } 56 // Find least significiant zero bit. (ffs) 57 unsigned int low0( unsigned char n ) { return __builtin_ffs( (typeof(n))~n ); } 58 unsigned int low0( unsigned short int n ) { return __builtin_ffs( (typeof(n))~n ); } 59 unsigned int low0( unsigned int n ) { return __builtin_ffs( ~n ); } 60 unsigned int low0( unsigned long int n ) { return __builtin_ffsl( ~n ); } 61 unsigned int low0( unsigned long long int n ) { return __builtin_ffsll( ~n ); } 58 62 59 // Find most significiant set bit. 60 unsigned int fms( unsigned char n ) { return n != 0 ? sizeof(unsigned int) * __CHAR_BIT__ - __builtin_clz( n ) : 0; } 61 unsigned int fms( unsigned short int n ) { return n != 0 ? sizeof(unsigned int) * __CHAR_BIT__ - __builtin_clz( n ) : 0; } 62 unsigned int fms( unsigned int n ) { return n != 0 ? sizeof(n) * __CHAR_BIT__ - __builtin_clz( n ) : 0; } 63 unsigned int fms( unsigned long int n ) { return n != 0 ? sizeof(n) * __CHAR_BIT__ - __builtin_clzl( n ) : 0; } 64 unsigned int fms( unsigned long long int n ) { return n != 0 ? sizeof(n) * __CHAR_BIT__ - __builtin_clzll( n ) : 0; } 63 // Find least significiant one bit. 64 unsigned int low1( unsigned int n ) { return __builtin_ffs( n ); } 65 unsigned int low1( unsigned long int n ) { return __builtin_ffsl( n ); } 66 unsigned int low1( unsigned long long int n ) { return __builtin_ffsll( n ); } 65 67 66 // Check for power of 2 67 bool pow2( unsigned long int value ) { 68 return (value & (value - 1)) == 0; // clears bits below value, rounding down to the next lower multiple of value 69 } // pow2 68 // Find most significiant zero bit. 69 unsigned int high0( unsigned char n ) { return n == (typeof(n))-1 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( (typeof(n))~n ); } 70 unsigned int high0( unsigned short int n ) { return n == (typeof(n))-1 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( (typeof(n))~n ); } 71 unsigned int high0( unsigned int n ) { return n == -1 ? 0 : __bitsizeof(n) - __builtin_clz( ~n ); } 72 unsigned int high0( unsigned long int n ) { return n == -1 ? 0 : __bitsizeof(n) - __builtin_clzl( ~n ); } 73 unsigned int high0( unsigned long long int n ) { return n == -1 ? 0 : __bitsizeof(n) - __builtin_clzll( ~n ); } 70 74 71 // Returns value aligned at the floor of align. 72 unsigned long int floor( unsigned long int value, unsigned long int align ) { 73 assert( pow2( align ) ); 74 return value & -align; // clear bits above or equal to align, giving value % align 75 } // floor 75 // Find most significiant one bit. 76 unsigned int high1( unsigned char n ) { return n == 0 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( n ); } 77 unsigned int high1( unsigned short int n ) { return n == 0 ? 0 : __bitsizeof(unsigned int) - __builtin_clz( n ); } 78 unsigned int high1( unsigned int n ) { return n == 0 ? 0 : __bitsizeof(n) - __builtin_clz( n ); } 79 unsigned int high1( unsigned long int n ) { return n == 0 ? 0 : __bitsizeof(n) - __builtin_clzl( n ); } 80 unsigned int high1( unsigned long long int n ) { return n == 0 ? 0 : __bitsizeof(n) - __builtin_clzll( n ); } 76 81 77 // Returns value aligned at the ceiling of align. 78 unsigned long int ceiling( unsigned long int value, unsigned long int align ) { 79 assert( pow2( align ) ); 80 return -floor( -value, align ); // negate, round down, negate is the same as round up 81 } // ceiling 82 } 82 // Check for power of 2, clears bits below n, rounding down to the next lower multiple of n. 0 is not a power of 2 83 // but this computation returns true because of the two's complement, so it is a special case. 84 bool is_pow2( unsigned char n ) { return n == 0 ? false : (n & (n - 1)) == 0; } 85 bool is_pow2( unsigned short int n ) { return n == 0 ? false : (n & (n - 1)) == 0; } 86 bool is_pow2( unsigned int n ) { return n == 0 ? false : (n & (n - 1)) == 0; } 87 bool is_pow2( unsigned long int n ) { return n == 0 ? false : (n & (n - 1)) == 0; } 88 bool is_pow2( unsigned long long int n ) { return n == 0 ? false : (n & (n - 1)) == 0; } 89 90 // Returns n aligned at the floor of align, clear bits above or equal to align, giving n % align. 91 signed char floor2( signed char n, char align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 92 unsigned char floor2( unsigned char n, unsigned char align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 93 short int floor2( short int n, short int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 94 unsigned short int floor2( unsigned short int n, unsigned short int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 95 int floor2( int n, int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 96 unsigned int floor2( unsigned int n, unsigned int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 97 long int floor2( long int n, long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 98 unsigned long int floor2( unsigned long int n, unsigned long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 99 long long int floor2( long long int n, long long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 100 unsigned long long int floor2( unsigned long long int n, unsigned long long int align ) { /*assert( is_pow2( align ) );*/ return n & -align; } 101 102 // forall( otype T | { T ?&?( T, T ); T -?( T ); } ) 103 // T floor2( T n, T align ) { /* assert( is_pow2( align ) ); */ return n & -align; } 104 105 signed char floor( signed char n, char align ) { return n / align * align; } 106 unsigned char floor( unsigned char n, unsigned char align ) { return n / align * align; } 107 short int floor( short int n, short int align ) { return n / align * align; } 108 unsigned short int floor( unsigned short int n, unsigned short int align ) { return n / align * align; } 109 int floor( int n, int align ) { return n / align * align; } 110 unsigned int floor( unsigned int n, unsigned int align ) { return n / align * align; } 111 long int floor( long int n, long int align ) { return n / align * align; } 112 unsigned long int floor( unsigned long int n, unsigned long int align ) { return n / align * align; } 113 long long int floor( long long int n, long long int align ) { return n / align * align; } 114 unsigned long long int floor( unsigned long long int n, unsigned long long int align ) { return n / align * align; } 115 116 // forall( otype T | { T ?/?( T, T ); T ?*?( T, T ); } ) 117 // T floor( T n, T align ) { return n / align * align; } 118 119 // Returns n aligned at the ceiling of align, negate, round down, negate is the same as round up. 120 signed char ceiling2( signed char n, char align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 121 unsigned char ceiling2( unsigned char n, unsigned char align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 122 short int ceiling2( short int n, short int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 123 unsigned short int ceiling2( unsigned short int n, unsigned short int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 124 int ceiling2( int n, int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 125 unsigned int ceiling2( unsigned int n, unsigned int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 126 long int ceiling2( long int n, long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 127 unsigned long int ceiling2( unsigned long int n, unsigned long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 128 long long int ceiling2( long long int n, long long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 129 unsigned long long int ceiling2( unsigned long long int n, unsigned long long int align ) { /*assert( is_pow2( align ) );*/ return -floor2( -n, align ); } 130 131 // forall( otype T | { T floor2( T, T ); T -?( T ); } ) 132 // T ceiling2( T n, T align ) { /* assert( is_pow2( align ) ); */ return -floor2( -n, align ); } 133 134 signed char ceiling( signed char n, char align ) { return (n + (align - 1)) / align; } 135 unsigned char ceiling( unsigned char n, unsigned char align ) { return (n + (align - 1)) / align; } 136 short int ceiling( short int n, short int align ) { return (n + (align - 1)) / align; } 137 unsigned short int ceiling( unsigned short int n, unsigned short int align ) { return (n + (align - 1)) / align; } 138 int ceiling( int n, int align ) { return (n + (align - 1)) / align; } 139 unsigned int ceiling( unsigned int n, unsigned int align ) { return (n + (align - 1)) / align; } 140 long int ceiling( long int n, long int align ) { return (n + (align - 1)) / align; } 141 unsigned long int ceiling( unsigned long int n, unsigned long int align ) { return (n + (align - 1)) / align; } 142 long long int ceiling( long long int n, long long int align ) { return (n + (align - 1)) / align; } 143 unsigned long long int ceiling( unsigned long long int n, unsigned long long int align ) { return (n + (align - 1)) / align; } 144 145 // forall( otype T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T ); T ?/?( T, T ); } ) 146 // T ceiling( T n, T align ) { return (n + (align - (T){1})) / align; } 147 } // distribution 83 148 84 149 // Local Variables: // -
libcfa/src/bits/debug.hfa
re3bc51c rbcd74f3 9 9 // Author : Thierry Delisle 10 10 // Created On : Mon Nov 28 12:27:26 2016 11 // Last Modified By : Peter A. Buhr12 // Last Modified On : Tue Feb 4 12:29:21202013 // Update Count : 911 // Last Modified By : Andrew Beach 12 // Last Modified On : Mon Apr 27 10:15:00 2020 13 // Update Count : 10 14 14 // 15 15 … … 40 40 #endif 41 41 #include <stdarg.h> 42 #include <stdio.h>43 42 44 43 extern void __cfaabi_bits_write( int fd, const char buffer[], int len ); … … 49 48 extern void __cfaabi_bits_print_vararg( int fd, const char fmt[], va_list arg ); 50 49 extern void __cfaabi_bits_print_buffer( int fd, char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 4, 5) )); 50 51 #if defined(__CFA_DEBUG_PRINT__) \ 52 || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__) \ 53 || defined(__CFA_DEBUG_PRINT_MONITOR__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__) \ 54 || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__) 55 #include <stdio.h> 56 #include <unistd.h> 57 #endif 51 58 #ifdef __cforall 52 59 } 53 60 #endif 54 61 62 // Deprecated: Use the versions with the new module names. 55 63 #ifdef __CFA_DEBUG_PRINT__ 56 64 #define __cfaabi_dbg_write( buffer, len ) __cfaabi_bits_write( STDERR_FILENO, buffer, len ) 57 65 #define __cfaabi_dbg_acquire() __cfaabi_bits_acquire() 58 66 #define __cfaabi_dbg_release() __cfaabi_bits_release() 59 #define __cfaabi_dbg_print_safe(...) __cfaabi_bits_print_safe ( __VA_ARGS__)60 #define __cfaabi_dbg_print_nolock(...) __cfaabi_bits_print_nolock ( __VA_ARGS__)61 #define __cfaabi_dbg_print_buffer(...) __cfaabi_bits_print_buffer ( __VA_ARGS__)62 #define __cfaabi_dbg_print_buffer_decl(...) char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len );63 #define __cfaabi_dbg_print_buffer_local(...) __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( __dbg_text, __dbg_len );67 #define __cfaabi_dbg_print_safe(...) __cfaabi_bits_print_safe ( STDERR_FILENO, __VA_ARGS__ ) 68 #define __cfaabi_dbg_print_nolock(...) __cfaabi_bits_print_nolock ( STDERR_FILENO, __VA_ARGS__ ) 69 #define __cfaabi_dbg_print_buffer(...) __cfaabi_bits_print_buffer ( STDERR_FILENO, __VA_ARGS__ ) 70 #define __cfaabi_dbg_print_buffer_decl(...) char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len ); 71 #define __cfaabi_dbg_print_buffer_local(...) __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( STDERR_FILENO, __dbg_text, __dbg_len ); 64 72 #else 65 73 #define __cfaabi_dbg_write(...) ((void)0) … … 73 81 #endif 74 82 83 // Debug print functions and statements: 84 // Most are wrappers around the bits printing function but are not always used. 85 // If they are used depends if the group (first argument) is active or not. The group must be one 86 // defined belowe. The other arguments depend on the wrapped function. 87 #define __cfadbg_write(group, buffer, len) \ 88 __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_write(STDERR_FILENO, buffer, len)) 89 #define __cfadbg_acquire(group) \ 90 __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_acquire()) 91 #define __cfadbg_release(group) \ 92 __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_release()) 93 #define __cfadbg_print_safe(group, ...) \ 94 __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_safe(STDERR_FILENO, __VA_ARGS__)) 95 #define __cfadbg_print_nolock(group, ...) \ 96 __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_nolock(STDERR_FILENO, __VA_ARGS__)) 97 #define __cfadbg_print_buffer(group, ...) \ 98 __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_buffer(STDERR_FILENO, __VA_ARGS__)) 99 #define __cfadbg_print_buffer_decl(group, ...) \ 100 __CFADBG_PRINT_GROUP_##group(char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len )) 101 #define __cfadbg_print_buffer_local(group, ...) \ 102 __CFADBG_PRINT_GROUP_##group(__dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write(STDERR_FILENO, __dbg_text, __dbg_len)) 103 104 // The debug print groups: 105 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__) 106 # define __CFADBG_PRINT_GROUP_io(...) __VA_ARGS__ 107 #else 108 # define __CFADBG_PRINT_GROUP_io(...) ((void)0) 109 #endif 110 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__) 111 # define __CFADBG_PRINT_GROUP_io_core(...) __VA_ARGS__ 112 #else 113 # define __CFADBG_PRINT_GROUP_io_core(...) ((void)0) 114 #endif 115 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_MONITOR__) 116 # define __CFADBG_PRINT_GROUP_monitor(...) __VA_ARGS__ 117 #else 118 # define __CFADBG_PRINT_GROUP_monitor(...) ((void)0) 119 #endif 120 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__) 121 # define __CFADBG_PRINT_GROUP_preemption(...) __VA_ARGS__ 122 #else 123 # define __CFADBG_PRINT_GROUP_preemption(...) ((void)0) 124 #endif 125 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) 126 # define __CFADBG_PRINT_GROUP_runtime_core(...) __VA_ARGS__ 127 #else 128 # define __CFADBG_PRINT_GROUP_runtime_core(...) ((void)0) 129 #endif 130 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__) 131 # define __CFADBG_PRINT_GROUP_ready_queue(...) __VA_ARGS__ 132 #else 133 # define __CFADBG_PRINT_GROUP_ready_queue(...) ((void)0) 134 #endif 135 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__) 136 # define __CFADBG_PRINT_GROUP_exception(...) __VA_ARGS__ 137 #else 138 # define __CFADBG_PRINT_GROUP_exception(...) ((void)0) 139 #endif 140 75 141 // Local Variables: // 76 142 // mode: c // -
libcfa/src/bits/locks.hfa
re3bc51c rbcd74f3 112 112 #endif 113 113 114 extern "C" { 115 char * strerror(int); 116 } 117 #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); } 118 114 119 struct __bin_sem_t { 115 bool signaled;116 120 pthread_mutex_t lock; 117 121 pthread_cond_t cond; 122 int val; 118 123 }; 119 124 120 125 static inline void ?{}(__bin_sem_t & this) with( this ) { 121 signaled = false; 122 pthread_mutex_init(&lock, NULL); 123 pthread_cond_init (&cond, NULL); 126 // Create the mutex with error checking 127 pthread_mutexattr_t mattr; 128 pthread_mutexattr_init( &mattr ); 129 pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP); 130 pthread_mutex_init(&lock, &mattr); 131 132 pthread_cond_init (&cond, 0p); 133 val = 0; 124 134 } 125 135 126 136 static inline void ^?{}(__bin_sem_t & this) with( this ) { 127 pthread_mutex_destroy(&lock);128 pthread_cond_destroy (&cond);137 CHECKED( pthread_mutex_destroy(&lock) ); 138 CHECKED( pthread_cond_destroy (&cond) ); 129 139 } 130 140 131 141 static inline void wait(__bin_sem_t & this) with( this ) { 132 142 verify(__cfaabi_dbg_in_kernel()); 133 pthread_mutex_lock(&lock);134 if(!signaled) { // this must be a loop, not if!143 CHECKED( pthread_mutex_lock(&lock) ); 144 while(val < 1) { 135 145 pthread_cond_wait(&cond, &lock); 136 146 } 137 signaled = false;138 pthread_mutex_unlock(&lock);147 val -= 1; 148 CHECKED( pthread_mutex_unlock(&lock) ); 139 149 } 140 150 141 151 static inline bool post(__bin_sem_t & this) with( this ) { 142 pthread_mutex_lock(&lock); 143 bool needs_signal = !signaled; 144 signaled = true; 145 pthread_mutex_unlock(&lock); 152 bool needs_signal = false; 146 153 147 if (needs_signal) pthread_cond_signal(&cond); 154 CHECKED( pthread_mutex_lock(&lock) ); 155 if(val < 1) { 156 val += 1; 157 pthread_cond_signal(&cond); 158 needs_signal = true; 159 } 160 CHECKED( pthread_mutex_unlock(&lock) ); 148 161 149 162 return needs_signal; 150 163 } 164 165 #undef CHECKED 151 166 #endif -
libcfa/src/bits/signal.hfa
re3bc51c rbcd74f3 54 54 sig, handler, flags, errno, strerror( errno ) 55 55 ); 56 _ exit( EXIT_FAILURE );56 _Exit( EXIT_FAILURE ); 57 57 } // if 58 58 } -
libcfa/src/concurrency/alarm.cfa
re3bc51c rbcd74f3 51 51 this.alarm = alarm; 52 52 this.period = period; 53 next = 0;54 53 set = false; 55 54 kernel_alarm = false; … … 60 59 this.alarm = alarm; 61 60 this.period = period; 62 next = 0;63 61 set = false; 64 62 kernel_alarm = true; … … 71 69 } 72 70 73 #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__)) 74 bool validate( alarm_list_t * this ) { 75 alarm_node_t ** it = &this->head; 76 while( (*it) ) { 77 it = &(*it)->next; 71 void insert( alarm_list_t * this, alarm_node_t * n ) { 72 alarm_node_t * it = & (*this)`first; 73 while( it && (n->alarm > it->alarm) ) { 74 it = & (*it)`next; 75 } 76 if ( it ) { 77 insert_before( *it, *n ); 78 } else { 79 insert_last(*this, *n); 78 80 } 79 81 80 return it == this->tail; 81 } 82 #endif 83 84 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) { 85 verify( !n->next ); 86 if( p == this->tail ) { 87 this->tail = &n->next; 88 } 89 else { 90 n->next = *p; 91 } 92 *p = n; 93 94 verify( validate( this ) ); 95 } 96 97 void insert( alarm_list_t * this, alarm_node_t * n ) { 98 alarm_node_t ** it = &this->head; 99 while( (*it) && (n->alarm > (*it)->alarm) ) { 100 it = &(*it)->next; 101 } 102 103 insert_at( this, n, it ); 104 105 verify( validate( this ) ); 82 verify( validate( *this ) ); 106 83 } 107 84 108 85 alarm_node_t * pop( alarm_list_t * this ) { 109 alarm_node_t * head = this->head; 86 verify( validate( *this ) ); 87 alarm_node_t * head = & (*this)`first; 110 88 if( head ) { 111 this->head = head->next; 112 if( !head->next ) { 113 this->tail = &this->head; 114 } 115 head->next = 0p; 89 remove(*head); 116 90 } 117 verify( validate( this ) );91 verify( validate( *this ) ); 118 92 return head; 119 93 } 120 94 121 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {122 verify( it );123 verify( (*it) == n );124 125 (*it) = n->next;126 if( !n-> next ) {127 this->tail = it;128 }129 n->next = 0p;130 131 verify( validate( this ) );132 }133 134 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {135 alarm_node_t ** it = &this->head;136 while( (*it) && (*it) != n ) {137 it = &(*it)->next;138 }139 140 verify( validate( this ) );141 142 if( *it ) { remove_at( this, n, it ); }143 144 verify( validate( this ) );145 }146 147 95 void register_self( alarm_node_t * this ) { 148 alarm_list_t * alarms = &event_kernel->alarms;96 alarm_list_t & alarms = event_kernel->alarms; 149 97 150 98 disable_interrupts(); … … 152 100 { 153 101 verify( validate( alarms ) ); 154 bool first = ! alarms->head;102 bool first = ! & alarms`first; 155 103 156 insert( alarms, this );104 insert( &alarms, this ); 157 105 if( first ) { 158 __kernel_set_timer( alarms ->head->alarm - __kernel_get_time() );106 __kernel_set_timer( alarms`first.alarm - __kernel_get_time() ); 159 107 } 160 108 } … … 168 116 lock( event_kernel->lock __cfaabi_dbg_ctx2 ); 169 117 { 170 verify( validate( &event_kernel->alarms ) );171 remove( &event_kernel->alarms,this );118 verify( validate( event_kernel->alarms ) ); 119 remove( *this ); 172 120 } 173 121 unlock( event_kernel->lock ); … … 176 124 } 177 125 126 //============================================================================================= 127 // Utilities 128 //============================================================================================= 129 130 void sleep( Duration duration ) { 131 alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s }; 132 133 register_self( &node ); 134 park( __cfaabi_dbg_ctx ); 135 136 /* paranoid */ verify( !node.set ); 137 /* paranoid */ verify( & node`next == 0p ); 138 /* paranoid */ verify( & node`prev == 0p ); 139 } 140 178 141 // Local Variables: // 179 142 // mode: c // -
libcfa/src/concurrency/alarm.hfa
re3bc51c rbcd74f3 23 23 #include "time.hfa" 24 24 25 #include <containers/list.hfa> 26 25 27 struct $thread; 26 28 struct processor; … … 40 42 Time alarm; // time when alarm goes off 41 43 Duration period; // if > 0 => period of alarm 42 alarm_node_t * next; // intrusive link list field 44 45 DLISTED_MGD_IMPL_IN(alarm_node_t) 43 46 44 47 union { … … 50 53 bool kernel_alarm :1; // true if this is not a user defined alarm 51 54 }; 52 53 typedef alarm_node_t ** __alarm_it_t; 55 DLISTED_MGD_IMPL_OUT(alarm_node_t) 54 56 55 57 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ); … … 57 59 void ^?{}( alarm_node_t & this ); 58 60 59 struct alarm_list_t { 60 alarm_node_t * head; 61 __alarm_it_t tail; 62 }; 63 64 static inline void ?{}( alarm_list_t & this ) with( this ) { 65 head = 0; 66 tail = &head; 67 } 61 typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t; 68 62 69 63 void insert( alarm_list_t * this, alarm_node_t * n ); -
libcfa/src/concurrency/kernel.cfa
re3bc51c rbcd74f3 15 15 16 16 #define __cforall_thread__ 17 // #define __CFA_DEBUG_PRINT_RUNTIME_CORE__ 17 18 18 19 //C Includes … … 40 41 #include "invoke.h" 41 42 43 42 44 //----------------------------------------------------------------------------- 43 45 // Some assembly required … … 230 232 idle{}; 231 233 232 __cfa abi_dbg_print_safe("Kernel : Starting core %p\n", &this);234 __cfadbg_print_safe(runtime_core, "Kernel : Starting core %p\n", &this); 233 235 234 236 this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this ); 235 237 236 __cfa abi_dbg_print_safe("Kernel : core %p started\n", &this);238 __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this); 237 239 } 238 240 239 241 void ^?{}(processor & this) with( this ){ 240 242 if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) { 241 __cfa abi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);243 __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this); 242 244 243 245 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED); … … 248 250 } 249 251 250 pthread_join( kernel_thread, 0p ); 252 int err = pthread_join( kernel_thread, 0p ); 253 if( err != 0 ) abort("KERNEL ERROR: joining processor %p caused error %s\n", &this, strerror(err)); 254 251 255 free( this.stack ); 252 256 } 253 257 254 void ?{}(cluster & this, const char name[], Duration preemption_rate ) with( this ) {258 void ?{}(cluster & this, const char name[], Duration preemption_rate, int io_flags) with( this ) { 255 259 this.name = name; 256 260 this.preemption_rate = preemption_rate; … … 258 262 ready_queue_lock{}; 259 263 264 #if !defined(__CFA_NO_STATISTICS__) 265 print_stats = false; 266 #endif 267 260 268 procs{ __get }; 261 269 idles{ __get }; 262 270 threads{ __get }; 263 271 272 __kernel_io_startup( this, io_flags, &this == mainCluster ); 273 264 274 doregister(this); 265 275 } 266 276 267 277 void ^?{}(cluster & this) { 278 __kernel_io_shutdown( this, &this == mainCluster ); 279 268 280 unregister(this); 269 281 } … … 281 293 verify(this); 282 294 283 __cfa abi_dbg_print_safe("Kernel : core %p starting\n", this);295 __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this); 284 296 285 297 doregister(this->cltr, this); … … 289 301 preemption_scope scope = { this }; 290 302 291 __cfa abi_dbg_print_safe("Kernel : core %p started\n", this);303 __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this); 292 304 293 305 $thread * readyThread = 0p; … … 315 327 } 316 328 317 __cfa abi_dbg_print_safe("Kernel : core %p stopping\n", this);329 __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this); 318 330 } 319 331 … … 322 334 V( this->terminated ); 323 335 324 __cfa abi_dbg_print_safe("Kernel : core %p terminated\n", this);336 __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this); 325 337 326 338 // HACK : the coroutine context switch expects this_thread to be set … … 467 479 468 480 //We now have a proper context from which to schedule threads 469 __cfa abi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);481 __cfadbg_print_safe(runtime_core, "Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx); 470 482 471 483 // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't … … 478 490 479 491 // Main routine of the core returned, the core is now fully terminated 480 __cfa abi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);492 __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner); 481 493 482 494 return 0p; … … 611 623 } 612 624 613 void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) { 614 if( !thrd ) return; 615 616 disable_interrupts(); 625 // KERNEL ONLY unpark with out disabling interrupts 626 void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) { 617 627 static_assert(sizeof(thrd->state) == sizeof(int)); 618 628 … … 643 653 abort(); 644 654 } 655 } 656 657 void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) { 658 if( !thrd ) return; 659 660 disable_interrupts(); 661 __unpark( thrd __cfaabi_dbg_ctx_fwd2 ); 645 662 enable_interrupts( __cfaabi_dbg_ctx ); 646 663 } … … 704 721 static void __kernel_startup(void) { 705 722 verify( ! kernelTLS.preemption_state.enabled ); 706 __cfa abi_dbg_print_safe("Kernel : Starting\n");723 __cfadbg_print_safe(runtime_core, "Kernel : Starting\n"); 707 724 708 725 __page_size = sysconf( _SC_PAGESIZE ); … … 715 732 (*mainCluster){"Main Cluster"}; 716 733 717 __cfa abi_dbg_print_safe("Kernel : Main cluster ready\n");734 __cfadbg_print_safe(runtime_core, "Kernel : Main cluster ready\n"); 718 735 719 736 // Start by initializing the main thread … … 725 742 (*mainThread){ &info }; 726 743 727 __cfa abi_dbg_print_safe("Kernel : Main thread ready\n");744 __cfadbg_print_safe(runtime_core, "Kernel : Main thread ready\n"); 728 745 729 746 … … 746 763 747 764 runner{ &this }; 748 __cfa abi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);765 __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner); 749 766 } 750 767 … … 771 788 772 789 773 774 790 // THE SYSTEM IS NOW COMPLETELY RUNNING 775 __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n"); 791 792 793 // Now that the system is up, finish creating systems that need threading 794 __kernel_io_finish_start( *mainCluster ); 795 796 797 __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n"); 776 798 777 799 verify( ! kernelTLS.preemption_state.enabled ); … … 781 803 782 804 static void __kernel_shutdown(void) { 783 __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n"); 805 //Before we start shutting things down, wait for systems that need threading to shutdown 806 __kernel_io_prepare_stop( *mainCluster ); 784 807 785 808 /* paranoid */ verify( TL_GET( preemption_state.enabled ) ); 786 809 disable_interrupts(); 787 810 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 811 812 __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n"); 788 813 789 814 // SKULLDUGGERY: Notify the mainProcessor it needs to terminates. … … 801 826 // Destroy the main processor and its context in reverse order of construction 802 827 // These were manually constructed so we need manually destroy them 828 void ^?{}(processor & this) with( this ){ 829 /* paranoid */ verify( this.do_terminate == true ); 830 } 831 803 832 ^(*mainProcessor){}; 804 833 … … 808 837 ^(*mainThread){}; 809 838 839 ^(*mainCluster){}; 840 810 841 ^(__cfa_dbg_global_clusters.list){}; 811 842 ^(__cfa_dbg_global_clusters.lock){}; 812 843 813 __cfa abi_dbg_print_safe("Kernel : Shutdown complete\n");844 __cfadbg_print_safe(runtime_core, "Kernel : Shutdown complete\n"); 814 845 } 815 846 … … 836 867 837 868 // We are ready to sleep 838 __cfa abi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);869 __cfadbg_print_safe(runtime_core, "Kernel : Processor %p ready to sleep\n", this); 839 870 wait( idle ); 840 871 841 872 // We have woken up 842 __cfa abi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);873 __cfadbg_print_safe(runtime_core, "Kernel : Processor %p woke up and ready to run\n", this); 843 874 844 875 // Get ourself off the idle list … … 856 887 static bool __wake_one(cluster * this, __attribute__((unused)) bool force) { 857 888 // if we don't want to force check if we know it's false 858 if( !this->idles.head && !force ) return false;889 // if( !this->idles.head && !force ) return false; 859 890 860 891 // First, lock the cluster idle … … 869 900 870 901 // Wake them up 902 __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this->idles.head); 903 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 871 904 post( this->idles.head->idle ); 872 905 … … 878 911 // Unconditionnaly wake a thread 879 912 static bool __wake_proc(processor * this) { 880 return post( this->idle ); 913 __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this); 914 915 disable_interrupts(); 916 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); 917 bool ret = post( this->idle ); 918 enable_interrupts( __cfaabi_dbg_ctx ); 919 920 return ret; 881 921 } 882 922 … … 960 1000 void ^?{}(semaphore & this) {} 961 1001 962 voidP(semaphore & this) with( this ){1002 bool P(semaphore & this) with( this ){ 963 1003 lock( lock __cfaabi_dbg_ctx2 ); 964 1004 count -= 1; … … 970 1010 unlock( lock ); 971 1011 park( __cfaabi_dbg_ctx ); 1012 return true; 972 1013 } 973 1014 else { 974 1015 unlock( lock ); 1016 return false; 975 1017 } 976 1018 } … … 989 1031 // make new owner 990 1032 unpark( thrd __cfaabi_dbg_ctx2 ); 1033 1034 return thrd != 0p; 1035 } 1036 1037 bool V(semaphore & this, unsigned diff) with( this ) { 1038 $thread * thrd = 0p; 1039 lock( lock __cfaabi_dbg_ctx2 ); 1040 int release = max(-count, (int)diff); 1041 count += diff; 1042 for(release) { 1043 unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 ); 1044 } 1045 1046 unlock( lock ); 991 1047 992 1048 return thrd != 0p; -
libcfa/src/concurrency/kernel.hfa
re3bc51c rbcd74f3 17 17 18 18 #include <stdbool.h> 19 #include <stdint.h> 19 20 20 21 #include "invoke.h" … … 37 38 void ?{}(semaphore & this, int count = 1); 38 39 void ^?{}(semaphore & this); 39 voidP (semaphore & this);40 bool P (semaphore & this); 40 41 bool V (semaphore & this); 42 bool V (semaphore & this, unsigned count); 41 43 42 44 … … 111 113 112 114 //----------------------------------------------------------------------------- 115 // I/O 116 struct __io_data; 117 118 #define CFA_CLUSTER_IO_POLLER_USER_THREAD 1 << 0 119 // #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE 1 << 1 120 121 //----------------------------------------------------------------------------- 113 122 // Cluster 114 123 struct cluster { … … 141 150 cluster * prev; 142 151 } node; 152 153 struct __io_data * io; 154 155 #if !defined(__CFA_NO_STATISTICS__) 156 bool print_stats; 157 #endif 143 158 }; 144 159 extern Duration default_preemption(); 145 160 146 void ?{} (cluster & this, const char name[], Duration preemption_rate );161 void ?{} (cluster & this, const char name[], Duration preemption_rate, int flags); 147 162 void ^?{}(cluster & this); 148 163 149 static inline void ?{} (cluster & this) { this{"Anonymous Cluster", default_preemption()}; } 150 static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; } 151 static inline void ?{} (cluster & this, const char name[]) { this{name, default_preemption()}; } 164 static inline void ?{} (cluster & this) { this{"Anonymous Cluster", default_preemption(), 0}; } 165 static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate, 0}; } 166 static inline void ?{} (cluster & this, const char name[]) { this{name, default_preemption(), 0}; } 167 static inline void ?{} (cluster & this, int flags) { this{"Anonymous Cluster", default_preemption(), flags}; } 168 static inline void ?{} (cluster & this, Duration preemption_rate, int flags) { this{"Anonymous Cluster", preemption_rate, flags}; } 169 static inline void ?{} (cluster & this, const char name[], int flags) { this{name, default_preemption(), flags}; } 152 170 153 171 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; } … … 156 174 static inline struct cluster * active_cluster () { return TL_GET( this_processor )->cltr; } 157 175 176 #if !defined(__CFA_NO_STATISTICS__) 177 static inline void print_stats_at_exit( cluster & this ) { 178 this.print_stats = true; 179 } 180 #endif 181 158 182 // Local Variables: // 159 183 // mode: c // -
libcfa/src/concurrency/kernel_private.hfa
re3bc51c rbcd74f3 59 59 extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" ))); 60 60 61 extern cluster * mainCluster; 62 61 63 //----------------------------------------------------------------------------- 62 64 // Threads … … 69 71 extern void __cfaabi_dbg_thread_unregister( $thread * thrd ); 70 72 ) 73 74 // KERNEL ONLY unpark with out disabling interrupts 75 void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ); 76 77 //----------------------------------------------------------------------------- 78 // I/O 79 void __kernel_io_startup ( cluster &, int, bool ); 80 void __kernel_io_finish_start( cluster & ); 81 void __kernel_io_prepare_stop( cluster & ); 82 void __kernel_io_shutdown ( cluster &, bool ); 71 83 72 84 //----------------------------------------------------------------------------- -
libcfa/src/concurrency/preemption.cfa
re3bc51c rbcd74f3 43 43 // FwdDeclarations : Signal handlers 44 44 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ); 45 static void sigHandler_alarm ( __CFA_SIGPARMS__ ); 45 46 static void sigHandler_segv ( __CFA_SIGPARMS__ ); 46 47 static void sigHandler_ill ( __CFA_SIGPARMS__ ); … … 83 84 // Get next expired node 84 85 static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) { 85 if( ! alarms->head) return 0p; // If no alarms return null86 if( alarms->head->alarm >= currtime ) return 0p; // If alarms head not expired return null86 if( ! & (*alarms)`first ) return 0p; // If no alarms return null 87 if( (*alarms)`first.alarm >= currtime ) return 0p; // If alarms head not expired return null 87 88 return pop(alarms); // Otherwise just pop head 88 89 } … … 97 98 while( node = get_expired( alarms, currtime ) ) { 98 99 // __cfaabi_dbg_print_buffer_decl( " KERNEL: preemption tick.\n" ); 100 Duration period = node->period; 101 if( period == 0) { 102 node->set = false; // Node is one-shot, just mark it as not pending 103 } 99 104 100 105 // Check if this is a kernel … … 107 112 108 113 // Check if this is a periodic alarm 109 Duration period = node->period;110 114 if( period > 0 ) { 111 115 // __cfaabi_dbg_print_buffer_local( " KERNEL: alarm period is %lu.\n", period.tv ); … … 113 117 insert( alarms, node ); // Reinsert the node for the next time it triggers 114 118 } 115 else {116 node->set = false; // Node is one-shot, just mark it as not pending117 }118 119 } 119 120 120 121 // If there are still alarms pending, reset the timer 121 if( alarms->head) {122 if( & (*alarms)`first ) { 122 123 __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv); 123 Duration delta = alarms->head->alarm - currtime;124 Duration cap ed = max(delta, 50`us);124 Duration delta = (*alarms)`first.alarm - currtime; 125 Duration capped = max(delta, 50`us); 125 126 // itimerval tim = { caped }; 126 127 // __cfaabi_dbg_print_buffer_local( " Values are %lu, %lu, %lu %lu.\n", delta.tv, caped.tv, tim.it_value.tv_sec, tim.it_value.tv_usec); 127 128 128 __kernel_set_timer( cap ed );129 __kernel_set_timer( capped ); 129 130 } 130 131 } … … 256 257 257 258 if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) { 258 259 abort( "internal error, pthread_sigmask" ); 259 260 } 260 261 } … … 268 269 // reserved for future use 269 270 static void timeout( $thread * this ) { 270 //TODO : implement waking threads271 __unpark( this __cfaabi_dbg_ctx2 ); 271 272 } 272 273 … … 303 304 // Setup proper signal handlers 304 305 __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler 306 __cfaabi_sigaction( SIGALRM, sigHandler_alarm , SA_SIGINFO | SA_RESTART ); // debug handler 305 307 306 308 signal_block( SIGALRM ); … … 394 396 395 397 force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch 398 } 399 400 static void sigHandler_alarm( __CFA_SIGPARMS__ ) { 401 abort("SIGALRM should never reach the signal handler"); 396 402 } 397 403 -
libcfa/src/concurrency/thread.hfa
re3bc51c rbcd74f3 117 117 } 118 118 119 //---------- 120 // sleep: force thread to block and be rescheduled after Duration duration 121 void sleep( Duration duration ); 122 119 123 // Local Variables: // 120 124 // mode: c // -
libcfa/src/exception.c
re3bc51c rbcd74f3 10 10 // Created On : Mon Jun 26 15:13:00 2017 11 11 // Last Modified By : Andrew Beach 12 // Last Modified On : Fri Apr 03 11:57:00 202013 // Update Count : 1 412 // Last Modified On : Tue Apr 14 12:01:00 2020 13 // Update Count : 18 14 14 // 15 15 … … 28 28 #include <unwind.h> 29 29 #include <bits/debug.hfa> 30 #include "stdhdr/assert.h" 30 31 31 32 // FIX ME: temporary hack to keep ARM build working … … 75 76 // RESUMPTION ================================================================ 76 77 78 static void reset_top_resume(struct __cfaehm_try_resume_node ** store) { 79 this_exception_context()->top_resume = *store; 80 } 81 77 82 void __cfaehm_throw_resume(exception_t * except) { 78 83 struct exception_context_t * context = this_exception_context(); 79 84 80 __cfaabi_dbg_print_safe("Throwing resumption exception\n"); 81 85 __cfadbg_print_safe(exception, "Throwing resumption exception\n"); 86 87 __attribute__((cleanup(reset_top_resume))) 82 88 struct __cfaehm_try_resume_node * original_head = context->top_resume; 83 89 struct __cfaehm_try_resume_node * current = context->top_resume; … … 86 92 context->top_resume = current->next; 87 93 if (current->handler(except)) { 88 context->top_resume = original_head;89 94 return; 90 95 } 91 96 } 92 97 93 __cfaabi_dbg_print_safe("Unhandled exception\n"); 94 context->top_resume = original_head; 98 __cfadbg_print_safe(exception, "Unhandled exception\n"); 95 99 96 100 // Fall back to termination: … … 176 180 struct exception_context_t * context = this_exception_context(); 177 181 178 __cfa abi_dbg_print_safe("Deleting Exception\n");182 __cfadbg_print_safe(exception, "Deleting Exception\n"); 179 183 180 184 // Remove the exception from the list. … … 213 217 struct _Unwind_Context * unwind_context, 214 218 void * stop_param) { 215 if ( actions & _UA_END_OF_STACK ) exit(1); 216 if ( actions & _UA_CLEANUP_PHASE ) return _URC_NO_REASON; 217 218 return _URC_FATAL_PHASE2_ERROR; 219 // Verify actions follow the rules we expect. 220 verify((actions & _UA_CLEANUP_PHASE) && (actions & _UA_FORCE_UNWIND)); 221 verify(!(actions & (_UA_SEARCH_PHASE | _UA_HANDER_FRAME))); 222 223 if ( actions & _UA_END_OF_STACK ) { 224 exit(1); 225 } else { 226 return _URC_NO_REASON; 227 } 219 228 } 220 229 … … 252 261 253 262 void __cfaehm_throw_terminate( exception_t * val ) { 254 __cfa abi_dbg_print_safe("Throwing termination exception\n");263 __cfadbg_print_safe(exception, "Throwing termination exception\n"); 255 264 256 265 __cfaehm_allocate_exception( val ); … … 259 268 260 269 void __cfaehm_rethrow_terminate(void) { 261 __cfa abi_dbg_print_safe("Rethrowing termination exception\n");270 __cfadbg_print_safe(exception, "Rethrowing termination exception\n"); 262 271 263 272 __cfaehm_begin_unwind(); … … 275 284 { 276 285 277 //__cfa abi_dbg_print_safe("CFA: 0x%lx\n", _Unwind_GetCFA(context));278 __cfa abi_dbg_print_safe("Personality function (%d, %x, %llu, %p, %p):",286 //__cfadbg_print_safe(exception, "CFA: 0x%lx\n", _Unwind_GetCFA(context)); 287 __cfadbg_print_safe(exception, "Personality function (%d, %x, %llu, %p, %p):", 279 288 version, actions, exception_class, unwind_exception, unwind_context); 280 289 281 // If we've reached the end of the stack then there is nothing much we can do... 282 if (actions & _UA_END_OF_STACK) return _URC_END_OF_STACK; 283 290 // Verify that actions follow the rules we expect. 291 // This function should never be called at the end of the stack. 292 verify(!(actions & _UA_END_OF_STACK)); 293 // Either only the search phase flag is set or... 284 294 if (actions & _UA_SEARCH_PHASE) { 285 __cfaabi_dbg_print_safe(" lookup phase"); 286 } 287 else if (actions & _UA_CLEANUP_PHASE) { 288 __cfaabi_dbg_print_safe(" cleanup phase"); 289 } 290 // Just in case, probably can't actually happen 291 else { 292 printf(" error\n"); 293 return _URC_FATAL_PHASE1_ERROR; 295 verify(actions == _UA_SEARCH_PHASE); 296 __cfadbg_print_safe(exception, " lookup phase"); 297 // ... we are in clean-up phase. 298 } else { 299 verify(actions & _UA_CLEANUP_PHASE); 300 __cfadbg_print_safe(exception, " cleanup phase"); 301 // We shouldn't be the handler frame during forced unwind. 302 if (actions & _UA_HANDLER_FRAME) { 303 verify(!(actions & _UA_FORCE_UNWIND)); 304 __cfadbg_print_safe(exception, " (handler frame)"); 305 } else if (actions & _UA_FORCE_UNWIND) { 306 __cfadbg_print_safe(exception, " (force unwind)"); 307 } 294 308 } 295 309 … … 331 345 void * ep = (void*)lsd_info.Start + callsite_start + callsite_len; 332 346 void * ip = (void*)instruction_ptr; 333 __cfa abi_dbg_print_safe("\nfound %p - %p (%p, %p, %p), looking for %p\n",347 __cfadbg_print_safe(exception, "\nfound %p - %p (%p, %p, %p), looking for %p\n", 334 348 bp, ep, ls, cs, cl, ip); 335 349 #endif // __CFA_DEBUG_PRINT__ … … 346 360 if ( 0 == callsite_landing_pad ) { 347 361 // Nothing to do, move along 348 __cfa abi_dbg_print_safe(" no landing pad");362 __cfadbg_print_safe(exception, " no landing pad"); 349 363 } else if (actions & _UA_SEARCH_PHASE) { 350 364 // In search phase, these means we found a potential handler we must check. … … 384 398 // Based on the return value, check if we matched the exception 385 399 if (ret == _URC_HANDLER_FOUND) { 386 __cfa abi_dbg_print_safe(" handler found\n");400 __cfadbg_print_safe(exception, " handler found\n"); 387 401 } else { 388 __cfa abi_dbg_print_safe(" no handler\n");402 __cfadbg_print_safe(exception, " no handler\n"); 389 403 } 390 404 return ret; … … 392 406 393 407 // This is only a cleanup handler, ignore it 394 __cfa abi_dbg_print_safe(" no action");395 } else if (actions & _UA_CLEANUP_PHASE){408 __cfadbg_print_safe(exception, " no action"); 409 } else { 396 410 // In clean-up phase, no destructors here but this could be the handler. 397 411 … … 414 428 _Unwind_SetIP( unwind_context, ((lsd_info.LPStart) + (callsite_landing_pad)) ); 415 429 416 __cfa abi_dbg_print_safe(" action\n");430 __cfadbg_print_safe(exception, " action\n"); 417 431 418 432 // Return have some action to run … … 421 435 } 422 436 // No handling found 423 __cfa abi_dbg_print_safe(" table end reached\n");437 __cfadbg_print_safe(exception, " table end reached"); 424 438 425 439 UNWIND: 426 __cfa abi_dbg_print_safe(" unwind\n");440 __cfadbg_print_safe(exception, " unwind\n"); 427 441 428 442 // Keep unwinding the stack … … 431 445 432 446 #pragma GCC push_options 433 #pragma GCC optimize( "O0")447 #pragma GCC optimize(0) 434 448 435 449 // Try statements are hoisted out see comments for details. While this could probably be unique … … 528 542 " .quad __gcfa_personality_v0\n" 529 543 #else // then __i386 530 " 544 " .long __gcfa_personality_v0\n" 531 545 #endif 532 546 ); -
libcfa/src/heap.cfa
re3bc51c rbcd74f3 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Apr 1 15:59:53202013 // Update Count : 69212 // Last Modified On : Wed May 6 17:29:26 2020 13 // Update Count : 727 14 14 // 15 15 … … 19 19 #include <errno.h> // errno 20 20 #include <string.h> // memset, memcpy 21 #include <limits.h> // ULONG_MAX 21 22 extern "C" { 22 23 #include <sys/mman.h> // mmap, munmap 23 24 } // extern "C" 24 25 25 // #comment TD : Many of these should be merged into math I believe26 26 #include "bits/align.hfa" // libPow2 27 27 #include "bits/defs.hfa" // likely, unlikely … … 30 30 //#include "stdlib.hfa" // bsearchl 31 31 #include "malloc.h" 32 #include "bitmanip.hfa" // ceiling 32 33 33 34 #define MIN(x, y) (y > x ? x : y) … … 81 82 }; 82 83 84 size_t default_heap_expansion() __attribute__(( weak )) { 85 return __CFA_DEFAULT_HEAP_EXPANSION__; 86 } // default_heap_expansion 87 83 88 size_t default_mmap_start() __attribute__(( weak )) { 84 89 return __CFA_DEFAULT_MMAP_START__; 85 90 } // default_mmap_start 86 87 size_t default_heap_expansion() __attribute__(( weak )) {88 return __CFA_DEFAULT_HEAP_EXPANSION__;89 } // default_heap_expansion90 91 91 92 … … 181 182 } fake; // FakeHeader 182 183 } kind; // Kind 183 uint32_t dimension; // used by calloc-like to remember number of array elements184 size_t size; // allocation size in bytes 184 185 } header; // Header 185 186 char pad[libAlign() - sizeof( Header )]; … … 265 266 static unsigned long long int free_storage; 266 267 static unsigned int free_calls; 268 static unsigned long long int aalloc_storage; 269 static unsigned int aalloc_calls; 267 270 static unsigned long long int calloc_storage; 268 271 static unsigned int calloc_calls; 269 272 static unsigned long long int memalign_storage; 270 273 static unsigned int memalign_calls; 274 static unsigned long long int amemalign_storage; 275 static unsigned int amemalign_calls; 271 276 static unsigned long long int cmemalign_storage; 272 277 static unsigned int cmemalign_calls; … … 280 285 // Use "write" because streams may be shutdown when calls are made. 281 286 static void printStats() { 282 char helpText[ 512];287 char helpText[1024]; 283 288 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), 284 289 "\nHeap statistics:\n" 285 290 " malloc: calls %u / storage %llu\n" 291 " aalloc: calls %u / storage %llu\n" 286 292 " calloc: calls %u / storage %llu\n" 287 293 " memalign: calls %u / storage %llu\n" 294 " amemalign: calls %u / storage %llu\n" 288 295 " cmemalign: calls %u / storage %llu\n" 289 296 " resize: calls %u / storage %llu\n" … … 294 301 " sbrk: calls %u / storage %llu\n", 295 302 malloc_calls, malloc_storage, 303 aalloc_calls, calloc_storage, 296 304 calloc_calls, calloc_storage, 297 305 memalign_calls, memalign_storage, 306 amemalign_calls, amemalign_storage, 298 307 cmemalign_calls, cmemalign_storage, 299 308 resize_calls, resize_storage, … … 307 316 308 317 static int printStatsXML( FILE * stream ) { // see malloc_info 309 char helpText[ 512];318 char helpText[1024]; 310 319 int len = snprintf( helpText, sizeof(helpText), 311 320 "<malloc version=\"1\">\n" … … 314 323 "</sizes>\n" 315 324 "<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n" 325 "<total type=\"aalloc\" count=\"%u\" size=\"%llu\"/>\n" 316 326 "<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n" 317 327 "<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n" 328 "<total type=\"amemalign\" count=\"%u\" size=\"%llu\"/>\n" 318 329 "<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n" 319 330 "<total type=\"resize\" count=\"%u\" size=\"%llu\"/>\n" … … 325 336 "</malloc>", 326 337 malloc_calls, malloc_storage, 338 aalloc_calls, aalloc_storage, 327 339 calloc_calls, calloc_storage, 328 340 memalign_calls, memalign_storage, 341 amemalign_calls, amemalign_storage, 329 342 cmemalign_calls, cmemalign_storage, 330 343 resize_calls, resize_storage, … … 348 361 349 362 350 static inline bool setHeapExpand( size_t value ) {351 if ( heapExpand < pageSize ) return true;352 heapExpand = value;353 return false;354 } // setHeapExpand355 356 357 363 // thunk problem 358 364 size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) { … … 371 377 372 378 static inline bool setMmapStart( size_t value ) { // true => mmapped, false => sbrk 373 if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;379 if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return false; 374 380 mmapStart = value; // set global 375 381 … … 378 384 assert( maxBucketsUsed < NoBucketSizes ); // subscript failure ? 379 385 assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ? 380 return false;386 return true; 381 387 } // setMmapStart 382 388 … … 437 443 438 444 #ifdef __CFA_DEBUG__ 439 checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr );// bad low address ?445 checkHeader( addr < heapBegin, name, addr ); // bad low address ? 440 446 #endif // __CFA_DEBUG__ 441 447 … … 496 502 // along with the block and is a multiple of the alignment size. 497 503 498 if ( unlikely( size > ~0ul- sizeof(HeapManager.Storage) ) ) return 0p;504 if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p; 499 505 size_t tsize = size + sizeof(HeapManager.Storage); 500 506 if ( likely( tsize < mmapStart ) ) { // small size => sbrk … … 548 554 block->header.kind.real.home = freeElem; // pointer back to free list of apropriate size 549 555 } else { // large size => mmap 550 if ( unlikely( size > ~0ul- pageSize ) ) return 0p;556 if ( unlikely( size > ULONG_MAX - pageSize ) ) return 0p; 551 557 tsize = libCeiling( tsize, pageSize ); // must be multiple of page size 552 558 #ifdef __STATISTICS__ … … 566 572 } // if 567 573 574 block->header.size = size; // store allocation size 568 575 void * addr = &(block->data); // adjust off header to user bytes 569 576 … … 689 696 #endif // FASTLOOKUP 690 697 691 if ( setMmapStart( default_mmap_start() ) ) {698 if ( ! setMmapStart( default_mmap_start() ) ) { 692 699 abort( "HeapManager : internal error, mmap start initialization failure." ); 693 700 } // if … … 695 702 696 703 char * end = (char *)sbrk( 0 ); 697 sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment 698 heapBegin = heapEnd = sbrk( 0 ); // get new start point 704 heapBegin = heapEnd = sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment 699 705 } // HeapManager 700 706 … … 722 728 //assert( heapManager.heapBegin != 0 ); 723 729 //heapManager{}; 724 if ( heapManager.heapBegin == 0p ) heapManager{}; 730 if ( heapManager.heapBegin == 0p ) heapManager{}; // sanity check 725 731 } // memory_startup 726 732 … … 734 740 //assert( heapManager.heapBegin != 0 ); 735 741 if ( unlikely( heapManager.heapBegin == 0p ) ) heapManager{}; // called before memory_startup ? 742 #if __SIZEOF_POINTER__ == 8 743 verify( size < ((typeof(size_t))1 << 48) ); 744 #endif // __SIZEOF_POINTER__ == 8 736 745 void * addr = doMalloc( size ); 737 746 if ( unlikely( addr == 0p ) ) errno = ENOMEM; // POSIX … … 740 749 741 750 742 static inline void * callocNoStats( size_t noOfElems, size_t elemSize ) {743 size_t size = noOfElems* elemSize;751 static inline void * callocNoStats( size_t dim, size_t elemSize ) { 752 size_t size = dim * elemSize; 744 753 char * addr = (char *)mallocNoStats( size ); 745 754 if ( unlikely( addr == 0p ) ) return 0p; … … 758 767 memset( addr, '\0', bsize - sizeof(HeapManager.Storage) ); // set to zeros 759 768 760 assert( noOfElems <= UINT32_MAX );761 header->dimension = noOfElems; // store number of array elements762 769 header->kind.real.blockSize |= 2; // mark as zero filled 763 770 return addr; … … 801 808 802 809 803 static inline void * cmemalignNoStats( size_t alignment, size_t noOfElems, size_t elemSize ) {804 size_t size = noOfElems* elemSize;810 static inline void * cmemalignNoStats( size_t alignment, size_t dim, size_t elemSize ) { 811 size_t size = dim * elemSize; 805 812 char * addr = (char *)memalignNoStats( alignment, size ); 806 813 if ( unlikely( addr == 0p ) ) return 0p; … … 815 822 memset( addr, '\0', dataStorage( bsize, addr, header ) ); // set to zeros 816 823 817 assert( noOfElems <= UINT32_MAX );818 header->dimension = noOfElems; // store initial array size819 824 header->kind.real.blockSize |= 2; // mark as zero filled 820 825 return addr; … … 832 837 833 838 extern "C" { 834 // Allocates size bytes and returns a pointer to the allocated memory. The memory is not initialized. If size is 0,835 // then malloc() returns either 0p, ora unique pointer value that can later be successfully passed to free().839 // Allocates size bytes and returns a pointer to the allocated memory. The contents are undefined. If size is 0, 840 // then malloc() returns a unique pointer value that can later be successfully passed to free(). 836 841 void * malloc( size_t size ) { 837 842 #ifdef __STATISTICS__ … … 843 848 } // malloc 844 849 845 // Allocate memory for an array of nmemb elements of size bytes each and returns a pointer to the allocated 846 // memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either 0p, or a unique pointer 847 // value that can later be successfully passed to free(). 848 void * calloc( size_t noOfElems, size_t elemSize ) { 850 851 // Same as malloc() except size bytes is an array of dim elements each of elemSize bytes. 852 void * aalloc( size_t dim, size_t elemSize ) { 853 #ifdef __STATISTICS__ 854 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST ); 855 __atomic_add_fetch( &aalloc_storage, dim * elemSize, __ATOMIC_SEQ_CST ); 856 #endif // __STATISTICS__ 857 858 return mallocNoStats( dim * elemSize ); 859 } // aalloc 860 861 862 // Same as aalloc() with memory set to zero. 863 void * calloc( size_t dim, size_t elemSize ) { 849 864 #ifdef __STATISTICS__ 850 865 __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST ); 851 __atomic_add_fetch( &calloc_storage, noOfElems* elemSize, __ATOMIC_SEQ_CST );852 #endif // __STATISTICS__ 853 854 return callocNoStats( noOfElems, elemSize );866 __atomic_add_fetch( &calloc_storage, dim * elemSize, __ATOMIC_SEQ_CST ); 867 #endif // __STATISTICS__ 868 869 return callocNoStats( dim, elemSize ); 855 870 } // calloc 856 871 857 // Change the size of the memory block pointed to by ptr to size bytes. The contents are undefined. If ptr is 0p, 858 // then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not 0p, 859 // then the call is equivalent to free(ptr). Unless ptr is 0p, it must have been returned by an earlier call to 860 // malloc(), calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done. 861 872 // Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined. If oaddr is 873 // 0p, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is 874 // not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier 875 // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done. 862 876 void * resize( void * oaddr, size_t size ) { 863 877 #ifdef __STATISTICS__ … … 874 888 size_t bsize, oalign = 0; 875 889 headers( "resize", oaddr, header, freeElem, bsize, oalign ); 890 876 891 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket 877 878 892 // same size, DO NOT preserve STICKY PROPERTIES. 879 893 if ( oalign == 0 && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size 880 894 header->kind.real.blockSize &= -2; // no alignment and turn off 0 fill 881 895 return oaddr; … … 883 897 884 898 // change size, DO NOT preserve STICKY PROPERTIES. 899 free( oaddr ); 885 900 void * naddr = mallocNoStats( size ); // create new area 886 free( oaddr );887 901 return naddr; 888 902 } // resize 889 903 890 904 891 // Same as resize but the contents shall be unchanged in the range from the start of the region up to the minimum of905 // Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of 892 906 // the old and new sizes. 893 907 void * realloc( void * oaddr, size_t size ) { … … 939 953 } // realloc 940 954 941 // Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of 942 // alignment, which must be a power of two. (obsolete) 955 // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete) 943 956 void * memalign( size_t alignment, size_t size ) { 944 957 #ifdef __STATISTICS__ … … 951 964 952 965 966 // Same as aalloc() with memory alignment. 967 void * amemalign( size_t alignment, size_t dim, size_t elemSize ) { 968 #ifdef __STATISTICS__ 969 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 970 __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST ); 971 #endif // __STATISTICS__ 972 973 return memalignNoStats( alignment, dim * elemSize ); 974 } // amemalign 975 976 953 977 // Same as calloc() with memory alignment. 954 void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {978 void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) { 955 979 #ifdef __STATISTICS__ 956 980 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 957 __atomic_add_fetch( &cmemalign_storage, noOfElems* elemSize, __ATOMIC_SEQ_CST );958 #endif // __STATISTICS__ 959 960 return cmemalignNoStats( alignment, noOfElems, elemSize );981 __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST ); 982 #endif // __STATISTICS__ 983 984 return cmemalignNoStats( alignment, dim, elemSize ); 961 985 } // cmemalign 962 986 … … 993 1017 994 1018 // Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc() 995 // or realloc(). Otherwise, or if free(ptr) has already been called before, undefined behavio r occurs. If ptr is1019 // or realloc(). Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is 996 1020 // 0p, no operation is performed. 997 1021 void free( void * addr ) { … … 1015 1039 1016 1040 1017 // Returns the alignment of theallocation.1041 // Returns the alignment of an allocation. 1018 1042 size_t malloc_alignment( void * addr ) { 1019 1043 if ( unlikely( addr == 0p ) ) return libAlign(); // minimum alignment … … 1026 1050 } // malloc_alignment 1027 1051 1028 1029 // Returns true if the allocation is zero filled, i.e., initially allocated by calloc(). 1052 // Set the alignment for an the allocation and return previous alignment or 0 if no alignment. 1053 size_t $malloc_alignment_set( void * addr, size_t alignment ) { 1054 if ( unlikely( addr == 0p ) ) return libAlign(); // minimum alignment 1055 size_t ret; 1056 HeapManager.Storage.Header * header = headerAddr( addr ); 1057 if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ? 1058 ret = header->kind.fake.alignment & -2; // remove flag from old value 1059 header->kind.fake.alignment = alignment | 1; // add flag to new value 1060 } else { 1061 ret = 0; // => no alignment to change 1062 } // if 1063 return ret; 1064 } // $malloc_alignment_set 1065 1066 1067 // Returns true if the allocation is zero filled, e.g., allocated by calloc(). 1030 1068 bool malloc_zero_fill( void * addr ) { 1031 1069 if ( unlikely( addr == 0p ) ) return false; // null allocation is not zero fill … … 1034 1072 header = realHeader( header ); // backup from fake to real header 1035 1073 } // if 1036 return (header->kind.real.blockSize & 2) != 0; // zero filled (calloc/cmemalign)?1074 return (header->kind.real.blockSize & 2) != 0; // zero filled ? 1037 1075 } // malloc_zero_fill 1038 1076 1039 1040 // Returns number of elements if the allocation is for an array, i.e., by calloc(). 1041 size_t malloc_dimension( void * addr ) { 1077 // Set allocation is zero filled and return previous zero filled. 1078 bool $malloc_zero_fill_set( void * addr ) { 1042 1079 if ( unlikely( addr == 0p ) ) return false; // null allocation is not zero fill 1043 1080 HeapManager.Storage.Header * header = headerAddr( addr ); … … 1045 1082 header = realHeader( header ); // backup from fake to real header 1046 1083 } // if 1047 return header->dimension; // array (calloc/cmemalign) 1048 } // malloc_zero_fill 1084 bool ret = (header->kind.real.blockSize & 2) != 0; // zero filled ? 1085 header->kind.real.blockSize |= 2; // mark as zero filled 1086 return ret; 1087 } // $malloc_zero_fill_set 1088 1089 1090 // Returns original total allocation size (not bucket size) => array size is dimension * sizeif(T). 1091 size_t malloc_size( void * addr ) { 1092 if ( unlikely( addr == 0p ) ) return false; // null allocation is not zero fill 1093 HeapManager.Storage.Header * header = headerAddr( addr ); 1094 if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ? 1095 header = realHeader( header ); // backup from fake to real header 1096 } // if 1097 return header->size; 1098 } // malloc_size 1099 1100 // Set allocation size and return previous size. 1101 size_t $malloc_size_set( void * addr, size_t size ) { 1102 if ( unlikely( addr == 0p ) ) return false; // null allocation is not zero fill 1103 HeapManager.Storage.Header * header = headerAddr( addr ); 1104 if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ? 1105 header = realHeader( header ); // backup from fake to real header 1106 } // if 1107 size_t ret = header->size; 1108 header->size = size; 1109 return ret; 1110 } // $malloc_size_set 1049 1111 1050 1112 … … 1082 1144 1083 1145 1084 // Adjusts parameters that control the behavio r of the memory-allocation functions (see malloc). The param argument1146 // Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument 1085 1147 // specifies the parameter to be modified, and value specifies the new value for that parameter. 1086 1148 int mallopt( int option, int value ) { 1087 1149 choose( option ) { 1088 1150 case M_TOP_PAD: 1089 if ( setHeapExpand( value ) )return 1;1151 heapExpand = ceiling( value, pageSize ); return 1; 1090 1152 case M_MMAP_THRESHOLD: 1091 1153 if ( setMmapStart( value ) ) return 1; 1154 break; 1092 1155 } // switch 1093 1156 return 0; // error, unsupported -
libcfa/src/interpose.cfa
re3bc51c rbcd74f3 15 15 16 16 #include <stdarg.h> // va_start, va_end 17 #include <stdio.h> 17 18 #include <string.h> // strlen 18 19 #include <unistd.h> // _exit, getpid -
libcfa/src/iostream.cfa
re3bc51c rbcd74f3 10 10 // Created On : Wed May 27 17:56:53 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Mar 11 14:35:35 202013 // Update Count : 86012 // Last Modified On : Sat May 2 18:30:25 2020 13 // Update Count : 1017 14 14 // 15 15 … … 29 29 #include <complex.h> // creal, cimag 30 30 } // extern "C" 31 32 #include <bitmanip.hfa> // fms 31 33 32 34 … … 459 461 \ 460 462 if ( f.base == 'b' || f.base == 'B' ) { /* bespoke binary format */ \ 461 int bits; \ 462 if ( f.val == (T){0} ) bits = 1; /* force at least one bit to print */ \ 463 else bits = sizeof(long long int) * 8 - __builtin_clzll( f.val ); /* position of most significant bit */ \ 464 bits = bits > sizeof(f.val) * 8 ? sizeof(f.val) * 8 : bits; \ 465 int spaces = f.wd - bits; /* can be negative */ \ 466 if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \ 467 /* printf( "%d %d\n", bits, spaces ); */ \ 463 int bits = high1( f.val ); /* position of most significant bit */ \ 464 if ( bits == 0 ) bits = 1; /* 0 value => force one bit to print */ \ 465 int spaces; \ 468 466 if ( ! f.flags.left ) { /* right justified ? */ \ 469 467 /* Note, base prefix then zero padding or spacing then prefix. */ \ 470 if ( f.flags.pad0 || f.flags.pc ) { \ 468 if ( f.flags.pc ) { \ 469 spaces = f.wd - f.pc; \ 470 if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \ 471 if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \ 471 472 if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \ 472 if ( f.flags.pc )spaces = f.pc - bits; \473 spaces = f.pc - bits; \ 473 474 if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \ 474 475 } else { \ 475 if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \ 476 if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \ 476 spaces = f.wd - bits; \ 477 if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \ 478 if ( f.flags.pad0 ) { \ 479 if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \ 480 if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \ 481 } else { \ 482 if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \ 483 if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \ 484 } /* if */ \ 477 485 } /* if */ \ 478 } else if ( ! f.flags.nobsdp ) { \ 479 fmt( os, "0%c", f.base ); \ 486 } else { \ 487 if ( ! f.flags.nobsdp ) fmt( os, "0%c", f.base ); \ 488 if ( f.flags.pc ) { \ 489 spaces = f.pc - bits; \ 490 if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \ 491 spaces = f.wd - f.pc; \ 492 } else { /* pad0 flag ignored with left flag */ \ 493 spaces = f.wd - bits; \ 494 } /* if */ \ 495 if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \ 480 496 } /* if */ \ 481 int shift = (bits - 1) / 4 * 4; /* floor( bits - 1, 4 ) */\497 int shift = floor( bits - 1, 4 ); \ 482 498 typeof( f.val ) temp = f.val; \ 483 499 fmt( os, "%s", shortbin[(temp >> shift) & 0xf] ); \ … … 565 581 fmt2.flags.pad0 = fmt2.flags.nobsdp = true; \ 566 582 if ( f.base == 'b' | f.base == 'B' ) { \ 567 if ( f.wd > 64 ) fmt.wd = f.wd - 64; \ 568 if ( f.flags.pc && f.pc > 64 ) fmt.pc = f.pc - 64; \ 569 fmt2.wd = 64; \ 583 if ( fmt.flags.pc && fmt.pc > 64 ) fmt.pc -= 64; else { fmt.flags.pc = false; fmt.pc = 0; } \ 584 if ( fmt.flags.left ) { \ 585 fmt.flags.left = false; \ 586 fmt.wd = 0; \ 587 /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \ 588 fmt2.flags.left = true; \ 589 int msigd = high1( msig ); \ 590 fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \ 591 if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0b base specifier */ \ 592 if ( (int)fmt2.wd < 64 ) fmt2.wd = 64; /* cast deals with negative value */ \ 593 fmt2.flags.pc = true; fmt2.pc = 64; \ 594 } else { \ 595 if ( fmt.wd > 64 ) fmt.wd -= 64; \ 596 else fmt.wd = 1; \ 597 /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \ 598 fmt2.wd = 64; \ 599 } /* if */ \ 600 /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \ 570 601 (ostype &)(os | fmt | "" | fmt2); \ 571 602 } else if ( f.base == 'o' ) { \ 603 if ( fmt.flags.pc && fmt.pc > 22 ) fmt.pc -= 22; else { fmt.flags.pc = false; fmt.pc = 0; } \ 572 604 fmt.val = (unsigned long long int)fmt.val >> 2; \ 573 if ( f.wd > 21 ) fmt.wd = f.wd - 21; \ 574 if ( f.flags.pc && f.pc > 21 ) fmt.pc = f.pc - 21; \ 575 fmt2.wd = 1; \ 576 fmt2.val = ((msig & 0x3) << 1) + 1; \ 577 (ostype &)(os | fmt | "" | fmt2); \ 578 sepOff( os ); \ 579 fmt2.wd = 21; \ 580 fmt2.val = lsig & 0x7fffffffffffffff; \ 605 fmt2.val = ((msig & 0x3) << 1) + ((lsig & 0x8000000000000000U) != 0); \ 606 if ( fmt.flags.left ) { \ 607 fmt.flags.left = false; \ 608 fmt.wd = 0; \ 609 /* printf( "L %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \ 610 (ostype &)(os | fmt | "" | fmt2); \ 611 sepOff( os ); \ 612 fmt2.flags.left = true; \ 613 int msigd = ceiling( high1( fmt.val ), 3 ); \ 614 fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \ 615 if ( ! fmt.flags.nobsdp ) fmt2.wd -= 1; /* compensate for 0 base specifier */ \ 616 if ( (int)fmt2.wd < 21 ) fmt2.wd = 21; /* cast deals with negative value */ \ 617 fmt2.flags.pc = true; fmt2.pc = 21; \ 618 } else { \ 619 if ( fmt.wd > 22 ) fmt.wd -= 22; \ 620 else fmt.wd = 1; \ 621 /* printf( "R %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \ 622 (ostype &)(os | fmt | "" | fmt2); \ 623 sepOff( os ); \ 624 fmt2.wd = 21; \ 625 } /* if */ \ 626 fmt2.val = lsig & 0x7fffffffffffffffU; \ 627 /* printf( "\nC %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \ 581 628 (ostype &)(os | fmt2); \ 582 } else { \ 583 if ( f.flags.left ) { \ 584 if ( f.wd > 16 ) fmt2.wd = f.wd - 16; \ 585 fmt.wd = 16; \ 629 } else { /* f.base == 'x' | f.base == 'X' */ \ 630 if ( fmt.flags.pc && fmt.pc > 16 ) fmt.pc -= 16; else { fmt.flags.pc = false; fmt.pc = 0; } \ 631 if ( fmt.flags.left ) { \ 632 fmt.flags.left = false; \ 633 fmt.wd = 0; \ 634 /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \ 635 fmt2.flags.left = true; \ 636 int msigd = high1( msig ); \ 637 fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \ 638 if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0x base specifier */ \ 639 if ( (int)fmt2.wd < 16 ) fmt2.wd = 16; /* cast deals with negative value */ \ 640 fmt2.flags.pc = true; fmt2.pc = 16; \ 586 641 } else { \ 587 if ( f.wd > 16 ) fmt.wd = f.wd - 16; \ 588 if ( f.flags.pc && f.pc > 16 ) fmt.pc = f.pc - 16; \ 642 if ( fmt.wd > 16 ) fmt.wd -= 16; \ 643 else fmt.wd = 1; \ 644 /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \ 589 645 fmt2.wd = 16; \ 590 646 } /* if */ \ 647 /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \ 591 648 (ostype &)(os | fmt | "" | fmt2); \ 592 649 } /* if */ \ -
libcfa/src/startup.cfa
re3bc51c rbcd74f3 14 14 // 15 15 16 #include <time.h> // tzset 16 #include <time.h> // tzset 17 #include <locale.h> // setlocale 17 18 #include "startup.hfa" 18 19 … … 21 22 void __cfaabi_appready_startup( void ) { 22 23 tzset(); // initialize time global variables 24 setlocale(LC_NUMERIC, ""); 23 25 #ifdef __CFA_DEBUG__ 24 26 extern void heapAppStart(); -
libcfa/src/stdhdr/malloc.h
re3bc51c rbcd74f3 10 10 // Created On : Thu Jul 20 15:58:16 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Mar 8 10:01:20202013 // Update Count : 1 112 // Last Modified On : Thu Apr 16 22:44:06 2020 13 // Update Count : 13 14 14 // 15 15 … … 31 31 32 32 extern "C" { 33 void * aalloc( size_t noOfElems, size_t elemSize ); 34 void * amemalign( size_t alignment, size_t noOfElems, size_t elemSize ); 33 35 void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); 34 36 size_t malloc_alignment( void * ); 35 37 bool malloc_zero_fill( void * ); 36 size_t malloc_ dimension( void * );38 size_t malloc_size( void * ); 37 39 int malloc_stats_fd( int fd ); 38 40 } // extern "C" -
libcfa/src/stdlib.cfa
re3bc51c rbcd74f3 10 10 // Created On : Thu Jan 28 17:10:29 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : T ue Mar 31 13:26:46202013 // Update Count : 49 512 // Last Modified On : Thu Apr 16 22:43:33 2020 13 // Update Count : 498 14 14 // 15 15 … … 37 37 } // alloc_set 38 38 39 T * alloc_set( T ptr[], size_t dim, T fill ) { 39 T * alloc_set( T ptr[], size_t dim, T fill ) { // realloc array with fill 40 40 size_t olen = malloc_usable_size( ptr ); // current allocation 41 41 void * nptr = (void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc 42 42 size_t nlen = malloc_usable_size( nptr ); // new allocation 43 43 if ( nlen > olen ) { // larger ? 44 for ( i; dim ) { memcpy( &ptr[i], &fill, sizeof(T) ); } // initialize with fill value 44 for ( i; malloc_size( ptr ) / sizeof(T) ~ dim ) { 45 memcpy( &ptr[i], &fill, sizeof(T) ); // initialize with fill value 46 } // for 45 47 } // if 46 48 return (T *)nptr; -
libcfa/src/stdlib.hfa
re3bc51c rbcd74f3 10 10 // Created On : Thu Jan 28 17:12:35 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Apr 1 18:38:41202013 // Update Count : 4 2912 // Last Modified On : Thu Apr 16 22:44:05 2020 13 // Update Count : 432 14 14 // 15 15 … … 25 25 void * memalign( size_t align, size_t size ); // malloc.h 26 26 size_t malloc_usable_size( void * ptr ); // malloc.h 27 size_t malloc_size( void * addr ); // CFA heap 27 28 void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap 28 29 void * memset( void * dest, int fill, size_t size ); // string.h -
src/CompilationState.cc
re3bc51c rbcd74f3 27 27 nopreludep = false, 28 28 genproto = false, 29 deterministic_output = false, 29 30 nomainp = false, 30 31 parsep = false, -
src/CompilationState.h
re3bc51c rbcd74f3 28 28 nopreludep, 29 29 genproto, 30 deterministic_output, 30 31 nomainp, 31 32 parsep, -
src/Parser/parser.yy
re3bc51c rbcd74f3 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Mar 6 17:26:45202013 // Update Count : 44 7412 // Last Modified On : Mon Apr 27 12:25:42 2020 13 // Update Count : 4483 14 14 // 15 15 … … 966 966 967 967 tuple_expression_list: 968 assignment_expression_opt 969 | tuple_expression_list ',' assignment_expression_opt 968 assignment_expression 969 | '@' // CFA 970 { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; } 971 | tuple_expression_list ',' assignment_expression 970 972 { $$ = (ExpressionNode *)($1->set_last( $3 )); } 973 | tuple_expression_list ',' '@' 974 { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; } 971 975 ; 972 976 -
src/ResolvExpr/TypeEnvironment.cc
re3bc51c rbcd74f3 20 20 #include <utility> // for pair, move 21 21 22 #include "CompilationState.h" // for deterministic_output 22 23 #include "Common/utility.h" // for maybeClone 23 24 #include "SynTree/Type.h" // for Type, FunctionType, Type::Fora... … … 106 107 107 108 void EqvClass::print( std::ostream &os, Indenter indent ) const { 108 os << "( "; 109 std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) ); 110 os << ")"; 109 if( !deterministic_output ) { 110 os << "( "; 111 std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) ); 112 os << ")"; 113 } 111 114 if ( type ) { 112 115 os << " -> "; … … 235 238 // check safely bindable 236 239 if ( r.type && occursIn( r.type, s.vars.begin(), s.vars.end(), *this ) ) return false; 237 240 238 241 // merge classes in 239 242 r.vars.insert( s.vars.begin(), s.vars.end() ); -
src/main.cc
re3bc51c rbcd74f3 449 449 450 450 451 static const char optstring[] = ":c:ghlLmNnp P:S:twW:D:";451 static const char optstring[] = ":c:ghlLmNnpdP:S:twW:D:"; 452 452 453 453 enum { PreludeDir = 128 }; … … 462 462 { "no-prelude", no_argument, nullptr, 'n' }, 463 463 { "prototypes", no_argument, nullptr, 'p' }, 464 { "deterministic-out", no_argument, nullptr, 'd' }, 464 465 { "print", required_argument, nullptr, 'P' }, 465 466 { "prelude-dir", required_argument, nullptr, PreludeDir }, … … 482 483 "do not read prelude", // -n 483 484 "generate prototypes for prelude functions", // -p 485 "don't print output that isn't deterministic", // -d 484 486 "print", // -P 485 487 "<directory> prelude directory for debug/nodebug", // no flag … … 586 588 genproto = true; 587 589 break; 590 case 'd': // don't print non-deterministic output 591 deterministic_output = true; 592 break; 588 593 case 'P': // print options 589 594 for ( int i = 0;; i += 1 ) { -
tests/.expect/alloc.txt
re3bc51c rbcd74f3 35 35 CFA realloc array alloc, fill 36 36 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 37 CFA realloc array alloc, 538 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede39 CFA realloc array alloc, 540 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef41 CFA realloc array alloc, 542 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff 0xffffffff43 37 44 38 C memalign 42 42.5 -
tests/Makefile.am
re3bc51c rbcd74f3 41 41 -quiet @CFA_FLAGS@ \ 42 42 -DIN_DIR="${abs_srcdir}/.in/" 43 44 AM_CFAFLAGS = -XCFA --deterministic-out 43 45 44 46 # get the desired cfa to test -
tests/Makefile.in
re3bc51c rbcd74f3 408 408 -DIN_DIR="${abs_srcdir}/.in/" 409 409 410 AM_CFAFLAGS = -XCFA --deterministic-out 410 411 411 412 # get the desired cfa to test -
tests/alloc.cfa
re3bc51c rbcd74f3 10 10 // Created On : Wed Feb 3 07:56:22 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Apr 1 10:58:35202013 // Update Count : 42 412 // Last Modified On : Mon Apr 6 21:08:23 2020 13 // Update Count : 428 14 14 // 15 15 … … 151 151 ip = alloc_set( ip, 3 * dim, fill ); // CFA realloc array alloc, fill 152 152 printf( "CFA realloc array alloc, fill\n" ); 153 for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); ;}154 printf( "\n" ); 155 // do not free 156 157 ip = alloc_set( ip, 3* dim, 5 ); // CFA realloc array alloc, 5153 for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); } 154 printf( "\n" ); 155 // do not free 156 #if 0 // FIX ME 157 ip = alloc_set( ip, 5 * dim, 5 ); // CFA realloc array alloc, 5 158 158 printf( "CFA realloc array alloc, 5\n" ); 159 for ( i; 3* dim ) { printf( "%#x ", ip[i] ); }159 for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); } 160 160 printf( "\n" ); 161 161 // do not free … … 167 167 // do not free 168 168 169 ip = alloc_set( ip, 3* dim, 5 ); // CFA realloc array alloc, 5169 ip = alloc_set( ip, 5 * dim, 5 ); // CFA realloc array alloc, 5 170 170 printf( "CFA realloc array alloc, 5\n" ); 171 for ( i; 3 * dim ) { printf( "%#x ", ip[i] );; }172 printf( "\n" ); 173 free( ip ); 174 171 for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); } 172 printf( "\n" ); 173 #endif // 0 174 free( ip ); 175 175 176 176 // resize, non-array types -
tests/concurrent/.expect/monitor.txt
re3bc51c rbcd74f3 1 40000001 3000000 -
tests/concurrent/monitor.cfa
re3bc51c rbcd74f3 29 29 30 30 void main( MyThread & this ) { 31 for(int i = 0; i < 1_000_000; i++) {31 for(int i = 0; i < 750_000; i++) { 32 32 increment( global ); 33 33 } -
tests/errors/.expect/completeType.txt
re3bc51c rbcd74f3 27 27 void 28 28 ) 29 Environment: ( _85_4_DT )-> instance of struct A with body 0 (no widening)29 Environment: -> instance of struct A with body 0 (no widening) 30 30 31 31 … … 50 50 void 51 51 ) 52 Environment: ( _85_4_DT )-> instance of struct B with body 1 (no widening)52 Environment: -> instance of struct B with body 1 (no widening) 53 53 54 54 … … 127 127 void 128 128 ) 129 Environment: ( _104_0_T )-> instance of type T (not function type) (no widening)129 Environment: -> instance of type T (not function type) (no widening) 130 130 131 131 Could not satisfy assertion: -
tests/exceptions/.expect/interact.txt
re3bc51c rbcd74f3 14 14 resumption catch, will terminate 15 15 inner termination catch 16 17 throwing resume moon 18 resumption moon catch, will terminate 19 termination catch 20 throwing resume star 21 resumption star catch -
tests/exceptions/.expect/resume.txt
re3bc51c rbcd74f3 25 25 caught second exception 26 26 recaught first exception 27 28 inner catch 29 inner catch 30 outer catch -
tests/exceptions/.expect/terminate.txt
re3bc51c rbcd74f3 24 24 caught second exception 25 25 recaught first exception 26 27 inner catch 28 outer catch -
tests/exceptions/conditional.cfa
re3bc51c rbcd74f3 4 4 // up the non-trivial exception is reasonable to do. 5 5 6 #include "except-mac.hfa"6 #include <exception.hfa> 7 7 #include <stdio.h> 8 8 9 DECLARE_EXCEPT(num_error, BASE_EXCEPT, 9 VTABLE_DECLARATION(num_error)( 10 10 int (*code)(num_error *this); 11 11 ); … … 36 36 this.num = other.num; 37 37 } 38 void copy(num_error * this, num_error * other) {39 *this = *other;40 }41 38 void ^?{}(num_error & this) { 42 39 if( this.msg ) free( this.msg ); … … 46 43 } 47 44 48 VTABLE_INSTANCE(num_error, BASE_EXCEPT, copy, ^?{}, 49 num_error_msg, num_error_code 45 VTABLE_INSTANCE(num_error)( 46 num_error_msg, 47 num_error_code, 50 48 ); 51 49 … … 58 56 59 57 try { 60 THROW(&exc);58 throw &exc; 61 59 } catch (num_error * error ; 3 == error->virtual_table->code( error )) { 62 60 caught_num_error(3, error); … … 66 64 67 65 try { 68 THROW_RESUME(&exc);66 throwResume &exc; 69 67 } catchResume (num_error * error ; 3 == error->virtual_table->code( error )) { 70 68 caught_num_error(3, error); -
tests/exceptions/finally.cfa
re3bc51c rbcd74f3 1 1 // Finally Clause Tests 2 2 3 #include "except-mac.hfa"3 #include <exception.hfa> 4 4 #include "except-io.hfa" 5 5 … … 12 12 try { 13 13 printf("termination throw\n"); 14 THROW(&exc);14 throw &exc; 15 15 } finally { 16 16 loud_exit a = "termination inner finally"; … … 28 28 try { 29 29 printf("resumption throw\n"); 30 THROW_RESUME(&exc);30 throwResume &exc; 31 31 } finally { 32 32 loud_exit a = "resumption inner finally"; -
tests/exceptions/interact.cfa
re3bc51c rbcd74f3 1 1 // Testing Interactions Between Termination and Resumption 2 2 3 #include "except-mac.hfa"3 #include <exception.hfa> 4 4 #include "except-io.hfa" 5 5 … … 10 10 // Resume falls back to terminate. 11 11 try { 12 THROW_RESUME(&(star){});12 throwResume &(star){}; 13 13 } catch (star *) { 14 14 printf("caught as termination\n"); … … 17 17 try { 18 18 loud_region a = "try block with resume throw"; 19 THROW_RESUME(&(star){});19 throwResume &(star){}; 20 20 } catch (star *) { 21 21 printf("caught as termination\n");