Changes in / [e5d9274:015925a]
- Files:
-
- 1 added
- 25 deleted
- 133 edited
-
benchmark/plot.py (modified) (7 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/allocator.tex (modified) (39 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/background.tex (modified) (15 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex (modified) (15 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/conclusion.tex (modified) (3 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/figures/Header.fig (modified) (1 diff)
-
doc/theses/mubeen_zulfiqar_MMath/figures/MultipleHeapsNoOwnership.fig (modified) (2 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/figures/MultipleHeapsOwnership.fig (modified) (2 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/figures/PerThreadHeap.fig (modified) (4 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/figures/SharedHeaps.fig (modified) (2 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/figures/SingleHeap.fig (modified) (2 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/figures/UserKernelHeaps.fig (modified) (1 diff)
-
doc/theses/mubeen_zulfiqar_MMath/intro.tex (modified) (6 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/performance.tex (modified) (7 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis-frontpgs.tex (modified) (5 diffs)
-
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex (modified) (1 diff)
-
doc/theses/thierry_delisle_PhD/thesis/Makefile (modified) (5 diffs)
-
doc/theses/thierry_delisle_PhD/thesis/data/churn.jax (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/data/churn.low.jax (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/data/cycle.jax (modified) (1 diff)
-
doc/theses/thierry_delisle_PhD/thesis/data/cycle.low.jax (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/data/memcd.rate (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/data/memcd.updt (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/data/yield.jax (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/data/yield.low.jax (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/fig/SAVE.fig (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/fig/idle.fig (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/fig/idle1.fig (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/fig/idle2.fig (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/fig/idle_state.fig (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/local.bib (modified) (1 diff)
-
doc/theses/thierry_delisle_PhD/thesis/test.svg (deleted)
-
doc/theses/thierry_delisle_PhD/thesis/text/eval_macro.tex (modified) (1 diff)
-
doc/theses/thierry_delisle_PhD/thesis/text/eval_micro.tex (modified) (8 diffs)
-
doc/theses/thierry_delisle_PhD/thesis/text/intro.tex (modified) (1 diff)
-
doc/theses/thierry_delisle_PhD/thesis/text/practice.tex (modified) (4 diffs)
-
doc/theses/thierry_delisle_PhD/thesis/thesis.tex (modified) (3 diffs)
-
libcfa/src/Makefile.am (modified) (2 diffs)
-
libcfa/src/algorithms/range_iterator.cfa (modified) (1 diff)
-
libcfa/src/assert.cfa (modified) (3 diffs)
-
libcfa/src/bits/debug.cfa (modified) (2 diffs)
-
libcfa/src/bits/defs.hfa (modified) (1 diff)
-
libcfa/src/bits/weakso_locks.cfa (modified) (1 diff)
-
libcfa/src/common.cfa (modified) (1 diff)
-
libcfa/src/concurrency/alarm.cfa (modified) (1 diff)
-
libcfa/src/concurrency/clib/cfathread.cfa (modified) (3 diffs)
-
libcfa/src/concurrency/coroutine.cfa (modified) (10 diffs)
-
libcfa/src/concurrency/coroutine.hfa (modified) (5 diffs)
-
libcfa/src/concurrency/exception.cfa (modified) (1 diff)
-
libcfa/src/concurrency/invoke.c (modified) (4 diffs)
-
libcfa/src/concurrency/io.cfa (modified) (5 diffs)
-
libcfa/src/concurrency/io/call.cfa.in (modified) (1 diff)
-
libcfa/src/concurrency/io/setup.cfa (modified) (2 diffs)
-
libcfa/src/concurrency/io/types.hfa (modified) (3 diffs)
-
libcfa/src/concurrency/kernel.cfa (modified) (8 diffs)
-
libcfa/src/concurrency/kernel.hfa (modified) (1 diff)
-
libcfa/src/concurrency/kernel/cluster.cfa (modified) (6 diffs)
-
libcfa/src/concurrency/kernel/cluster.hfa (modified) (3 diffs)
-
libcfa/src/concurrency/kernel/private.hfa (modified) (1 diff)
-
libcfa/src/concurrency/kernel/startup.cfa (modified) (9 diffs)
-
libcfa/src/concurrency/locks.cfa (modified) (9 diffs)
-
libcfa/src/concurrency/monitor.cfa (modified) (18 diffs)
-
libcfa/src/concurrency/monitor.hfa (modified) (1 diff)
-
libcfa/src/concurrency/preemption.cfa (modified) (6 diffs)
-
libcfa/src/concurrency/ready_queue.cfa (modified) (4 diffs)
-
libcfa/src/concurrency/ready_subqueue.hfa (modified) (4 diffs)
-
libcfa/src/concurrency/thread.cfa (modified) (5 diffs)
-
libcfa/src/concurrency/thread.hfa (modified) (3 diffs)
-
libcfa/src/containers/maybe.cfa (modified) (1 diff)
-
libcfa/src/containers/result.cfa (modified) (1 diff)
-
libcfa/src/containers/string.cfa (modified) (1 diff)
-
libcfa/src/containers/string_sharectx.hfa (modified) (1 diff)
-
libcfa/src/containers/vector.cfa (modified) (2 diffs)
-
libcfa/src/device/cpu.cfa (modified) (2 diffs)
-
libcfa/src/exception.c (modified) (2 diffs)
-
libcfa/src/exception.hfa (modified) (2 diffs)
-
libcfa/src/fstream.cfa (modified) (4 diffs)
-
libcfa/src/fstream.hfa (modified) (1 diff)
-
libcfa/src/heap.cfa (modified) (33 diffs)
-
libcfa/src/interpose.cfa (modified) (5 diffs)
-
libcfa/src/iostream.cfa (modified) (1 diff)
-
libcfa/src/limits.cfa (modified) (1 diff)
-
libcfa/src/memory.cfa (modified) (1 diff)
-
libcfa/src/parseargs.cfa (modified) (1 diff)
-
libcfa/src/parseconfig.cfa (modified) (4 diffs)
-
libcfa/src/rational.cfa (modified) (1 diff)
-
libcfa/src/startup.cfa (modified) (2 diffs)
-
libcfa/src/stdlib.cfa (modified) (2 diffs)
-
libcfa/src/strstream.cfa (modified) (3 diffs)
-
libcfa/src/time.cfa (modified) (1 diff)
-
libcfa/src/virtual.c (modified) (1 diff)
-
src/AST/Eval.hpp (added)
-
src/AST/Expr.cpp (modified) (5 diffs)
-
src/AST/Expr.hpp (modified) (1 diff)
-
src/AST/module.mk (modified) (1 diff)
-
src/CodeGen/CodeGenerator.cc (modified) (1 diff)
-
src/CodeGen/FixMain.cc (modified) (1 diff)
-
src/CodeGen/FixMain2.cc (deleted)
-
src/CodeGen/GenType.cc (modified) (4 diffs)
-
src/CodeGen/LinkOnce.cc (modified) (1 diff)
-
src/CodeGen/module.mk (modified) (1 diff)
-
src/CodeTools/ResolvProtoDump.cc (modified) (1 diff)
-
src/Common/Indenter.cc (deleted)
-
src/Common/Indenter.h (modified) (2 diffs)
-
src/Common/ResolvProtoDump.cpp (modified) (1 diff)
-
src/Common/module.mk (modified) (1 diff)
-
src/Concurrency/module.mk (modified) (1 diff)
-
src/ControlStruct/ExceptDecl.cc (modified) (6 diffs)
-
src/ControlStruct/module.mk (modified) (2 diffs)
-
src/GenPoly/Lvalue.cc (modified) (2 diffs)
-
src/GenPoly/Lvalue2.cc (deleted)
-
src/GenPoly/module.mk (modified) (1 diff)
-
src/InitTweak/FixInitNew.cpp (modified) (4 diffs)
-
src/InitTweak/GenInit.cc (modified) (2 diffs)
-
src/InitTweak/module.mk (modified) (2 diffs)
-
src/Parser/parser.yy (modified) (14 diffs)
-
src/ResolvExpr/AlternativeFinder.cc (modified) (1 diff)
-
src/ResolvExpr/CandidateFinder.cpp (modified) (2 diffs)
-
src/ResolvExpr/CommonType.cc (modified) (13 diffs)
-
src/ResolvExpr/ConversionCost.cc (modified) (4 diffs)
-
src/ResolvExpr/ConversionCost.h (modified) (2 diffs)
-
src/SymTab/Autogen.h (modified) (14 diffs)
-
src/SymTab/Demangle.cc (modified) (1 diff)
-
src/SymTab/Demangle.h (deleted)
-
src/SymTab/Mangler.h (modified) (1 diff)
-
src/SymTab/Validate.cc (modified) (12 diffs)
-
src/SymTab/Validate.h (modified) (3 diffs)
-
src/SymTab/ValidateType.cc (deleted)
-
src/SymTab/ValidateType.h (deleted)
-
src/SymTab/demangler.cc (modified) (1 diff)
-
src/SymTab/module.mk (modified) (1 diff)
-
src/SynTree/BaseSyntaxNode.cc (deleted)
-
src/SynTree/module.mk (modified) (1 diff)
-
src/Tuples/TupleExpansion.cc (modified) (2 diffs)
-
src/Tuples/Tuples.cc (modified) (5 diffs)
-
src/Tuples/module.mk (modified) (2 diffs)
-
src/Validate/Autogen.cpp (modified) (1 diff)
-
src/Validate/module.mk (modified) (3 diffs)
-
src/Virtual/module.mk (modified) (1 diff)
-
src/main.cc (modified) (1 diff)
-
tests/.expect/attributes.nast.x64.txt (modified) (1 diff)
-
tests/.expect/attributes.nast.x86.txt (modified) (1 diff)
-
tests/.expect/attributes.oast.x64.txt (modified) (1 diff)
-
tests/.expect/attributes.oast.x86.txt (modified) (1 diff)
-
tests/.expect/nested_function.txt (deleted)
-
tests/.expect/quasiKeyword.txt (modified) (1 diff)
-
tests/concurrent/examples/multiSort.cfa (deleted)
-
tests/enum_tests/.expect/structEnum.txt (deleted)
-
tests/enum_tests/structEnum.cfa (deleted)
-
tests/exceptions/defaults.cfa (modified) (2 diffs)
-
tests/include/.expect/includes.nast.txt (modified) (1 diff)
-
tests/include/includes.cfa (modified) (3 diffs)
-
tests/linking/exception-nothreads.cfa (modified) (1 diff)
-
tests/linking/exception-withthreads.cfa (modified) (1 diff)
-
tests/nested_function.cfa (deleted)
-
tests/pybin/settings.py (modified) (1 diff)
-
tests/pybin/test_run.py (modified) (1 diff)
-
tests/quasiKeyword.cfa (modified) (3 diffs)
-
tests/test.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
benchmark/plot.py
re5d9274 r015925a 22 22 23 23 class Field: 24 def __init__(self, unit, _min, _log , _name=None):24 def __init__(self, unit, _min, _log): 25 25 self.unit = unit 26 26 self.min = _min 27 27 self.log = _log 28 self.name = _name29 28 30 29 field_names = { … … 33 32 "Ops per procs" : Field('Ops' , 0, False), 34 33 "Ops per threads" : Field('Ops' , 0, False), 35 "ns per ops/procs" : Field(' ' , 0, False, _name = "Latency (ns $/$ (Processor $\\times$ Operation))"),34 "ns per ops/procs" : Field('ns' , 0, False), 36 35 "Number of threads" : Field('' , 1, False), 37 36 "Total Operations(ops)" : Field('Ops' , 0, False), 38 37 "Ops/sec/procs" : Field('Ops' , 0, False), 39 38 "Total blocks" : Field('Blocks', 0, False), 40 "Ops per second" : Field(' ' , 0, False),39 "Ops per second" : Field('Ops' , 0, False), 41 40 "Cycle size (# thrds)" : Field('thrd' , 1, False), 42 41 "Duration (ms)" : Field('ms' , 0, False), … … 52 51 } 53 52 54 def plot(in_data, x, y, o ptions):53 def plot(in_data, x, y, out): 55 54 fig, ax = plt.subplots() 56 55 colors = itertools.cycle(['#0095e3','#006cb4','#69df00','#0aa000','#fb0300','#e30002','#fd8f00','#ff7f00','#8f00d6','#4b009a','#ffff00','#b13f00']) … … 110 109 print("Finishing Plots") 111 110 112 plt.ylabel( field_names[y].name if field_names[y].name elsey)111 plt.ylabel(y) 113 112 # plt.xticks(range(1, math.ceil(mx) + 1)) 114 plt.xlabel( field_names[x].name if field_names[x].name elsex)113 plt.xlabel(x) 115 114 plt.grid(b = True) 116 115 ax.xaxis.set_major_formatter( EngFormatter(unit=field_names[x].unit) ) 117 if options.logx: 118 ax.set_xscale('log') 119 elif field_names[x].log: 116 if field_names[x].log: 120 117 ax.set_xscale('log') 121 118 else: … … 123 120 124 121 ax.yaxis.set_major_formatter( EngFormatter(unit=field_names[y].unit) ) 125 if options.logy: 126 ax.set_yscale('log') 127 elif field_names[y].log: 122 if field_names[y].log: 128 123 ax.set_yscale('log') 129 124 else: 130 plt.ylim(field_names[y].min, options.MaxY if options.MaxY 
elsemy*1.2)125 plt.ylim(field_names[y].min, my*1.2) 131 126 132 127 plt.legend(loc='upper left') 133 128 134 129 print("Results Ready") 135 if o ptions.out:136 plt.savefig(o ptions.out, bbox_inches='tight')130 if out: 131 plt.savefig(out) 137 132 else: 138 133 plt.show() … … 147 142 parser.add_argument('-y', nargs='?', type=str, default="", help="Which field to use as the Y axis") 148 143 parser.add_argument('-x', nargs='?', type=str, default="", help="Which field to use as the X axis") 149 parser.add_argument('--logx', action='store_true', help="if set, makes the x-axis logscale")150 parser.add_argument('--logy', action='store_true', help="if set, makes the y-axis logscale")151 parser.add_argument('--MaxY', nargs='?', type=int, help="maximum value of the y-axis")152 144 153 145 options = parser.parse_args() … … 193 185 194 186 195 plot(data, wantx, wanty, options )187 plot(data, wantx, wanty, options.out) -
doc/theses/mubeen_zulfiqar_MMath/allocator.tex
re5d9274 r015925a 29 29 llheap's design was reviewed and changed multiple times throughout the thesis. 30 30 Some of the rejected designs are discussed because they show the path to the final design (see discussion in \VRef{s:MultipleHeaps}). 31 Note, a few simple tests for a design choice were compared with the current best allocators to determine the viability of a design.31 Note, a few simples tests for a design choice were compared with the current best allocators to determine the viability of a design. 32 32 33 33 … … 37 37 These designs look at the allocation/free \newterm{fastpath}, \ie when an allocation can immediately return free storage or returned storage is not coalesced. 38 38 \paragraph{T:1 model} 39 \VRef[Figure]{f:T1SharedBuckets} shows one heap accessed by multiple kernel threads (KTs) using a bucket array, where smaller bucket sizes are shared among NKTs.40 This design leverages the fact that usually the allocation requests are less than 1024 bytes and there are only a fewdifferent request sizes.39 \VRef[Figure]{f:T1SharedBuckets} shows one heap accessed by multiple kernel threads (KTs) using a bucket array, where smaller bucket sizes are N-shared across KTs. 40 This design leverages the fact that 95\% of allocation requests are less than 1024 bytes and there are only 3--5 different request sizes. 41 41 When KTs $\le$ N, the common bucket sizes are uncontented; 42 42 when KTs $>$ N, the free buckets are contented and latency increases significantly. … … 64 64 65 65 \paragraph{T:H model} 66 \VRef[Figure]{f:THSharedHeaps} shows a fixed number of heaps (N), each a local free pool, where the heaps are sharded (distributed)across the KTs.66 \VRef[Figure]{f:THSharedHeaps} shows a fixed number of heaps (N), each a local free pool, where the heaps are sharded across the KTs. 67 67 A KT can point directly to its assigned heap or indirectly through the corresponding heap bucket. 
68 When KT $\le$ N, the heaps might be uncontented;68 When KT $\le$ N, the heaps are uncontented; 69 69 when KTs $>$ N, the heaps are contented. 70 70 In all cases, a KT must acquire/release a lock, contented or uncontented along the fast allocation path because a heap is shared. 71 By increasing N, this approach reduces contention but increases storage (time versus space);71 By adjusting N upwards, this approach reduces contention but increases storage (time versus space); 72 72 however, picking N is workload specific. 73 73 … … 109 109 Need to prevent preemption during a dynamic memory operation because of the \newterm{serially-reusable problem}. 110 110 \begin{quote} 111 A sequence of code that is guaranteed to run to completion before being invoked to accept another input is called serially-reusable code.~\cite{SeriallyReusable} \label{p:SeriallyReusable}111 A sequence of code that is guaranteed to run to completion before being invoked to accept another input is called serially-reusable code.~\cite{SeriallyReusable} 112 112 \end{quote} 113 113 If a KT is preempted during an allocation operation, the operating system can schedule another KT on the same CPU, which can begin an allocation operation before the previous operation associated with this CPU has completed, invalidating heap correctness. … … 138 138 (See \VRef[Figure]{f:THSharedHeaps} but with a heap bucket per KT and no bucket or local-pool lock.) 139 139 Hence, immediately after a KT starts, its heap is created and just before a KT terminates, its heap is (logically) deleted. 140 Heaps are uncontended for a KTs memory operations as every KT has its own thread-local heap, modulo operations on the global pool and ownership.140 Heaps are uncontended for a KTs memory operations to its heap (modulo operations on the global pool and ownership). 
141 141 142 142 Problems: 143 143 \begin{itemize} 144 144 \item 145 Need to know when a KT starts/terminates to create/delete its heap.145 Need to know when a KT is starts/terminates to create/delete its heap. 146 146 147 147 \noindent … … 161 161 \noindent 162 162 In many concurrent applications, good performance is achieved with the number of KTs proportional to the number of CPUs. 163 Since the number of CPUs is relatively small, and a heap is alsorelatively small, $\approx$10K bytes (not including any associated freed storage), the worst-case external fragmentation is still small compared to the RAM available on large servers with many CPUs.163 Since the number of CPUs is relatively small, >~1024, and a heap relatively small, $\approx$10K bytes (not including any associated freed storage), the worst-case external fragmentation is still small compared to the RAM available on large servers with many CPUs. 164 164 \item 165 165 There is the same serially-reusable problem with UTs migrating across KTs. … … 171 171 \noindent 172 172 The conclusion from this design exercise is: any atomic fence, atomic instruction (lock free), or lock along the allocation fastpath produces significant slowdown. 173 For the T:1 and T:H models, locking must exist along the allocation fastpath because the buckets or heaps m ightbe shared by multiple threads, even when KTs $\le$ N.173 For the T:1 and T:H models, locking must exist along the allocation fastpath because the buckets or heaps maybe shared by multiple threads, even when KTs $\le$ N. 174 174 For the T:H=CPU and 1:1 models, locking is eliminated along the allocation fastpath. 175 175 However, T:H=CPU has poor operating-system support to determine the CPU id (heap id) and prevent the serially-reusable problem for KTs. 176 176 More operating system support is required to make this model viable, but there is still the serially-reusable problem with user-level threading. 
177 So the 1:1 model had no atomic actions along the fastpath and no special operating-system support requirements.177 Leaving the 1:1 model with no atomic actions along the fastpath and no special operating-system support required. 178 178 The 1:1 model still has the serially-reusable problem with user-level threading, which is addressed in \VRef{s:UserlevelThreadingSupport}, and the greatest potential for heap blowup for certain allocation patterns. 179 179 … … 212 212 Ideally latency is $O(1)$ with a small constant. 213 213 214 To obtain $O(1)$ internal latency means no searching on the allocation fastpath andlargely prohibits coalescing, which leads to external fragmentation.214 To obtain $O(1)$ internal latency means no searching on the allocation fastpath, largely prohibits coalescing, which leads to external fragmentation. 215 215 The mitigating factor is that most programs have well behaved allocation patterns, where the majority of allocation operations can be $O(1)$, and heap blowup does not occur without coalescing (although the allocation footprint may be slightly larger). 216 216 … … 257 257 llheap starts by creating an array of $N$ global heaps from storage obtained using @mmap@, where $N$ is the number of computer cores, that persists for program duration. 258 258 There is a global bump-pointer to the next free heap in the array. 259 When this array is exhausted, another array of heapsis allocated.260 There is a global top pointer for a intrusive linked-listto chain free heaps from terminated threads.261 When statistics are turned on, there is a global top pointer for a intrusive linked-listto chain \emph{all} the heaps, which is traversed to accumulate statistics counters across heaps using @malloc_stats@.259 When this array is exhausted, another array is allocated. 260 There is a global top pointer for a heap intrusive link to chain free heaps from terminated threads. 
261 When statistics are turned on, there is a global top pointer for a heap intrusive link to chain \emph{all} the heaps, which is traversed to accumulate statistics counters across heaps using @malloc_stats@. 262 262 263 263 When a KT starts, a heap is allocated from the current array for exclusive use by the KT. 264 When a KT terminates, its heap is chained onto the heap free-list for reuse by a new KT, which prevents unbounded growth of number ofheaps.265 The free heaps are stored onstack so hot storage is reused first.266 Preserving all heaps , created during the program lifetime, solves the storage lifetime problemwhen ownership is used.264 When a KT terminates, its heap is chained onto the heap free-list for reuse by a new KT, which prevents unbounded growth of heaps. 265 The free heaps is a stack so hot storage is reused first. 266 Preserving all heaps created during the program lifetime, solves the storage lifetime problem, when ownership is used. 267 267 This approach wastes storage if a large number of KTs are created/terminated at program start and then the program continues sequentially. 268 268 llheap can be configured with object ownership, where an object is freed to the heap from which it is allocated, or object no-ownership, where an object is freed to the KT's current heap. 269 269 270 270 Each heap uses segregated free-buckets that have free objects distributed across 91 different sizes from 16 to 4M. 271 All objects in a bucket are of the same size.272 271 The number of buckets used is determined dynamically depending on the crossover point from @sbrk@ to @mmap@ allocation using @mallopt( M_MMAP_THRESHOLD )@, \ie small objects managed by the program and large objects managed by the operating system. 273 272 Each free bucket of a specific size has the following two lists: … … 287 286 Quantizing is performed using a binary search over the ordered bucket array. 
288 287 An optional optimization is fast lookup $O(1)$ for sizes < 64K from a 64K array of type @char@, where each element has an index to the corresponding bucket. 289 The @char@ type restricts the number of bucket sizes to 256. 288 (Type @char@ restricts the number of bucket sizes to 256.) 290 289 For $S$ > 64K, a binary search is used. 291 290 Then, the allocation storage is obtained from the following locations (in order), with increasing latency. … … 382 381 Then the corresponding bucket of the owner thread is computed for the deallocating thread, and the allocation is pushed onto the deallocating thread's bucket. 383 382 384 Finally, the llheap design funnels \label{p:FunnelRoutine} all allocation/deallocation operations through the @malloc@ and @free@ routines, which are the only routines to directly access and manage the internal data structures of the heap.383 Finally, the llheap design funnels \label{p:FunnelRoutine} all allocation/deallocation operations through routines @malloc@/@free@, which are the only routines to directly access and manage the internal data structures of the heap. 385 384 Other allocation operations, \eg @calloc@, @memalign@, and @realloc@, are composed of calls to @malloc@ and possibly @free@, and may manipulate header information after storage is allocated. 386 385 This design simplifies heap-management code during development and maintenance. … … 389 388 \subsection{Alignment} 390 389 391 Most dynamic memory allocationshave a minimum storage alignment for the contained object(s).390 All dynamic memory allocations must have a minimum storage alignment for the contained object(s). 392 391 Often the minimum memory alignment, M, is the bus width (32 or 64-bit) or the largest register (double, long double) or largest atomic instruction (DCAS) or vector data (MMMX). 393 392 In general, the minimum storage alignment is 8/16-byte boundary on 32/64-bit computers. 
394 393 For consistency, the object header is normally aligned at this same boundary. 395 Larger alignments must be a power of 2, such aspage alignment (4/8K).394 Larger alignments must be a power of 2, such page alignment (4/8K). 396 395 Any alignment request, N, $\le$ the minimum alignment is handled as a normal allocation with minimal alignment. 397 396 … … 401 400 \end{center} 402 401 The storage between @E@ and @H@ is chained onto the appropriate free list for future allocations. 403 Th e same approach is used for sufficiently large free blocks, where @E@ is the start of the free block, and any unused storage before @H@ or after the allocated object becomes free storage.402 This approach is also valid within any sufficiently large free block, where @E@ is the start of the free block, and any unused storage before @H@ or after the allocated object becomes free storage. 404 403 In this approach, the aligned address @A@ is the same as the allocated storage address @P@, \ie @P@ $=$ @A@ for all allocation routines, which simplifies deallocation. 405 404 However, if there are a large number of aligned requests, this approach leads to memory fragmentation from the small free areas around the aligned object. … … 408 407 Finally, this approach is incompatible with allocator designs that funnel allocation requests through @malloc@ as it directly manipulates management information within the allocator to optimize the space/time of a request. 
409 408 410 Instead, llheap alignment is accomplished by making a \emph{pessimistic } allocation request for sufficient storage to ensure that \emph{both} the alignment and size request are satisfied, \eg:409 Instead, llheap alignment is accomplished by making a \emph{pessimistically} allocation request for sufficient storage to ensure that \emph{both} the alignment and size request are satisfied, \eg: 411 410 \begin{center} 412 411 \input{Alignment2} … … 425 424 \input{Alignment2Impl} 426 425 \end{center} 427 Since @malloc@ has a minimum alignment of @M@, @P@ $\neq$ @A@ only holds for alignments greater than @M@.426 Since @malloc@ has a minimum alignment of @M@, @P@ $\neq$ @A@ only holds for alignments of @M@ or greater. 428 427 When @P@ $\neq$ @A@, the minimum distance between @P@ and @A@ is @M@ bytes, due to the pessimistic storage-allocation. 429 428 Therefore, there is always room for an @M@-byte fake header before @A@. … … 440 439 \label{s:ReallocStickyProperties} 441 440 442 The allocation routine @realloc@ provides a memory-management pattern for shrinking/enlarging an existing allocation, while maintaining some or all of the object data, rather than performing the following steps manually.441 Allocation routine @realloc@ provides a memory-management pattern for shrinking/enlarging an existing allocation, while maintaining some or all of the object data, rather than performing the following steps manually. 443 442 \begin{flushleft} 444 443 \begin{tabular}{ll} … … 461 460 The realloc pattern leverages available storage at the end of an allocation due to bucket sizes, possibly eliminating a new allocation and copying. 462 461 This pattern is not used enough to reduce storage management costs. 
463 In fact, if @oaddr@ is @nullptr@, @realloc@ does a @malloc@, so even the initial @malloc@ can be a @realloc@ for consistency in the allocationpattern.462 In fact, if @oaddr@ is @nullptr@, @realloc@ does a @malloc@, so even the initial @malloc@ can be a @realloc@ for consistency in the pattern. 464 463 465 464 The hidden problem for this pattern is the effect of zero fill and alignment with respect to reallocation. 466 465 Are these properties transient or persistent (``sticky'')? 467 For example, when memory is initially allocated by @calloc@ or @memalign@ with zero fill or alignment properties, respectively, what happens when those allocations are given to @realloc@ to change size ?468 That is, if @realloc@ logically extends storage into unused bucket space or allocates new storage to satisfy a size change, are initial allocation properties preserve d?466 For example, when memory is initially allocated by @calloc@ or @memalign@ with zero fill or alignment properties, respectively, what happens when those allocations are given to @realloc@ to change size. 467 That is, if @realloc@ logically extends storage into unused bucket space or allocates new storage to satisfy a size change, are initial allocation properties preserve? 469 468 Currently, allocation properties are not preserved, so subsequent use of @realloc@ storage may cause inefficient execution or errors due to lack of zero fill or alignment. 470 469 This silent problem is unintuitive to programmers and difficult to locate because it is transient. … … 476 475 477 476 To preserve allocation properties requires storing additional information with an allocation, 478 The best available option is the header, where \VRef[Figure]{f:llheapNormalHeader} shows the llheap storage layout.477 The only available location is the header, where \VRef[Figure]{f:llheapNormalHeader} shows the llheap storage layout. 479 478 The header has two data field sized appropriately for 32/64-bit alignment requirements. 
480 479 The first field is a union of three values: … … 488 487 \end{description} 489 488 The second field remembers the request size versus the allocation (bucket) size, \eg request 42 bytes which is rounded up to 64 bytes. 490 Since programmers think in request sizes rather than allocation sizes, the request size allows better generation of statistics or errors and also helps in memory management.489 Since programmers think in request sizes rather than allocation sizes, the request size allows better generation of statistics or errors. 491 490 492 491 \begin{figure} … … 497 496 \end{figure} 498 497 499 The low-order 3-bits of the first field are \emph{unused} for any stored values as these values are 16-byte aligned by default, whereas the second field may use all of its bits.498 The low-order 3-bits of the first field are \emph{unused} for any stored values, whereas the second field may use all of its bits. 500 499 The 3 unused bits are used to represent mapped allocation, zero filled, and alignment, respectively. 501 500 Note, the alignment bit is not used in the normal header and the zero-filled/mapped bits are not used in the fake header. … … 503 502 If no bits are on, it implies a basic allocation, which is handled quickly; 504 503 otherwise, the bits are analysed and appropriate actions are taken for the complex cases. 505 Since most allocations are basic, th ey will take significantly less time as the memory operations will be donealong the allocation and free fastpath.504 Since most allocations are basic, this implementation results in a significant performance gain along the allocation and free fastpath. 506 505 507 506 … … 515 514 To locate all statistic counters, heaps are linked together in statistics mode, and this list is locked and traversed to sum all counters across heaps. 
516 515 Note, the list is locked to prevent errors traversing an active list; 517 the statistics counters are not locked and can flicker during accumulation .516 the statistics counters are not locked and can flicker during accumulation, which is not an issue with atomic read/write. 518 517 \VRef[Figure]{f:StatiticsOutput} shows an example of statistics output, which covers all allocation operations and information about deallocating storage not owned by a thread. 519 518 No other memory allocator studied provides as comprehensive statistical information. 520 Finally, these statistics were invaluable during the development of this thesis for debugging and verifying correctness andshould be equally valuable to application developers.519 Finally, these statistics were invaluable during the development of this thesis for debugging and verifying correctness, and hence, should be equally valuable to application developers. 521 520 522 521 \begin{figure} … … 548 547 Nevertheless, the checks detect many allocation problems. 549 548 There is an unfortunate problem in detecting unfreed storage because some library routines assume their allocations have life-time duration, and hence, do not free their storage. 550 For example, @printf@ allocates a 1024 -byte buffer on thefirst call and never deletes this buffer.549 For example, @printf@ allocates a 1024 buffer on first call and never deletes this buffer. 551 550 To prevent a false positive for unfreed storage, it is possible to specify an amount of storage that is never freed (see @malloc_unfreed@ \VPageref{p:malloc_unfreed}), and it is subtracted from the total allocate/free difference. 552 551 Determining the amount of never-freed storage is annoying, but once done, any warnings of unfreed storage are application related. 
553 552 554 Tests indicate only a 30\% performance decrease when statistics \emph{and} debugging are enabled, and the latency cost for accumulating statistic is mitigated by limited calls, often only one at the end of the program. 553 Tests indicate only a 30\% performance increase when statistics \emph{and} debugging are enabled, and the latency cost for accumulating statistic is mitigated by limited calls, often only one at the end of the program. 555 554 556 555 … … 558 557 \label{s:UserlevelThreadingSupport} 559 558 560 The serially-reusable problem (see \VPageref{p:SeriallyReusable}) occurs for kernel threads in the ``T:H model, H = number of CPUs'' model and for user threads in the ``1:1'' model, where llheap uses the ``1:1'' model. 561 The solution is to prevent interrupts that can result in a CPU or KT change during operations that are logically critical sections such as starting a memory operation on one KT and completing it on another. 559 The serially-reusable problem (see \VRef{s:AllocationFastpath}) occurs for kernel threads in the ``T:H model, H = number of CPUs'' model and for user threads in the ``1:1'' model, where llheap uses the ``1:1'' model. 560 The solution is to prevent interrupts that can result in CPU or KT change during operations that are logically critical sections. 562 561 Locking these critical sections negates any attempt for a quick fastpath and results in high contention. 563 562 For user-level threading, the serially-reusable problem appears with time slicing for preemptable scheduling, as the signal handler context switches to another user-level thread. 
564 Without time slicing, a user thread performing a long computation can prevent the execution of (starve) other threads. 565 To prevent starvation for a memory-allocation-intensive thread, \ie the time slice always triggers in an allocation critical-section for one thread so the thread never gets time sliced, a thread-local \newterm{rollforward} flag is set in the signal handler when it aborts a time slice. 563 Without time slicing, a user thread performing a long computation can prevent execution (starve) other threads. 564 To prevent starvation for an allocation-active thread, \ie the time slice always triggers in an allocation critical-section for one thread, a thread-local \newterm{rollforward} flag is set in the signal handler when it aborts a time slice. 566 565 The rollforward flag is tested at the end of each allocation funnel routine (see \VPageref{p:FunnelRoutine}), and if set, it is reset and a volunteer yield (context switch) is performed to allow other threads to execute. 567 566 568 llheap uses two techniques to detect when execution is in an allocation operation or routine called from allocation operation, to abort any time slice during this period. 569 On the slowpath when executing expensive operations, like @sbrk@ or @mmap@, interrupts are disabled/enabled by setting kernel-thread-local flags so the signal handler aborts immediately. 570 On the fastpath, disabling/enabling interrupts is too expensive as accessing kernel-thread-local storage can be expensive and not user-thread-safe. 567 llheap uses two techniques to detect when execution is in a allocation operation or routine called from allocation operation, to abort any time slice during this period. 568 On the slowpath when executing expensive operations, like @sbrk@ or @mmap@, interrupts are disabled/enabled by setting thread-local flags so the signal handler aborts immediately. 
569 On the fastpath, disabling/enabling interrupts is too expensive as accessing thread-local storage can be expensive and not thread-safe. 571 570 For example, the ARM processor stores the thread-local pointer in a coprocessor register that cannot perform atomic base-displacement addressing. 572 Hence, there is a window between loading the kernel-thread-local pointer from the coprocessor register into a normal register and adding the displacement when a time slice can move a thread. 573 574 The fast technique (with lower run time cost) is to define a special code section and places all non-interruptible routines in this section. 571 Hence, there is a window between loading the thread-local pointer from the coprocessor register into a normal register and adding the displacement when a time slice can move a thread. 572 573 The fast technique defines a special code section and places all non-interruptible routines in this section. 575 574 The linker places all code in this section into a contiguous block of memory, but the order of routines within the block is unspecified. 576 575 Then, the signal handler compares the program counter at the point of interrupt with the the start and end address of the non-interruptible section, and aborts if executing within this section and sets the rollforward flag. … … 578 577 Hence, for correctness, this approach requires inspection of generated assembler code for routines placed in the non-interruptible section. 579 578 This issue is mitigated by the llheap funnel design so only funnel routines and a few statistics routines are placed in the non-interruptible section and their assembler code examined. 580 These techniques are used in both the \uC and \CFA versions of llheap as both of these systems have user-level threading. 579 These techniques are used in both the \uC and \CFA versions of llheap, where both of these systems have user-level threading. 581 580 582 581 … … 588 587 Programs can be statically or dynamically linked. 
589 588 \item 590 The order in which the linker schedules startup code is poorly supported so it cannot be controlled entirely. 589 The order the linker schedules startup code is poorly supported. 591 590 \item 592 591 Knowing a KT's start and end independently from the KT code is difficult. … … 601 600 Hence, some part of the @sbrk@ area may be used by the default allocator and statistics about allocation operations cannot be correct. 602 601 Furthermore, dynamic linking goes through trampolines, so there is an additional cost along the allocator fastpath for all allocation operations. 603 Testing showed up to a 5\% performance decrease with dynamic linking as compared to static linking, even when using @tls_model("initial-exec")@ so the dynamic loader can obtain tighter binding. 602 Testing showed up to a 5\% performance increase for dynamic linking over static linking, even when using @tls_model("initial-exec")@ so the dynamic loader can obtain tighter binding. 604 603 605 604 All allocator libraries need to perform startup code to initialize data structures, such as the heap array for llheap. 606 The problem is getting initialization done before the first allocator call. 605 The problem is getting initialized done before the first allocator call. 607 606 However, there does not seem to be mechanism to tell either the static or dynamic loader to first perform initialization code before any calls to a loaded library. 608 Also, initialization code of other libraries and the run-time environment may call memory allocation routines such as \lstinline{malloc}. 609 This compounds the situation as there is no mechanism to tell either the static or dynamic loader to first perform the initialization code of the memory allocator before any other initialization that may involve a dynamic memory allocation call. 610 607 As a result, calls to allocation routines occur without initialization. 
611 608 To deal with this problem, it is necessary to put a conditional initialization check along the allocation fastpath to trigger initialization (singleton pattern). … … 644 641 Therefore, the constructor is useless for knowing when a KT starts because the KT must reference it, and the allocator does not control the application KT. 645 642 Fortunately, the singleton pattern needed for initializing the program KT also triggers KT allocator initialization, which can then reference @pgm_thread@ to call @threadManager@'s constructor, otherwise its destructor is not called. 646 Now when a KT terminates, @~ThreadManager@ is called to chain it onto the global-heap free-stack, where @pgm_thread@ is set to true only for the program KT.643 Now when a KT terminates, @~ThreadManager@ is called to chained it onto the global-heap free-stack, where @pgm_thread@ is set to true only for the program KT. 647 644 The conditional destructor call prevents closing down the program heap, which must remain available because epilogue code may free more storage. 648 645 … … 663 660 bool traceHeapOff(); $\C{// stop printing allocation/free calls}$ 664 661 \end{lstlisting} 665 This kind of API is necessary to allow concurrent runtime systems to interact with differen tmemory allocators in a consistent way.662 This kind of API is necessary to allow concurrent runtime systems to interact with difference memory allocators in a consistent way. 666 663 667 664 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% … … 715 712 Most allocators use @nullptr@ to indicate an allocation failure, specifically out of memory; 716 713 hence the need to return an alternate value for a zero-sized allocation. 
717 A different approach allowed by @C API@ is to abort a program when out of memory and return @nullptr@ for a zero-sized allocation. 714 A different approach allowed by the C API is to abort a program when out of memory and return @nullptr@ for a zero-sized allocation. 718 715 In theory, notifying the programmer of memory failure allows recovery; 719 716 in practice, it is almost impossible to gracefully recover when out of memory. … … 739 736 \paragraph{\lstinline{void * aalloc( size_t dim, size_t elemSize )}} 740 737 extends @calloc@ for allocating a dynamic array of objects without calculating the total size of array explicitly but \emph{without} zero-filling the memory. 741 @aalloc@ is significantly faster than @calloc@, which is the only alternative given by the standard memory-allocation routines. 738 @aalloc@ is significantly faster than @calloc@, which is the only alternative. 742 739 743 740 \noindent\textbf{Usage} … … 828 825 \begin{itemize} 829 826 \item 830 @fd@: file descriptor. 827 @fd@: files description. 831 828 \end{itemize} 832 829 It returns the previous file descriptor. … … 835 832 \label{p:malloc_expansion} 836 833 set the amount (bytes) to extend the heap when there is insufficient free storage to service an allocation request. 837 It returns the heap extension size used throughout a program when requesting more memory from the system using @sbrk@ system-call, \ie called once at heap initialization. 834 It returns the heap extension size used throughout a program, \ie called once at heap initialization. 
838 835 839 836 \paragraph{\lstinline{size_t malloc_mmap_start()}} … … 918 915 \begin{itemize} 919 916 \item 920 naming: \CFA regular and @ttype@ polymorphism (@ttype@ polymorphism in \CFA is similar to \CC variadic templates) is used to encapsulate a wide range of allocation functionality into a single routine name, so programmers do not have to remember multiple routine names for different kinds of dynamic allocations. 921 \item 922 named arguments: individual allocation properties are specified using postfix function call, so the programmers do not have to remember parameter positions in allocation calls. 923 \item 924 object size: like the \CFA's C-interface, programmers do not have to specify object size or cast allocation results. 917 naming: \CFA regular and @ttype@ polymorphism is used to encapsulate a wide range of allocation functionality into a single routine name, so programmers do not have to remember multiple routine names for different kinds of dynamic allocations. 918 \item 919 named arguments: individual allocation properties are specified using postfix function call, so programmers do have to remember parameter positions in allocation calls. 920 \item 921 object size: like the \CFA C-style interface, programmers do not have to specify object size or cast allocation results. 925 922 \end{itemize} 926 923 Note, postfix function call is an alternative call syntax, using backtick @`@, where the argument appears before the function name, \eg … … 931 928 duration dur = 3@`@h + 42@`@m + 17@`@s; 932 929 \end{cfa} 930 @ttype@ polymorphism is similar to \CC variadic templates. 933 931 934 932 \paragraph{\lstinline{T * alloc( ... )} or \lstinline{T * alloc( size_t dim, ... )}} 935 is overloaded with a variable number of specific allocation operations, or an integer dimension parameter followed by a variable number of specific allocation operations. 936 These allocation operations can be passed as named arguments when calling the \lstinline{alloc} routine. 
933 is overloaded with a variable number of specific allocation routines, or an integer dimension parameter followed by a variable number specific allocation routines. 937 934 A call without parameters returns a dynamically allocated object of type @T@ (@malloc@). 938 935 A call with only the dimension (dim) parameter returns a dynamically allocated array of objects of type @T@ (@aalloc@). … … 983 980 5 5 5 -555819298 -555819298 // two undefined values 984 981 \end{lstlisting} 985 Examples 1 to 3 fill an object with a value or characters. 986 Examples 4 to 7 fill an array of objects with values, another array, or part of an array. 982 Examples 1 to 3, fill an object with a value or characters. 983 Examples 4 to 7, fill an array of objects with values, another array, or part of an array. 987 984 988 985 \subparagraph{\lstinline{S_resize(T) ?`resize( void * oaddr )}} … … 1018 1015 \subparagraph{\lstinline{S_realloc(T) ?`realloc( T * a ))}} 1019 1016 used to resize, realign, and fill, where the old object data is copied to the new object. 1020 The old object type must be the same as the new object type, since the value is used. 1017 The old object type must be the same as the new object type, since the values used. 1021 1018 Note, for @fill@, only the extra space after copying the data from the old object is filled with the given parameter. 1022 1019 For example: … … 1032 1029 \end{lstlisting} 1033 1030 Examples 2 to 3 change the alignment for the initial storage of @i@. 1034 The @13`fill@ in example 3 does nothing because no extra space is added. 1031 The @13`fill@ for example 3 does nothing because no extra space is added. 1035 1032 1036 1033 \begin{cfa}[numbers=left] … … 1047 1044 \end{lstlisting} 1048 1045 Examples 2 to 4 change the array size, alignment and fill for the initial storage of @ia@. 1049 The @13`fill@ in example 3 does nothing because no extra space is added. 1046 The @13`fill@ for example 3 does nothing because no extra space is added. 
1050 1047 1051 1048 These \CFA allocation features are used extensively in the development of the \CFA runtime. -
doc/theses/mubeen_zulfiqar_MMath/background.tex
re5d9274 r015925a 36 36 The management data starts with fixed-sized information in the static-data memory that references components in the dynamic-allocation memory. 37 37 The \newterm{storage data} is composed of allocated and freed objects, and \newterm{reserved memory}. 38 Allocated objects (light grey) are variable sized, and are allocated and maintained by the program; 39 \ie only the memory allocator knows the location of allocated storage, not the program. 38 Allocated objects (light grey) are variable sized, and allocated and maintained by the program; 39 \ie only the program knows the location of allocated storage, not the memory allocator. 40 40 \begin{figure}[h] 41 41 \centering … … 49 49 if there are multiple reserved blocks, they are also chained together, usually internally. 50 50 51 In some allocator designs, allocated and freed objects have additional management data embedded within them. 51 Allocated and freed objects typically have additional management data embedded within them. 52 52 \VRef[Figure]{f:AllocatedObject} shows an allocated object with a header, trailer, and alignment padding and spacing around the object. 53 53 The header contains information about the object, \eg size, type, etc. … … 104 104 \VRef[Figure]{f:MemoryFragmentation} shows an example of how a small block of memory fragments as objects are allocated and deallocated over time. 105 105 Blocks of free memory become smaller and non-contiguous making them less useful in serving allocation requests. 106 Memory is highly fragmented when most free blocks are unusable because of their sizes. 106 Memory is highly fragmented when the sizes of most free blocks are unusable. 107 107 For example, \VRef[Figure]{f:Contiguous} and \VRef[Figure]{f:HighlyFragmented} have the same quantity of external fragmentation, but \VRef[Figure]{f:HighlyFragmented} is highly fragmented. 
108 108 If there is a request to allocate a large object, \VRef[Figure]{f:Contiguous} is more likely to be able to satisfy it with existing free memory, while \VRef[Figure]{f:HighlyFragmented} likely has to request more memory from the operating system. … … 137 137 The fewer bin-sizes, the fewer lists need to be searched and maintained; 138 138 however, the bin sizes are less likely to closely fit the requested object size, leading to more internal fragmentation. 139 The more bin sizes, the longer the search and the less likely free objects are to be reused, leading to more external fragmentation and potentially heap blowup.139 The more bin-sizes, the longer the search and the less likely free objects are to be reused, leading to more external fragmentation and potentially heap blowup. 140 140 A variation of the binning algorithm allows objects to be allocated to the requested size, but when an object is freed, it is placed on the free list of the next smallest or equal bin-size. 141 141 For example, with bin sizes of 8 and 16 bytes, a request for 12 bytes allocates only 12 bytes, but when the object is freed, it is placed on the 8-byte bin-list. … … 157 157 The principle of locality recognizes that programs tend to reference a small set of data, called a working set, for a certain period of time, where a working set is composed of temporal and spatial accesses~\cite{Denning05}. 158 158 Temporal clustering implies a group of objects are accessed repeatedly within a short time period, while spatial clustering implies a group of objects physically close together (nearby addresses) are accessed repeatedly within a short time period. 
159 Temporal locality commonly occurs during an iterative computation with a fixed set of disjoint variables, while spatial locality commonly occurs when traversing an array. 159 Temporal locality commonly occurs during an iterative computation with a fix set of disjoint variables, while spatial locality commonly occurs when traversing an array. 160 160 161 161 Hardware takes advantage of temporal and spatial locality through multiple levels of caching, \ie memory hierarchy. … … 328 328 For example, multiple heaps are managed in a pool, starting with a single or a fixed number of heaps that increase\-/decrease depending on contention\-/space issues. 329 329 At creation, a thread is associated with a heap from the pool. 330 In some implementations of this model, when the thread attempts an allocation and its associated heap is locked (contention), it scans for an unlocked heap in the pool. 330 When the thread attempts an allocation and its associated heap is locked (contention), it scans for an unlocked heap in the pool. 331 331 If an unlocked heap is found, the thread changes its association and uses that heap. 332 332 If all heaps are locked, the thread may create a new heap, use it, and then place the new heap into the pool; … … 347 347 The management information in the static zone must be able to locate all heaps in the dynamic zone. 348 348 The management information for the heaps must reside in the dynamic-allocation zone if there are a variable number. 349 Each heap in the dynamic zone is composed of a list of free objects and a pointer to its reserved memory. 349 Each heap in the dynamic zone is composed of a list of a free objects and a pointer to its reserved memory. 350 350 An alternative implementation is for all heaps to share one reserved memory, which requires a separate lock for the reserved storage to ensure mutual exclusion when acquiring new memory. 
351 351 Because multiple threads can allocate/free/reallocate adjacent storage, all forms of false sharing may occur. … … 361 361 Multiple heaps increase external fragmentation as the ratio of heaps to threads increases, which can lead to heap blowup. 362 362 The external fragmentation experienced by a program with a single heap is now multiplied by the number of heaps, since each heap manages its own free storage and allocates its own reserved memory. 363 Additionally, objects freed by one heap cannot be reused by other threads without increasing the cost of the memory operations, except indirectly by returning free memory to the operating system, which can be expensive.364 Depending on how the operating system provides dynamic storage to an application, returning storage may be difficult or impossible, \eg the contiguous @sbrk@ area in Unix. 363 Additionally, objects freed by one heap cannot be reused by other threads, except indirectly by returning free memory to the operating system, which can be expensive. 364 (Depending on how the operating system provides dynamic storage to an application, returning storage may be difficult or impossible, \eg the contiguous @sbrk@ area in Unix.) 365 365 In the worst case, a program in which objects are allocated from one heap but deallocated to another heap means these freed objects are never reused. 366 366 … … 384 384 In contrast, the T:H model spreads each thread's objects over a larger area in different heaps. 385 385 Thread heaps can also eliminate allocator-induced active false-sharing, if memory is acquired so it does not overlap at crucial boundaries with memory for another thread's heap. 
386 For example, assume page boundaries coincide with cache line boundaries, if a thread heap always acquires pages of memory then no two threads share a page or cache line unless pointers are passed among them. 386 For example, assume page boundaries coincide with cache line boundaries, then if a thread heap always acquires pages of memory, no two threads share a page or cache line unless pointers are passed among them. 387 387 Hence, allocator-induced active false-sharing in \VRef[Figure]{f:AllocatorInducedActiveFalseSharing} cannot occur because the memory for thread heaps never overlaps. 388 388 389 When a thread terminates, there are two options for handling its thread heap. 390 First is to free all objects in the thread heap to the global heap and destroy the thread heap. 389 When a thread terminates, there are two options for handling its heap. 390 First is to free all objects in the heap to the global heap and destroy the thread heap. 391 391 Second is to place the thread heap on a list of available heaps and reuse it for a new thread in the future. 392 392 Destroying the thread heap immediately may reduce external fragmentation sooner, since all free objects are freed to the global heap and may be reused by other threads. 393 Alternatively, reusing thread heaps may improve performance if the inheriting thread makes similar allocation requests as the thread that previously held the thread heap because any unfreed storage is immediately accessible. 393 Alternatively, reusing thread heaps may improve performance if the inheriting thread makes similar allocation requests as the thread that previously held the thread heap because any unfreed storage is immediately accessible.. 394 394 395 395 … … 417 417 When the user thread continues on the new kernel thread, it may have pointers into the previous kernel-thread's heap and hold locks associated with it. 
418 418 To get the same kernel-thread safety, time slicing must be disabled/\-enabled around these operations, so the user thread cannot jump to another kernel thread. 419 However, eagerly disabling/enabling time-slicing on the allocation/deallocation fast path is expensive, because preemption does not happen that frequently. 419 However, eagerly disabling/enabling time-slicing on the allocation/deallocation fast path is expensive, because preemption is rare (10--100 milliseconds). 420 420 Instead, techniques exist to lazily detect this case in the interrupt handler, abort the preemption, and return to the operation so it can complete atomically. 421 421 Occasionally ignoring a preemption should be benign, but a persistent lack of preemption can result in both short and long term starvation. 422 423 424 \begin{figure} 425 \centering 426 \subfigure[Ownership]{ 427 \input{MultipleHeapsOwnership} 428 } % subfigure 429 \hspace{0.25in} 430 \subfigure[No Ownership]{ 431 \input{MultipleHeapsNoOwnership} 432 } % subfigure 433 \caption{Heap Ownership} 434 \label{f:HeapsOwnership} 435 \end{figure} 422 436 423 437 … … 433 447 For the T:1/T:H models with or without ownership or the 1:1 model with ownership, a thread may free objects to different heaps, which makes each heap publicly accessible to all threads, called a \newterm{public heap}. 434 448 435 \begin{figure} 436 \centering 437 \subfigure[Ownership]{ 438 \input{MultipleHeapsOwnership} 439 } % subfigure 440 \hspace{0.25in} 441 \subfigure[No Ownership]{ 442 \input{MultipleHeapsNoOwnership} 443 } % subfigure 444 \caption{Heap Ownership} 445 \label{f:HeapsOwnership} 446 \end{figure} 447 448 449 \VRef[Figure]{f:MultipleHeapStorageOwnership} shows the effect of ownership on storage layout. 449 (For simplicity, assume the heaps all use the same size of reserves storage.) 450 (For simplicity assume the heaps all use the same size of reserves storage.) 
450 451 In contrast to \VRef[Figure]{f:MultipleHeapStorage}, each reserved area used by a heap only contains free storage for that particular heap because threads must return free objects back to the owner heap. 451 452 Again, because multiple threads can allocate/free/reallocate adjacent storage in the same heap, all forms of false sharing may occur. … … 472 473 While the returning thread can batch objects, batching across multiple heaps is complex and there is no obvious time when to push back to the owner heap. 473 474 It is better for returning threads to immediately return to the receiving thread's batch list as the receiving thread has better knowledge when to incorporate the batch list into its free pool. 474 Batching leverages the fact that most allocation patterns use the contention-free fast-path, so locking on the batch list is rare for both the returning and receiving threads. 475 476 It is possible for heaps to steal objects rather than return them and then reallocate these objects again when storage runs out on a heap. 475 Batching leverages the fact that most allocation patterns use the contention-free fast-path so locking on the batch list is rare for both the returning and receiving threads. 476 477 It is possible for heaps to steal objects rather than return them and reallocating these objects when storage runs out on a heap. 477 478 However, stealing can result in passive false-sharing. 478 479 For example, in \VRef[Figure]{f:AllocatorInducedPassiveFalseSharing}, Object$_2$ may be deallocated to Thread$_2$'s heap initially. … … 484 485 485 486 Bracketing every allocation with headers/trailers can result in significant internal fragmentation, as shown in \VRef[Figure]{f:ObjectHeaders}. 
486 Especially if the headers contain redundant management information, then storing that information is a waste of storage, \eg object size may be the same for many objects because programs only allocate a small set of object sizes. 487 Especially if the headers contain redundant management information, \eg object size may be the same for many objects because programs only allocate a small set of object sizes. 487 488 As well, it can result in poor cache usage, since only a portion of the cache line is holding useful information from the program's perspective. 488 489 Spatial locality can also be negatively affected leading to poor cache locality~\cite{Feng05}: … … 659 660 With local free-lists in containers, as in \VRef[Figure]{f:LocalFreeListWithinContainers}, the container is simply removed from one heap's free list and placed on the new heap's free list. 660 661 Thus, when using local free-lists, the operation of moving containers is reduced from $O(N)$ to $O(1)$. 661 However, there is the additional storage cost in the header, which increases the header size, and therefore internal fragmentation. 662 The cost is adding information to a header, which increases the header size, and therefore internal fragmentation. 662 663 663 664 \begin{figure} … … 688 689 The main goal of the hybrid approach is to eliminate locking on thread-local allocation/deallocation, while providing ownership to prevent heap blowup. 689 690 In the hybrid approach, a thread first allocates from its private heap and second from its public heap if no free memory exists in the private heap. 690 Similarly, a thread first deallocates an object to its private heap, and second to the public heap. 691 Similarly, a thread first deallocates an object its private heap, and second to the public heap. 
691 692 Both private and public heaps can allocate/deallocate to/from the global heap if there is no free memory or excess free memory, although an implementation may choose to funnel all interaction with the global heap through one of the heaps. 692 693 Note, deallocation from the private to the public (dashed line) is unlikely because there is no obvious advantages unless the public heap provides the only interface to the global heap. -
doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex
re5d9274 r015925a 12 12 \item[Benchmarks] 13 13 are a suite of application programs (SPEC CPU/WEB) that are exercised in a common way (inputs) to find differences among underlying software implementations associated with an application (compiler, memory allocator, web server, \etc). 14 The applications are supposed to represent common execution patterns that need to perform well with respect to an underlying software implementation. 14 The applications are suppose to represent common execution patterns that need to perform well with respect to an underlying software implementation. 15 15 Benchmarks are often criticized for having overlapping patterns, insufficient patterns, or extraneous code that masks patterns. 16 16 \item[Micro-Benchmarks] … … 26 26 27 27 This thesis designs and examines a new set of micro-benchmarks for memory allocators that test a variety of allocation patterns, each with multiple tuning parameters. 28 The aim of the micro-benchmark suite is to create a set of programs that can evaluate a memory allocator based on the key performance metrics such as speed, memory overhead, and cache performance. 28 The aim of the micro-benchmark suite is to create a set of programs that can evaluate a memory allocator based on the key performance matrices such as speed, memory overhead, and cache performance. 29 29 % These programs can be taken as a standard to benchmark an allocator's basic goals. 30 30 These programs give details of an allocator's memory overhead and speed under certain allocation patterns. 31 The allocation patterns are configurable (adjustment knobs) to observe an allocator's performance across a spectrum allocation patterns, which is seldom possible with benchmark programs. 31 The allocation patterns are configurable (adjustment knobs) to observe an allocator's performance across a spectrum of events for a desired allocation pattern, which is seldom possible with benchmark programs. 
32 32 Each micro-benchmark program has multiple control knobs specified by command-line arguments. 33 33 34 The new micro-benchmark suite measures performance by allocating dynamic objects and measuring specific metrics. 34 The new micro-benchmark suite measures performance by allocating dynamic objects and measuring specific matrices. 35 35 An allocator's speed is benchmarked in different ways, as are issues like false sharing. 36 36 … … 40 40 Modern memory allocators, such as llheap, must handle multi-threaded programs at the KT and UT level. 41 41 The following multi-threaded micro-benchmarks are presented to give a sense of prior work~\cite{Berger00} at the KT level. 42 None of the prior work addresses multi-threading at the UT level. 42 None of the prior work address multi-threading at the UT level. 43 43 44 44 … … 47 47 This benchmark stresses the ability of the allocator to handle different threads allocating and deallocating independently. 48 48 There is no interaction among threads, \ie no object sharing. 49 Each thread repeatedly allocates 100,000 \emph{8-byte} objects then deallocates them in the order they were allocated. 50 The execution time of the benchmark evaluates its efficiency. 49 Each thread repeatedly allocate 100,000 \emph{8-byte} objects then deallocates them in the order they were allocated. 50 Runtime of the benchmark evaluates its efficiency. 51 51 52 52 … … 63 63 Before the thread terminates, it passes its array of 10,000 objects to a new child thread to continue the process. 64 64 The number of thread generations varies depending on the thread speed. 65 It calculates memory operations per second as an indicator of the memory allocator's performance. 65 It calculates memory operations per second as an indicator of memory allocator's performance. 
66 66 67 67 … … 75 75 \label{s:ChurnBenchmark} 76 76 77 The churn benchmark measures the runtime speed of an allocator in a multi-threaded scenario, where each thread extensively allocates and frees dynamic memory. 77 The churn benchmark measures the runtime speed of an allocator in a multi-threaded scenario, where each thread extensively allocates and frees dynamic memory. 78 78 Only @malloc@ and @free@ are used to eliminate any extra cost, such as @memcpy@ in @calloc@ or @realloc@. 79 Churn simulates a memory intensive program and can be tuned to create different scenarios. 79 Churn simulates a memory intensive program that can be tuned to create different scenarios. 80 80 81 81 \VRef[Figure]{fig:ChurnBenchFig} shows the pseudo code for the churn micro-benchmark. … … 133 133 When threads share a cache line, frequent reads/writes to their cache-line object cause cache misses, which cause escalating delays as cache distance increases. 134 134 135 Cache thrash tries to create a scenario that leads to false sharing, if the underlying memory allocator is allocating dynamic memory to multiple threads on the same cache lines. 135 Cache thrash tries to create a scenario that leads to false sharing, if the underlying memory allocator is allocating dynamic memory to multiple threads on the same cache lines. 136 136 Ideally, a memory allocator should distance the dynamic memory region of one thread from another. 137 137 Having multiple threads allocating small objects simultaneously can cause a memory allocator to allocate objects on the same cache line, if it is not distancing the memory among different threads. … … 141 141 Each worker thread allocates an object and intensively reads/writes it for M times to possibly invalidate cache lines that may interfere with other threads sharing the same cache line. 142 142 Each thread repeats this for N times. 
143 The main thread measures the total time taken for all worker threads to complete.144 Worker threads sharing cache lines with each other are expected totake longer.143 The main thread measures the total time taken to for all worker threads to complete. 144 Worker threads sharing cache lines with each other will take longer. 145 145 146 146 \begin{figure} … … 156 156 signal workers to free 157 157 ... 158 print addresses from each $thread$ 158 159 Worker Thread$\(_1\)$ 159 warm up memory in chunks of 16 bytes160 ...161 For N162 malloc an object163 read/write the object M times164 free the object165 ...160 allocate, write, read, free 161 warmup memory in chunkc of 16 bytes 162 ... 163 malloc N objects 164 ... 165 free objects 166 return object address to Main Thread 166 167 Worker Thread$\(_2\)$ 167 168 // same as Worker Thread$\(_1\)$ … … 190 191 191 192 The cache-scratch micro-benchmark measures allocator-induced passive false-sharing as illustrated in \VRef{s:AllocatorInducedPassiveFalseSharing}. 192 As withcache thrash, if memory is allocated for multiple threads on the same cache line, this can significantly slow down program performance.193 As for cache thrash, if memory is allocated for multiple threads on the same cache line, this can significantly slow down program performance. 193 194 In this scenario, the false sharing is being caused by the memory allocator although it is started by the program sharing an object. 194 195 … … 201 202 Cache scratch tries to create a scenario that leads to false sharing and should make the memory allocator preserve the program-induced false sharing, if it does not return a freed object to its owner thread and, instead, re-uses it instantly. 202 203 An allocator using object ownership, as described in section \VRef{s:Ownership}, is less susceptible to allocator-induced passive false-sharing. 
203 If the object is returned to the thread that owns it, then the new object that the thread getsis less likely to be on the same cache line.204 If the object is returned to the thread who owns it, then the thread that gets a new object is less likely to be on the same cache line. 204 205 205 206 \VRef[Figure]{fig:benchScratchFig} shows the pseudo code for the cache-scratch micro-benchmark. … … 223 224 signal workers to free 224 225 ... 226 print addresses from each $thread$ 225 227 Worker Thread$\(_1\)$ 226 warmup memory in chunks of 16 bytes 227 ... 228 free the object passed by the Main Thread 229 For N 228 allocate, write, read, free 229 warmup memory in chunkc of 16 bytes 230 ... 231 for ( N ) 232 free an object passed by Main Thread 230 233 malloc new object 231 read/write the object M times232 free the object233 ...234 ... 235 free objects 236 return new object addresses to Main Thread 234 237 Worker Thread$\(_2\)$ 235 238 // same as Worker Thread$\(_1\)$ … … 245 248 246 249 Similar to benchmark cache thrash in section \VRef{sec:benchThrashSec}, different cache access scenarios can be created using the following command-line arguments. 247 \begin{description}[ topsep=0pt,itemsep=0pt,parsep=0pt]250 \begin{description}[itemsep=0pt,parsep=0pt] 248 251 \item[threads:] 249 252 number of threads (K). … … 259 262 \subsection{Speed Micro-Benchmark} 260 263 \label{s:SpeedMicroBenchmark} 261 \vspace*{-4pt}262 264 263 265 The speed benchmark measures the runtime speed of individual and sequences of memory allocation routines: 264 \begin{enumerate}[ topsep=-5pt,itemsep=0pt,parsep=0pt]266 \begin{enumerate}[itemsep=0pt,parsep=0pt] 265 267 \item malloc 266 268 \item realloc … … 330 332 \VRef[Figure]{fig:MemoryBenchFig} shows the pseudo code for the memory micro-benchmark. 331 333 It creates a producer-consumer scenario with K producer threads and each producer has M consumer threads. 
332 A producer has a separate buffer for each consumer and allocates N objects of random sizes following a configurable distribution for each consumer.334 A producer has a separate buffer for each consumer and allocates N objects of random sizes following a settable distribution for each consumer. 333 335 A consumer frees these objects. 334 336 After every memory operation, program memory usage is recorded throughout the runtime. -
doc/theses/mubeen_zulfiqar_MMath/conclusion.tex
re5d9274 r015925a 17 17 % ==================== 18 18 19 The goal of this thesis was to build a low-latency (or high bandwidth) memory allocator for both KT and UT multi-threading systems that is competitive with the best current memory allocators while extending the feature set of existing and new allocator routines. 19 The goal of this thesis was to build a low-latency memory allocator for both KT and UT multi-threaded systems, which is competitive with the best current memory allocators, while extending the feature set of existing and new allocator routines. 20 20 The new llheap memory-allocator achieves all of these goals, while maintaining and managing sticky allocation information without a performance loss. 21 21 Hence, it becomes possible to use @realloc@ frequently as a safe operation, rather than just occasionally. 22 22 Furthermore, the ability to query sticky properties and information allows programmers to write safer programs, as it is possible to dynamically match allocation styles from unknown library routines that return allocations. 23 23 24 Extending the C allocation API with @resize@, advanced @realloc@, @aalloc@, @amemalign@, and @cmemalign@ means programmers do not have to do these useful allocation operations themselves. 24 Extending the C allocation API with @resize@, advanced @realloc@, @aalloc@, @amemalign@, and @cmemalign@ means programmers do not make mistakes writing these useful allocation operations. 25 25 The ability to use \CFA's advanced type-system (and possibly \CC's too) to have one allocation routine with completely orthogonal sticky properties shows how far the allocation API can be pushed, which increases safety and greatly simplifies programmer's use of dynamic allocation. 26 26 27 27 Providing comprehensive statistics for all allocation operations is invaluable in understanding and debugging a program's dynamic behaviour. 
28 No other memory allocator provides such comprehensive statistics gathering. 28 No other memory allocator provides comprehensive statistics gathering. 29 This capability was used extensively during the development of llheap to verify its behaviour. 30 As well, providing a debugging mode where allocations are checked, along with internal pre/post conditions and invariants, is extremely useful, especially for students. 31 While not as powerful as the @valgrind@ interpreter, a large number of allocation mistakes are detected. 31 While not as powerful as the @valgrind@ interpreter, a large number of allocation mistakes are detected. 32 Finally, contention-free statistics gathering and debugging have a low enough cost to be used in production code. 33 … … 36 36 37 37 Starting a micro-benchmark test-suite for comparing allocators, rather than relying on a suite of arbitrary programs, has been an interesting challenge. 38 The current micro-benchmarks allow some understanding of allocator implementation properties without actually looking at the implementation. 38 The current micro-benchmarks allow some understanding of allocator implementation properties without actually looking at the implementation. 39 39 For example, the memory micro-benchmark quickly identified how several of the allocators work at the global level. 40 40 It was not possible to show how the micro-benchmarks adjustment knobs were used to tune to an interesting test point. … … 45 45 46 46 A careful walk-through of the allocator fastpath should yield additional optimizations for a slight performance gain. 
47 In particular, analysing the implementation of rpmalloc, which is often the fastest allocator, 47 In particular, looking at the implementation of rpmalloc, which is often the fastest allocator, 48 49 The micro-benchmark project requires more testing and analysis. 50 Additional allocation patterns are needed to extract meaningful information about allocators, and within allocation patterns, what are the most useful tuning knobs. 49 The micro-benchmarks project requires more testing and analysis. 50 Additional allocation patterns are needed to extract meaningful information about allocators, and within allocation patterns, what are the best tuning knobs. 51 Also, identifying ways to visualize the results of the micro-benchmarks is a work in progress. 52 53 After llheap is made available on GitHub, interacting with its users to locate problems and improvements will make llbench a more robust memory allocator. 54 As well, feedback from the \uC and \CFA projects, which have adopted llheap for their memory allocator, will provide additional information. 53 After llheap is made available on GitHub, interacting with its users to locate problems and improvements will make llbench a more robust memory allocator. 54 As well, feedback from the \uC and \CFA projects, which have adopted llheap for their memory allocator, will provide additional feedback. -
doc/theses/mubeen_zulfiqar_MMath/figures/Header.fig
re5d9274 r015925a 20 20 2 1 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 21 21 3300 1500 3300 2400 22 2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 3 23 1 1 1.00 45.00 90.00 24 4050 2625 3750 2625 3750 2400 25 2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 3 26 1 1 1.00 45.00 90.00 27 4050 2850 3450 2850 3450 2400 22 28 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 23 29 4200 1800 6600 1800 6600 2100 4200 2100 4200 1800 24 2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 325 1 1 1.00 45.00 90.0026 4200 2775 3750 2775 3750 172527 2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 328 1 1 1.00 45.00 90.0029 4200 2550 4050 2550 4050 172530 2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 0 331 1 1 1.00 45.00 90.0032 4200 3000 3450 3000 3450 202533 30 4 0 0 50 -1 0 12 0.0000 2 180 1185 1875 1725 bucket pointer\001 34 31 4 0 0 50 -1 0 12 0.0000 2 180 1005 1875 2025 mapped size\001 35 32 4 0 0 50 -1 0 12 0.0000 2 135 1215 1875 2325 next free block\001 36 33 4 2 0 50 -1 0 12 0.0000 2 135 480 1725 2025 union\001 34 4 1 0 50 -1 0 12 0.0000 2 135 270 3775 2325 0/1\001 35 4 1 0 50 -1 0 12 0.0000 2 135 270 3475 2325 0/1\001 37 36 4 1 0 50 -1 0 12 0.0000 2 180 945 5400 2025 request size\001 38 37 4 1 0 50 -1 0 12 0.0000 2 180 765 5400 1425 4/8-bytes\001 39 38 4 1 0 50 -1 0 12 0.0000 2 180 765 3000 1425 4/8-bytes\001 40 4 1 0 50 -1 0 12 0.0000 2 135 270 3475 2025 0/1\001 41 4 1 0 50 -1 0 12 0.0000 2 135 270 3775 1725 0/1\001 42 4 1 0 50 -1 0 12 0.0000 2 135 270 4075 1725 0/1\001 43 4 0 0 50 -1 0 12 0.0000 2 180 1515 4275 3075 mapped allocation\001 44 4 0 0 50 -1 0 12 0.0000 2 135 825 4275 2850 zero filled\001 45 4 0 0 50 -1 0 12 0.0000 2 180 1920 4275 2625 alignment (fake header)\001 39 4 0 0 50 -1 0 12 0.0000 2 135 825 4125 2700 zero filled\001 40 4 0 0 50 -1 0 12 0.0000 2 180 1515 4125 2925 mapped allocation\001 -
doc/theses/mubeen_zulfiqar_MMath/figures/MultipleHeapsNoOwnership.fig
re5d9274 r015925a 1 #FIG 3.2 Produced by xfig version 3.2. 7b1 #FIG 3.2 Produced by xfig version 3.2.5 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single … … 11 11 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 12 12 1200 2100 1500 2100 1500 1800 1200 1800 1200 2100 13 4 1 0 50 -1 0 11 0.0000 2 1 65 495 1350 2025 H$_1$\00113 4 1 0 50 -1 0 11 0.0000 2 195 495 1350 2025 H$_1$\001 14 14 -6 15 15 6 1950 1800 2550 2100 16 16 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 17 17 2100 2100 2400 2100 2400 1800 2100 1800 2100 2100 18 4 1 0 50 -1 0 11 0.0000 2 1 65 495 2250 2025 H$_2$\00118 4 1 0 50 -1 0 11 0.0000 2 195 495 2250 2025 H$_2$\001 19 19 -6 20 20 1 3 0 1 0 7 50 -1 -1 0.000 0 -0.0000 1350 1350 150 150 1350 1350 1500 1350 21 21 1 3 0 1 0 7 50 -1 -1 0.000 0 -0.0000 2250 1350 150 150 2250 1350 2400 1350 22 22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 23 1 11.00 45.00 90.0024 1 11.00 45.00 90.0023 0 0 1.00 45.00 90.00 24 0 0 1.00 45.00 90.00 25 25 1275 1800 1275 1500 26 2 1 0 1 0 050 -1 -1 0.000 0 0 -1 1 0 227 1 11.00 45.00 90.0026 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 27 0 0 1.00 45.00 90.00 28 28 1425 1500 1425 1800 29 29 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2 30 1 11.00 45.00 90.0030 0 0 1.00 45.00 90.00 31 31 1425 1500 2175 1800 32 32 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 1 2 33 1 11.00 45.00 90.0033 0 0 1.00 45.00 90.00 34 34 2175 1500 1425 1800 35 35 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 36 1 11.00 45.00 90.0036 0 0 1.00 45.00 90.00 37 37 2175 1500 2175 1800 38 38 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 39 1 11.00 45.00 90.0040 1 11.00 45.00 90.0039 0 0 1.00 45.00 90.00 40 0 0 1.00 45.00 90.00 41 41 2325 1800 2325 1500 42 4 1 0 50 -1 0 11 0.0000 2 1 65 465 1350 1425 T$_1$\00143 4 1 0 50 -1 0 11 0.0000 2 1 65 465 2250 1425 T$_2$\00142 4 1 0 50 -1 0 11 0.0000 2 195 465 1350 1425 T$_1$\001 43 4 1 0 50 -1 0 11 0.0000 2 195 465 2250 1425 T$_2$\001 -
doc/theses/mubeen_zulfiqar_MMath/figures/MultipleHeapsOwnership.fig
re5d9274 r015925a 1 #FIG 3.2 Produced by xfig version 3.2. 7b1 #FIG 3.2 Produced by xfig version 3.2.5 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single … … 11 11 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 12 12 1200 2100 1500 2100 1500 1800 1200 1800 1200 2100 13 4 1 0 50 -1 0 11 0.0000 2 1 65 495 1350 2025 H$_1$\00113 4 1 0 50 -1 0 11 0.0000 2 195 495 1350 2025 H$_1$\001 14 14 -6 15 15 6 1950 1800 2550 2100 16 16 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 17 17 2100 2100 2400 2100 2400 1800 2100 1800 2100 2100 18 4 1 0 50 -1 0 11 0.0000 2 1 65 495 2250 2025 H$_2$\00118 4 1 0 50 -1 0 11 0.0000 2 195 495 2250 2025 H$_2$\001 19 19 -6 20 20 1 3 0 1 0 7 50 -1 -1 0.000 0 -0.0000 1350 1350 150 150 1350 1350 1500 1350 21 21 1 3 0 1 0 7 50 -1 -1 0.000 0 -0.0000 2250 1350 150 150 2250 1350 2400 1350 22 22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 23 1 11.00 45.00 90.0024 1 11.00 45.00 90.0023 0 0 1.00 45.00 90.00 24 0 0 1.00 45.00 90.00 25 25 2175 1500 1425 1800 26 26 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 27 1 1 1.00 45.00 90.00 28 1 1 1.00 45.00 90.00 27 0 0 1.00 45.00 90.00 28 0 0 1.00 45.00 90.00 29 1425 1500 2175 1800 30 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 31 0 0 1.00 45.00 90.00 32 0 0 1.00 45.00 90.00 29 33 1275 1800 1275 1500 30 34 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 31 1 11.00 45.00 90.0032 1 11.00 45.00 90.0035 0 0 1.00 45.00 90.00 36 0 0 1.00 45.00 90.00 33 37 2325 1800 2325 1500 34 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 35 1 1 1.00 45.00 90.00 36 1 1 1.00 45.00 90.00 37 1425 1500 2175 1800 38 4 1 0 50 -1 0 11 0.0000 2 165 465 2250 1425 T$_2$\001 39 4 1 0 50 -1 0 11 0.0000 2 165 465 1350 1425 T$_1$\001 38 4 1 0 50 -1 0 11 0.0000 2 195 465 2250 1425 T$_2$\001 39 4 1 0 50 -1 0 11 0.0000 2 195 465 1350 1425 T$_1$\001 -
doc/theses/mubeen_zulfiqar_MMath/figures/PerThreadHeap.fig
re5d9274 r015925a 1 #FIG 3.2 Produced by xfig version 3.2. 7b1 #FIG 3.2 Produced by xfig version 3.2.5 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single … … 11 11 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 12 12 2700 1800 3000 1800 3000 2100 2700 2100 2700 1800 13 4 1 0 50 -1 0 11 0.0000 2 1 20135 2850 2025 G\00113 4 1 0 50 -1 0 11 0.0000 2 135 135 2850 2025 G\001 14 14 -6 15 15 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1350 1350 150 150 1350 1350 1500 1350 … … 17 17 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2250 1350 150 150 2250 1350 2400 1350 18 18 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 19 1 11.00 45.00 90.0020 1 11.00 45.00 90.0019 0 0 1.00 45.00 90.00 20 0 0 1.00 45.00 90.00 21 21 1350 1500 1350 1800 22 22 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 … … 27 27 2100 1800 2400 1800 2400 2100 2100 2100 2100 1800 28 28 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 29 1 11.00 45.00 90.0030 1 11.00 45.00 90.0029 0 0 1.00 45.00 90.00 30 0 0 1.00 45.00 90.00 31 31 1800 1500 1800 1800 32 32 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 33 1 11.00 45.00 90.0034 1 11.00 45.00 90.0033 0 0 1.00 45.00 90.00 34 0 0 1.00 45.00 90.00 35 35 2250 1500 2250 1800 36 4 1 0 50 -1 0 11 0.0000 2 1 80 1260 2550 2025 $\\Leftrightarrow$\00137 4 1 0 50 -1 0 11 0.0000 2 1 80 1260 3150 2025 $\\Leftrightarrow$\00138 4 0 0 50 -1 0 11 0.0000 2 1 20240 3300 2025 OS\00139 4 1 0 50 -1 0 11 0.0000 2 1 65 495 1350 2025 H$_1$\00140 4 1 0 50 -1 0 11 0.0000 2 1 65 465 1350 1425 T$_1$\00141 4 1 0 50 -1 0 11 0.0000 2 1 65 495 1800 2025 H$_2$\00142 4 1 0 50 -1 0 11 0.0000 2 1 65 465 1800 1425 T$_2$\00143 4 1 0 50 -1 0 11 0.0000 2 1 65 495 2250 2025 H$_3$\00144 4 1 0 50 -1 0 11 0.0000 2 1 65 465 2250 1425 T$_3$\00136 4 1 0 50 -1 0 11 0.0000 2 195 1320 2550 2025 $\\Leftrightarrow$\001 37 4 1 0 50 -1 0 11 0.0000 2 195 1320 3150 2025 $\\Leftrightarrow$\001 38 4 0 0 50 -1 0 11 0.0000 2 135 240 3300 2025 OS\001 39 4 1 0 50 -1 0 11 0.0000 2 195 495 1350 2025 H$_1$\001 40 4 1 0 50 -1 0 11 0.0000 2 195 465 1350 1425 
T$_1$\001 41 4 1 0 50 -1 0 11 0.0000 2 195 495 1800 2025 H$_2$\001 42 4 1 0 50 -1 0 11 0.0000 2 195 465 1800 1425 T$_2$\001 43 4 1 0 50 -1 0 11 0.0000 2 195 495 2250 2025 H$_3$\001 44 4 1 0 50 -1 0 11 0.0000 2 195 465 2250 1425 T$_3$\001 -
doc/theses/mubeen_zulfiqar_MMath/figures/SharedHeaps.fig
re5d9274 r015925a 1 #FIG 3.2 Produced by xfig version 3.2. 7b1 #FIG 3.2 Produced by xfig version 3.2.5 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single … … 10 10 6 1500 1200 2100 1500 11 11 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1800 1350 150 150 1800 1350 1950 1350 12 4 1 0 50 -1 0 11 0.0000 2 1 65 465 1800 1425 T$_2$\00112 4 1 0 50 -1 0 11 0.0000 2 195 465 1800 1425 T$_2$\001 13 13 -6 14 14 6 1050 1200 1650 1500 15 15 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1350 1350 150 150 1350 1350 1500 1350 16 4 1 0 50 -1 0 11 0.0000 2 1 65 465 1350 1425 T$_1$\00116 4 1 0 50 -1 0 11 0.0000 2 195 465 1350 1425 T$_1$\001 17 17 -6 18 18 6 1950 1200 2550 1500 19 19 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2250 1350 150 150 2250 1350 2400 1350 20 4 1 0 50 -1 0 11 0.0000 2 1 65 465 2250 1425 T$_3$\00120 4 1 0 50 -1 0 11 0.0000 2 195 465 2250 1425 T$_3$\001 21 21 -6 22 22 6 1275 1800 1875 2100 23 23 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 24 24 1425 1800 1725 1800 1725 2100 1425 2100 1425 1800 25 4 1 0 50 -1 0 11 0.0000 2 1 65 495 1575 2025 H$_1$\00125 4 1 0 50 -1 0 11 0.0000 2 195 495 1575 2025 H$_1$\001 26 26 -6 27 27 6 1725 1800 2325 2100 28 28 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 29 29 1875 1800 2175 1800 2175 2100 1875 2100 1875 1800 30 4 1 0 50 -1 0 11 0.0000 2 1 65 495 2025 2025 H$_2$\00130 4 1 0 50 -1 0 11 0.0000 2 195 495 2025 2025 H$_2$\001 31 31 -6 32 32 6 2475 1800 2775 2100 33 33 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 34 34 2475 1800 2775 1800 2775 2100 2475 2100 2475 1800 35 4 1 0 50 -1 0 11 0.0000 2 1 20135 2625 2025 G\00135 4 1 0 50 -1 0 11 0.0000 2 135 135 2625 2025 G\001 36 36 -6 37 37 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 38 1 11.00 45.00 90.0039 1 11.00 45.00 90.0038 0 0 1.00 45.00 90.00 39 0 0 1.00 45.00 90.00 40 40 1275 1500 1500 1800 41 41 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 42 1 11.00 45.00 90.0043 1 11.00 45.00 90.0042 0 0 1.00 45.00 90.00 43 0 0 1.00 45.00 90.00 44 44 1425 1500 1950 1800 45 45 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 
46 1 11.00 45.00 90.0047 1 11.00 45.00 90.0046 0 0 1.00 45.00 90.00 47 0 0 1.00 45.00 90.00 48 48 1725 1500 1650 1800 49 49 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 50 1 11.00 45.00 90.0051 1 11.00 45.00 90.0050 0 0 1.00 45.00 90.00 51 0 0 1.00 45.00 90.00 52 52 1875 1500 2025 1800 53 53 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 54 1 11.00 45.00 90.0055 1 11.00 45.00 90.0054 0 0 1.00 45.00 90.00 55 0 0 1.00 45.00 90.00 56 56 2250 1500 2100 1800 57 4 0 0 50 -1 0 11 0.0000 2 1 20240 3075 2025 OS\00158 4 1 0 50 -1 0 11 0.0000 2 1 80 1260 2325 2025 $\\Leftrightarrow$\00159 4 1 0 50 -1 0 11 0.0000 2 1 80 1260 2925 2025 $\\Leftrightarrow$\00157 4 0 0 50 -1 0 11 0.0000 2 135 240 3075 2025 OS\001 58 4 1 0 50 -1 0 11 0.0000 2 195 1320 2325 2025 $\\Leftrightarrow$\001 59 4 1 0 50 -1 0 11 0.0000 2 195 1320 2925 2025 $\\Leftrightarrow$\001 -
doc/theses/mubeen_zulfiqar_MMath/figures/SingleHeap.fig
re5d9274 r015925a 1 #FIG 3.2 Produced by xfig version 3.2. 7b1 #FIG 3.2 Produced by xfig version 3.2.5 2 2 Landscape 3 3 Center 4 4 Inches 5 Letter 5 Letter 6 6 100.00 7 7 Single … … 10 10 6 1500 1200 2100 1500 11 11 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1800 1350 150 150 1800 1350 1950 1350 12 4 1 0 50 -1 0 11 0.0000 2 1 65 465 1800 1425 T$_2$\00112 4 1 0 50 -1 0 11 0.0000 2 195 465 1800 1425 T$_2$\001 13 13 -6 14 14 6 1050 1200 1650 1500 15 15 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1350 1350 150 150 1350 1350 1500 1350 16 4 1 0 50 -1 0 11 0.0000 2 1 65 465 1350 1425 T$_1$\00116 4 1 0 50 -1 0 11 0.0000 2 195 465 1350 1425 T$_1$\001 17 17 -6 18 18 6 1950 1200 2550 1500 19 19 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2250 1350 150 150 2250 1350 2400 1350 20 4 1 0 50 -1 0 11 0.0000 2 1 65 465 2250 1425 T$_3$\00120 4 1 0 50 -1 0 11 0.0000 2 195 465 2250 1425 T$_3$\001 21 21 -6 22 22 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 23 1 11.00 45.00 90.0024 1 11.00 45.00 90.0023 0 0 1.00 45.00 90.00 24 0 0 1.00 45.00 90.00 25 25 1350 1500 1725 1800 26 26 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 27 1 11.00 45.00 90.0028 1 11.00 45.00 90.0027 0 0 1.00 45.00 90.00 28 0 0 1.00 45.00 90.00 29 29 2250 1500 1875 1800 30 30 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 31 31 1650 1800 1950 1800 1950 2100 1650 2100 1650 1800 32 32 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 33 1 11.00 45.00 90.0034 1 11.00 45.00 90.0033 0 0 1.00 45.00 90.00 34 0 0 1.00 45.00 90.00 35 35 1800 1500 1800 1800 36 4 1 0 50 -1 0 11 0.0000 2 1 65 495 1800 2025 H$_1$\00137 4 1 0 50 -1 0 11 0.0000 2 1 80 1260 2100 2025 $\\Leftrightarrow$\00138 4 0 0 50 -1 0 11 0.0000 2 1 20240 2250 2025 OS\00136 4 1 0 50 -1 0 11 0.0000 2 195 495 1800 2025 H$_1$\001 37 4 1 0 50 -1 0 11 0.0000 2 195 1320 2100 2025 $\\Leftrightarrow$\001 38 4 0 0 50 -1 0 11 0.0000 2 135 240 2250 2025 OS\001 -
doc/theses/mubeen_zulfiqar_MMath/figures/UserKernelHeaps.fig
re5d9274 r015925a 45 45 -6 46 46 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 47 1 11.00 45.00 90.0048 1 11.00 45.00 90.0047 0 0 1.00 45.00 90.00 48 0 0 1.00 45.00 90.00 49 49 2025 2100 2025 2400 50 50 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 51 1 11.00 45.00 90.0052 1 11.00 45.00 90.0051 0 0 1.00 45.00 90.00 52 0 0 1.00 45.00 90.00 53 53 2475 2100 2475 2400 54 54 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 55 1 11.00 45.00 90.0056 1 11.00 45.00 90.0055 0 0 1.00 45.00 90.00 56 0 0 1.00 45.00 90.00 57 57 2925 2100 2925 2400 58 58 4 1 0 50 -1 0 11 0.0000 2 135 2235 2475 1725 scheduled across kernel threads\001 -
doc/theses/mubeen_zulfiqar_MMath/intro.tex
re5d9274 r015925a 53 53 When this allocator proves inadequate, programmers often write specialized allocators for specific needs. 54 54 C and \CC allow easy replacement of the default memory allocator with an alternative specialized or general-purpose memory-allocator. 55 Jikes RVM MMTk~\cite{MMTk} provides a similar generalization for the Java virtual machine. 55 (Jikes RVM MMTk~\cite{MMTk} provides a similar generalization for the Java virtual machine.) 56 56 However, high-performance memory-allocators for kernel and user multi-threaded programs are still being designed and improved. 57 57 For this reason, several alternative general-purpose allocators have been written for C/\CC with the goal of scaling in a multi-threaded program~\cite{Berger00,mtmalloc,streamflow,tcmalloc}. … … 65 65 \begin{enumerate}[leftmargin=*] 66 66 \item 67 Implementation of a new stand-alone concurrent low-latency memory-allocator ($\approx$1,200 lines of code) for C/\CC programs using kernel threads (1:1 threading), and specialized versions of the allocator for the programming languages \uC and \CFA using user-level threads running over multiple kernel threads (M:N threading). 67 Implementation of a new stand-alone concurrent low-latency memory-allocator ($\approx$1,200 lines of code) for C/\CC programs using kernel threads (1:1 threading), and specialized versions of the allocator for the programming languages \uC and \CFA using user-level threads running over multiple kernel threads (M:N threading). 68 69 \item 70 Adopt @nullptr@ return for a zero-sized allocation, rather than an actual memory address, which can be passed to @free@. 68 71 69 72 \item … … 101 104 102 105 \item 103 Provide additional heap wrapper functions in \CFA creating a more usable set of allocation operations and properties. 106 Provide additional heap wrapper functions in \CFA creating an orthogonal set of allocation operations and properties. 
104 107 105 108 \item … … 108 111 \item 109 112 @malloc_alignment( addr )@ returns the alignment of the allocation pointed-to by @addr@. 110 If the allocation is not aligned or @addr@ is @NULL@, the minimal alignment is returned. 113 If the allocation is not aligned or @addr@ is the @nulladdr@, the minimal alignment is returned. 111 114 \item 112 115 @malloc_zero_fill( addr )@ returns a boolean result indicating if the memory pointed-to by @addr@ is allocated with zero fill, e.g., by @calloc@/@cmemalign@. … … 116 119 @malloc_usable_size( addr )@ returns the usable (total) size of the memory pointed-to by @addr@, i.e., the bin size containing the allocation, where @malloc_size( addr )@ $\le$ @malloc_usable_size( addr )@. 117 120 \end{itemize} 121 122 \item 123 Provide mostly contention-free allocation and free operations via a heap-per-kernel-thread implementation. 118 124 119 125 \item … … 130 136 131 137 \item 132 Provide extensive runtime checks to validate allocation operations and identify the amount of unfreed storage at program termination. 138 Provide extensive runtime checks to validate allocation operations and identify the amount of unfreed storage at program termination. 133 139 134 140 \item -
doc/theses/mubeen_zulfiqar_MMath/performance.tex
re5d9274 r015925a 3 3 4 4 This chapter uses the micro-benchmarks from \VRef[Chapter]{s:Benchmarks} to test a number of current memory allocators, including llheap. 5 The goal is to see if llheap is competitive with the currently popular memory allocators. 5 The goal is to see if llheap is competitive with the current best memory allocators. 6 6 7 7 … … 11 11 \begin{itemize} 12 12 \item 13 \textbf{Nasus} AMD EPYC 7662, 64-core socket $\times$ 2, 2.0 GHz, GCC version 9.3.0 14 \item 13 15 \textbf{Algol} Huawei ARM TaiShan 2280 V2 Kunpeng 920, 24-core socket $\times$ 4, 2.6 GHz, GCC version 9.4.0 14 \item 15 \textbf{Nasus} AMD EPYC 7662, 64-core socket $\times$ 2, 2.0 GHz, GCC version 9.3.0 16 16 \end{itemize} 17 17 … … 31 31 32 32 \paragraph{glibc (\textsf{glc})} 33 \cite{glibc} is the default glibc thread-safe allocator. 33 \cite{glibc} is the default glibc thread-safe allocator. 34 34 \\ 35 35 \textbf{Version:} Ubuntu GLIBC 2.31-0ubuntu9.7 2.31\\ … … 46 46 47 47 \paragraph{hoard (\textsf{hrd})} 48 \cite{hoard} is a thread-safe allocator that is multi-threaded and uses a heap layer framework. It has per-thread heaps that have thread-local free-lists, and a global shared heap. 48 \cite{hoard} is a thread-safe allocator that is multi-threaded and uses a heap layer framework. It has per-thread heaps that have thread-local free-lists, and a global shared heap. 49 49 \\ 50 50 \textbf{Version:} 3.13\\ … … 78 78 79 79 \paragraph{tbb malloc (\textsf{tbb})} 80 \cite{tbbmalloc} is a thread-safe allocator that is multi-threaded and uses a private heap for each thread. 80 \cite{tbbmalloc} is a thread-safe allocator that is multi-threaded and uses a private heap for each thread. 81 81 Each private-heap has multiple bins of different sizes. Each bin contains free regions of the same size. 
82 82 \\ … … 90 90 \section{Experiments} 91 91 92 Each micro-benchmark is configured and run with each of the allocators. 93 The less time an allocator takes to complete a benchmark the better so lower in the graphs is better, except for the Memory micro-benchmark graphs. 92 Each micro-benchmark is configured and run with each of the allocators. 93 The less time an allocator takes to complete a benchmark the better, so lower in the graphs is better. 94 94 All graphs use log scale on the Y-axis, except for the Memory micro-benchmark (see \VRef{s:MemoryMicroBenchmark}). 95 95 … … 231 231 Second is the low-performer group, which includes the rest of the memory allocators. 232 232 These memory allocators have significant program-induced passive false-sharing, where \textsf{hrd} is the worst performing allocator. 233 All of the allocators in this group are sharing heaps among threads at some level. 234 235 Interestingly, allocators such as \textsf{hrd} and \textsf{glc} performed well in micro-benchmark cache thrash (see \VRef{sec:cache-thrash-perf}), but, these allocators are among the low performers in the cache scratch. 236 It suggests these allocators do not actively produce false-sharing, but preserve program-induced passive false sharing. 233 All of the allocators in this group are sharing heaps among threads at some level. 234 235 Interestingly, allocators such as \textsf{hrd} and \textsf{glc} performed well in micro-benchmark cache thrash (see \VRef{sec:cache-thrash-perf}). 236 But, these allocators are among the low performers in the cache scratch. 237 It suggests these allocators do not actively produce false-sharing but preserve program-induced passive false sharing. 237 238 238 239 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis-frontpgs.tex
re5d9274 r015925a 13 13 \vspace*{1.0cm} 14 14 15 {\Huge\bf High-Performance ConcurrentMemory Allocation}15 {\Huge\bf \CFA Memory Allocation} 16 16 17 17 \vspace*{1.0cm} … … 108 108 % D E C L A R A T I O N P A G E 109 109 % ------------------------------- 110 % The following is a sample De claration Page as provided by the GSO110 % The following is a sample Delaration Page as provided by the GSO 111 111 % December 13th, 2006. It is designed for an electronic thesis. 112 112 \begin{center}\textbf{Author's Declaration}\end{center} … … 136 136 137 137 The goal of this thesis is to build a low-latency memory allocator for both kernel and user multi-threaded systems, which is competitive with the best current memory allocators, while extending the feature set of existing and new allocator routines. 138 A new llheap memory-allocator is created that achieves all of these goals, while maintaining and managing sticky allocation properties for zero-fill ed and alignedallocations without a performance loss.138 A new llheap memory-allocator is created that achieves all of these goals, while maintaining and managing sticky allocation properties for zero-fill and alignment allocations without a performance loss. 139 139 Hence, it becomes possible to use @realloc@ frequently as a safe operation, rather than just occasionally, because it preserves sticky properties when enlarging storage requests. 140 140 Furthermore, the ability to query sticky properties and information allows programmers to write safer programs, as it is possible to dynamically match allocation styles from unknown library routines that return allocations. 141 141 The C allocation API is also extended with @resize@, advanced @realloc@, @aalloc@, @amemalign@, and @cmemalign@ so programmers do not make mistakes writing theses useful allocation operations. 142 142 llheap is embedded into the \uC and \CFA runtime systems, both of which have user-level threading. 
143 The ability to use \CFA's advanced type-system (and possibly \CC's too) to combine advanced memory operations into one allocation routine using named arguments shows how far the allocation API can be pushed, which increases safety and greatly simplifies programmer's use of dynamic allocation.143 The ability to use \CFA's advanced type-system (and possibly \CC's too) to have one allocation routine with completely orthogonal sticky properties shows how far the allocation API can be pushed, which increases safety and greatly simplifies programmer's use of dynamic allocation. 144 144 145 145 The llheap allocator also provides comprehensive statistics for all allocation operations, which are invaluable in understanding and debugging a program's dynamic behaviour. 146 No other memory allocator examined in the thesis provides such comprehensive statistics gathering.147 As well, llheap provides a debugging mode where allocations are checked with internal pre/post conditions and invariants. It is extremely useful, especially for students.146 No other memory allocator examined in the thesis provides comprehensive statistics gathering. 147 As well, llheap provides a debugging mode where allocations are checked, along with internal pre/post conditions and invariants, is extremely useful, especially for students. 148 148 While not as powerful as the @valgrind@ interpreter, a large number of allocation mistakes are detected. 149 149 Finally, contention-free statistics gathering and debugging have a low enough cost to be used in production code. 150 150 151 A micro-benchmark test-suite is started for comparing allocators, rather than relying on a suite of arbitrary programs. It has been an interesting challenge.151 A micro-benchmark test-suite is started for comparing allocators, rather than relying on a suite of arbitrary programs, has been an interesting challenge. 
152 152 These micro-benchmarks have adjustment knobs to simulate allocation patterns hard-coded into arbitrary test programs. 153 Existing memory allocators, glibc, dlmalloc, hoard, jemalloc, ptmalloc3, rpmalloc, tbmalloc, and the new allocator llheap are all compared using the new micro-benchmark test-suite.153 Existing memory allocators, glibc, dlmalloc, hoard, jemalloc, ptmalloc3, rpmalloc, tbmalloc and the new allocator llheap are all compared using the new micro-benchmark test-suite. 154 154 \cleardoublepage 155 155 … … 162 162 I would like to thank all the people who made this thesis possible. 163 163 164 I would like to acknowledge Peter A. Buhr for his assistance and support throughout the process.164 I would like to acknowledge Peter A. Buhr for his assistance and support throughtout the process. 165 165 It would have been impossible without him. 166 166 167 I would like to acknowledge Gregor Richards and Trevor Brown for reading my thesis quickly and giving me great feedback on my work.168 169 167 Also, I would say thanks to my team members at PLG especially Thierry, Michael, and Andrew for their input. 170 171 Finally, a special thank you to Huawei Canada for funding this work.172 168 \end{center} 173 169 \cleardoublepage … … 199 195 % L I S T O F T A B L E S 200 196 % --------------------------- 201 %\addcontentsline{toc}{chapter}{List of Tables}202 %\listoftables203 %\cleardoublepage204 %\phantomsection % allows hyperref to link to the correct page197 \addcontentsline{toc}{chapter}{List of Tables} 198 \listoftables 199 \cleardoublepage 200 \phantomsection % allows hyperref to link to the correct page 205 201 206 202 % Change page numbering back to Arabic numerals -
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex
re5d9274 r015925a 106 106 pdffitwindow=false, % window fit to page when opened 107 107 pdfstartview={FitH}, % fits the width of the page to the window 108 pdftitle={High-Performance Concurrent Memory Allocation}, % title: CHANGE THIS TEXT!108 pdftitle={Cforall Memory Allocation}, % title: CHANGE THIS TEXT! 109 109 pdfauthor={Mubeen Zulfiqar}, % author: CHANGE THIS TEXT! and uncomment this line 110 110 pdfsubject={Cforall}, % subject: CHANGE THIS TEXT! and uncomment this line -
doc/theses/thierry_delisle_PhD/thesis/Makefile
re5d9274 r015925a 3 3 Build = build 4 4 Figures = img 5 6 LaTMac = ../../../LaTeXmacros 7 BibRep = ../../../bibliography 8 9 Macros = ${LaTMac} 10 TeXLIB = .:${Macros}:${Build}:${BibRep}: 5 Macros = ../../../LaTeXmacros 6 TeXLIB = .:${Macros}:${Build}:../../../bibliography: 11 7 LaTeX = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${Build} 12 8 BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex … … 41 37 emptytree \ 42 38 fairness \ 43 idle \44 idle1 \45 idle2 \46 idle_state \47 39 io_uring \ 48 40 pivot_ring \ … … 50 42 cycle \ 51 43 result.cycle.jax.ops \ 52 result.yield.jax.ops \53 result.churn.jax.ops \54 result.cycle.jax.ns \55 result.yield.jax.ns \56 result.churn.jax.ns \57 result.cycle.low.jax.ops \58 result.yield.low.jax.ops \59 result.churn.low.jax.ops \60 result.cycle.low.jax.ns \61 result.yield.low.jax.ns \62 result.churn.low.jax.ns \63 result.memcd.updt.qps \64 result.memcd.updt.lat \65 result.memcd.rate.qps \66 result.memcd.rate.99th \67 44 } 68 45 … … 75 52 ## Define the documents that need to be made. 
76 53 all: thesis.pdf 77 thesis.pdf: ${TEXTS} ${FIGURES} ${PICTURES} thesis.tex glossary.tex local.bib ${LaTMac}/common.tex ${LaTMac}/common.sty ${BibRep}/pl.bib54 thesis.pdf: ${TEXTS} ${FIGURES} ${PICTURES} thesis.tex glossary.tex local.bib ../../../LaTeXmacros/common.tex ../../../LaTeXmacros/common.sty 78 55 79 56 DOCUMENT = thesis.pdf … … 139 116 python3 $< $@ 140 117 141 cycle_jax_ops_FLAGS = --MaxY=120000000 142 cycle_low_jax_ops_FLAGS = --MaxY=120000000 143 cycle_jax_ns_FLAGS = --MaxY=2000 144 cycle_low_jax_ns_FLAGS = --MaxY=2000 118 build/result.%.ns.svg : data/% | ${Build} 119 ../../../../benchmark/plot.py -f $< -o $@ -y "ns per ops" 145 120 146 yield_jax_ops_FLAGS = --MaxY=150000000 147 yield_low_jax_ops_FLAGS = --MaxY=150000000 148 yield_jax_ns_FLAGS = --MaxY=1500 149 yield_low_jax_ns_FLAGS = --MaxY=1500 150 151 build/result.%.ns.svg : data/% Makefile | ${Build} 152 ../../../../benchmark/plot.py -f $< -o $@ -y "ns per ops/procs" $($(subst .,_,$*)_ns_FLAGS) 153 154 build/result.%.ops.svg : data/% Makefile | ${Build} 155 ../../../../benchmark/plot.py -f $< -o $@ -y "Ops per second" $($(subst .,_,$*)_ops_FLAGS) 156 157 build/result.memcd.updt.qps.svg : data/memcd.updt Makefile | ${Build} 158 ../../../../benchmark/plot.py -f $< -o $@ -y "Actual QPS" -x "Update Ratio" 159 160 build/result.memcd.updt.lat.svg : data/memcd.updt Makefile | ${Build} 161 ../../../../benchmark/plot.py -f $< -o $@ -y "Average Read Latency" -x "Update Ratio" 162 163 build/result.memcd.rate.qps.svg : data/memcd.rate Makefile | ${Build} 164 ../../../../benchmark/plot.py -f $< -o $@ -y "Actual QPS" -x "Target QPS" 165 166 build/result.memcd.rate.99th.svg : data/memcd.rate Makefile | ${Build} 167 ../../../../benchmark/plot.py -f $< -o $@ -y "Tail Read Latency" -x "Target QPS" 121 build/result.%.ops.svg : data/% | ${Build} 122 ../../../../benchmark/plot.py -f $< -o $@ -y "Ops per second" 168 123 169 124 ## pstex with inverted colors -
doc/theses/thierry_delisle_PhD/thesis/data/cycle.jax
re5d9274 r015925a 1 [["rdq-cycle-go", "./rdq-cycle-go - p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10001.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 1138076440.0, "Ops per second": 113792094.48, "ns per ops": 8.79, "Ops per threads": 94839.0, "Ops per procs": 47419851.0, "Ops/sec/procs": 4741337.27, "ns per ops/procs": 210.91}],["rdq-cycle-go", "./rdq-cycle-go -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 200285.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 17638575791.0, "Ops per second": 88067238.72, "ns per ops": 11.35, "Ops per threads": 2204821.0, "Ops per procs": 1102410986.0, "Ops/sec/procs": 5504202.42, "ns per ops/procs": 181.68}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10100.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54856916.0, "Ops per second": 5485691.0, "ns per ops": 184.0, "Ops per threads": 109713.0, "Ops per procs": 54856916.0, "Ops/sec/procs": 5485691.0, "ns per ops/procs": 184.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10025.449006, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 558836360.0, "Total blocks": 558836360.0, "Ops per second": 55741778.71, "ns per ops": 17.94, "Ops per threads": 69854.0, "Ops per procs": 34927272.0, "Ops/sec/procs": 3483861.17, "ns per ops/procs": 287.04}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10038.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 58647049.0, "Total blocks": 58647049.0, "Ops per second": 5842287.68, "ns per ops": 171.17, "Ops per threads": 7330.0, "Ops per procs": 3665440.0, "Ops/sec/procs": 365142.98, "ns per ops/procs": 
2738.65}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10003.489711, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 728096996.0, "Total blocks": 728096996.0, "Ops per second": 72784299.98, "ns per ops": 13.74, "Ops per threads": 60674.0, "Ops per procs": 30337374.0, "Ops/sec/procs": 3032679.17, "ns per ops/procs": 329.74}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10021.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 63157049.0, "Total blocks": 63157049.0, "Ops per second": 6302255.13, "ns per ops": 158.67, "Ops per threads": 15789.0, "Ops per procs": 7894631.0, "Ops/sec/procs": 787781.89, "ns per ops/procs": 1269.39}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10009.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62412200.0, "Total blocks": 62411700.0, "Ops per second": 6235572.31, "ns per ops": 160.37, "Ops per threads": 124824.0, "Ops per procs": 62412200.0, "Ops/sec/procs": 6235572.31, "ns per ops/procs": 160.37}],["rdq-cycle-go", "./rdq-cycle-go -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10000.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 464608617.0, "Ops per second": 46457191.42, "ns per ops": 21.53, "Ops per threads": 116152.0, "Ops per procs": 58076077.0, "Ops/sec/procs": 5807148.93, "ns per ops/procs": 172.2}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10099.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 391521066.0, "Ops per second": 39152106.0, "ns per ops": 25.0, "Ops per threads": 97880.0, "Ops per procs": 48940133.0, "Ops/sec/procs": 4894013.0, "ns per ops/procs": 
206.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10099.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 963549550.0, "Ops per second": 96354955.0, "ns per ops": 10.0, "Ops per threads": 80295.0, "Ops per procs": 40147897.0, "Ops/sec/procs": 4014789.0, "ns per ops/procs": 251.0}],["rdq-cycle-go", "./rdq-cycle-go -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10001.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 867718190.0, "Ops per second": 86761170.55, "ns per ops": 11.53, "Ops per threads": 108464.0, "Ops per procs": 54232386.0, "Ops/sec/procs": 5422573.16, "ns per ops/procs": 184.41}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10100.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 962016289.0, "Ops per second": 96201628.0, "ns per ops": 10.0, "Ops per threads": 80168.0, "Ops per procs": 40084012.0, "Ops/sec/procs": 4008401.0, "ns per ops/procs": 251.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10016.837824, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54738237.0, "Total blocks": 54737741.0, "Ops per second": 5464622.46, "ns per ops": 183.0, "Ops per threads": 109476.0, "Ops per procs": 54738237.0, "Ops/sec/procs": 5464622.46, "ns per ops/procs": 183.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10099.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 731309408.0, "Ops per second": 73130940.0, "ns per ops": 13.0, "Ops per threads": 91413.0, "Ops per procs": 45706838.0, "Ops/sec/procs": 4570683.0, "ns per ops/procs": 220.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 16 -d 10 -r 
5 -t 1600", {"Duration (ms)": 10100.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 739772688.0, "Ops per second": 73977268.0, "ns per ops": 13.0, "Ops per threads": 92471.0, "Ops per procs": 46235793.0, "Ops/sec/procs": 4623579.0, "ns per ops/procs": 218.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10100.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 391449785.0, "Ops per second": 39144978.0, "ns per ops": 25.0, "Ops per threads": 97862.0, "Ops per procs": 48931223.0, "Ops/sec/procs": 4893122.0, "ns per ops/procs": 206.0}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10048.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 57239183.0, "Total blocks": 57239183.0, "Ops per second": 5696211.13, "ns per ops": 175.56, "Ops per threads": 4769.0, "Ops per procs": 2384965.0, "Ops/sec/procs": 237342.13, "ns per ops/procs": 4213.33}],["rdq-cycle-go", "./rdq-cycle-go -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10000.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55248375.0, "Ops per second": 5524562.87, "ns per ops": 181.01, "Ops per threads": 110496.0, "Ops per procs": 55248375.0, "Ops/sec/procs": 5524562.87, "ns per ops/procs": 181.01}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10021.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 61553053.0, "Total blocks": 61553053.0, "Ops per second": 6142186.88, "ns per ops": 162.81, "Ops per threads": 15388.0, "Ops per procs": 7694131.0, "Ops/sec/procs": 767773.36, "ns per ops/procs": 1302.47}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10008.0, 
"Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62811642.0, "Total blocks": 62811142.0, "Ops per second": 6275517.47, "ns per ops": 159.35, "Ops per threads": 125623.0, "Ops per procs": 62811642.0, "Ops/sec/procs": 6275517.47, "ns per ops/procs": 159.35}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10018.820873, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 260866706.0, "Total blocks": 260862710.0, "Ops per second": 26037665.44, "ns per ops": 38.41, "Ops per threads": 65216.0, "Ops per procs": 32608338.0, "Ops/sec/procs": 3254708.18, "ns per ops/procs": 307.25}],["rdq-cycle-go", "./rdq-cycle-go -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10000.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 874581175.0, "Ops per second": 87449851.2, "ns per ops": 11.44, "Ops per threads": 109322.0, "Ops per procs": 54661323.0, "Ops/sec/procs": 5465615.7, "ns per ops/procs": 182.96}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10099.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55228782.0, "Ops per second": 5522878.0, "ns per ops": 182.0, "Ops per threads": 110457.0, "Ops per procs": 55228782.0, "Ops/sec/procs": 5522878.0, "ns per ops/procs": 182.0}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10009.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62564955.0, "Total blocks": 62564455.0, "Ops per second": 6250797.96, "ns per ops": 159.98, "Ops per threads": 125129.0, "Ops per procs": 62564955.0, "Ops/sec/procs": 6250797.96, "ns per ops/procs": 159.98}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10100.0, "Number 
of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 738848909.0, "Ops per second": 73884890.0, "ns per ops": 13.0, "Ops per threads": 92356.0, "Ops per procs": 46178056.0, "Ops/sec/procs": 4617805.0, "ns per ops/procs": 218.0}],["rdq-cycle-go", "./rdq-cycle-go -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10001.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 1131221613.0, "Ops per second": 113108175.94, "ns per ops": 8.84, "Ops per threads": 94268.0, "Ops per procs": 47134233.0, "Ops/sec/procs": 4712840.66, "ns per ops/procs": 212.19}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10008.209159, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 729328104.0, "Total blocks": 729328099.0, "Ops per second": 72872987.81, "ns per ops": 13.72, "Ops per threads": 60777.0, "Ops per procs": 30388671.0, "Ops/sec/procs": 3036374.49, "ns per ops/procs": 329.34}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10099.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 961002611.0, "Ops per second": 96100261.0, "ns per ops": 10.0, "Ops per threads": 80083.0, "Ops per procs": 40041775.0, "Ops/sec/procs": 4004177.0, "ns per ops/procs": 252.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10099.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 390098231.0, "Ops per second": 39009823.0, "ns per ops": 25.0, "Ops per threads": 97524.0, "Ops per procs": 48762278.0, "Ops/sec/procs": 4876227.0, "ns per ops/procs": 207.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10100.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle 
size (# thrds)": 5.0, "Total Operations(ops)": 55237591.0, "Ops per second": 5523759.0, "ns per ops": 182.0, "Ops per threads": 110475.0, "Ops per procs": 55237591.0, "Ops/sec/procs": 5523759.0, "ns per ops/procs": 182.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10016.576699, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54510321.0, "Total blocks": 54509820.0, "Ops per second": 5442011.04, "ns per ops": 183.76, "Ops per threads": 109020.0, "Ops per procs": 54510321.0, "Ops/sec/procs": 5442011.04, "ns per ops/procs": 183.76}],["rdq-cycle-go", "./rdq-cycle-go -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10001.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 1135730371.0, "Ops per second": 113558509.97, "ns per ops": 8.81, "Ops per threads": 94644.0, "Ops per procs": 47322098.0, "Ops/sec/procs": 4731604.58, "ns per ops/procs": 211.34}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10039.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 61004037.0, "Total blocks": 61004037.0, "Ops per second": 6076255.04, "ns per ops": 164.58, "Ops per threads": 7625.0, "Ops per procs": 3812752.0, "Ops/sec/procs": 379765.94, "ns per ops/procs": 2633.2}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10004.891999, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 747946345.0, "Total blocks": 747934349.0, "Ops per second": 74758062.86, "ns per ops": 13.38, "Ops per threads": 62328.0, "Ops per procs": 31164431.0, "Ops/sec/procs": 3114919.29, "ns per ops/procs": 321.04}],["rdq-cycle-go", "./rdq-cycle-go -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10000.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size 
(# thrds)": 5.0, "Total Operations(ops)": 466424792.0, "Ops per second": 46638931.23, "ns per ops": 21.44, "Ops per threads": 116606.0, "Ops per procs": 58303099.0, "Ops/sec/procs": 5829866.4, "ns per ops/procs": 171.53}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10086.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 57343570.0, "Total blocks": 57343570.0, "Ops per second": 5685308.81, "ns per ops": 175.89, "Ops per threads": 4778.0, "Ops per procs": 2389315.0, "Ops/sec/procs": 236887.87, "ns per ops/procs": 4221.41}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10020.39533, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 263517289.0, "Total blocks": 263513293.0, "Ops per second": 26298093.07, "ns per ops": 38.03, "Ops per threads": 65879.0, "Ops per procs": 32939661.0, "Ops/sec/procs": 3287261.63, "ns per ops/procs": 304.2}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10025.357431, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 551670395.0, "Total blocks": 551662399.0, "Ops per second": 55027503.89, "ns per ops": 18.17, "Ops per threads": 68958.0, "Ops per procs": 34479399.0, "Ops/sec/procs": 3439218.99, "ns per ops/procs": 290.76}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10050.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 56162695.0, "Total blocks": 56162695.0, "Ops per second": 5588033.65, "ns per ops": 178.95, "Ops per threads": 4680.0, "Ops per procs": 2340112.0, "Ops/sec/procs": 232834.74, "ns per ops/procs": 4294.89}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10019.690183, "Number of processors": 8.0, 
"Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 271866976.0, "Total blocks": 271862980.0, "Ops per second": 27133271.69, "ns per ops": 36.86, "Ops per threads": 67966.0, "Ops per procs": 33983372.0, "Ops/sec/procs": 3391658.96, "ns per ops/procs": 294.84}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10057.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62105022.0, "Total blocks": 62105022.0, "Ops per second": 6175186.04, "ns per ops": 161.94, "Ops per threads": 15526.0, "Ops per procs": 7763127.0, "Ops/sec/procs": 771898.25, "ns per ops/procs": 1295.51}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10025.81217, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 537080117.0, "Total blocks": 537072121.0, "Ops per second": 53569736.59, "ns per ops": 18.67, "Ops per threads": 67135.0, "Ops per procs": 33567507.0, "Ops/sec/procs": 3348108.54, "ns per ops/procs": 298.68}],["rdq-cycle-go", "./rdq-cycle-go -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10000.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55967030.0, "Ops per second": 5596438.25, "ns per ops": 178.69, "Ops per threads": 111934.0, "Ops per procs": 55967030.0, "Ops/sec/procs": 5596438.25, "ns per ops/procs": 178.69}],["rdq-cycle-go", "./rdq-cycle-go -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10000.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55703320.0, "Ops per second": 5570084.72, "ns per ops": 179.53, "Ops per threads": 111406.0, "Ops per procs": 55703320.0, "Ops/sec/procs": 5570084.72, "ns per ops/procs": 179.53}],["rdq-cycle-go", "./rdq-cycle-go -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10000.0, "Number of processors": 8.0, "Number of 
threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 469211793.0, "Ops per second": 46918327.16, "ns per ops": 21.31, "Ops per threads": 117302.0, "Ops per procs": 58651474.0, "Ops/sec/procs": 5864790.9, "ns per ops/procs": 170.51}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10016.545208, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54925472.0, "Total blocks": 54924976.0, "Ops per second": 5483474.68, "ns per ops": 182.37, "Ops per threads": 109850.0, "Ops per procs": 54925472.0, "Ops/sec/procs": 5483474.68, "ns per ops/procs": 182.37}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10037.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 60770550.0, "Total blocks": 60770550.0, "Ops per second": 6054474.7, "ns per ops": 165.17, "Ops per threads": 7596.0, "Ops per procs": 3798159.0, "Ops/sec/procs": 378404.67, "ns per ops/procs": 2642.67}]]1 [["rdq-cycle-go", "./rdq-cycle-go -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 43606897.0, "Ops per second": 8720908.73, "ns per ops": 114.67, "Ops per threads": 2180344.0, "Ops per procs": 10901724.0, "Ops/sec/procs": 2180227.18, "ns per ops/procs": 458.67}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5010.922033, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 93993568.0, "Total blocks": 93993209.0, "Ops per second": 18757739.07, "ns per ops": 53.31, "Ops per threads": 1174919.0, "Ops per procs": 5874598.0, "Ops/sec/procs": 1172358.69, "ns per ops/procs": 852.98}],["rdq-cycle-go", "./rdq-cycle-go -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 16.0, "Number of threads": 80.0, 
"Cycle size (# thrds)": 5.0, "Total Operations(ops)": 136763517.0, "Ops per second": 27351079.35, "ns per ops": 36.56, "Ops per threads": 1709543.0, "Ops per procs": 8547719.0, "Ops/sec/procs": 1709442.46, "ns per ops/procs": 584.99}],["rdq-cycle-go", "./rdq-cycle-go -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 27778961.0, "Ops per second": 5555545.09, "ns per ops": 180.0, "Ops per threads": 5555792.0, "Ops per procs": 27778961.0, "Ops/sec/procs": 5555545.09, "ns per ops/procs": 180.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5009.290878, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 43976310.0, "Total blocks": 43976217.0, "Ops per second": 8778949.17, "ns per ops": 113.91, "Ops per threads": 2198815.0, "Ops per procs": 10994077.0, "Ops/sec/procs": 2194737.29, "ns per ops/procs": 455.64}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5009.151542, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 44132300.0, "Total blocks": 44132201.0, "Ops per second": 8810334.37, "ns per ops": 113.5, "Ops per threads": 2206615.0, "Ops per procs": 11033075.0, "Ops/sec/procs": 2202583.59, "ns per ops/procs": 454.01}],["rdq-cycle-go", "./rdq-cycle-go -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 46353896.0, "Ops per second": 9270294.11, "ns per ops": 107.87, "Ops per threads": 2317694.0, "Ops per procs": 11588474.0, "Ops/sec/procs": 2317573.53, "ns per ops/procs": 431.49}],["rdq-cycle-go", "./rdq-cycle-go -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 27894379.0, "Ops per 
second": 5578591.58, "ns per ops": 179.26, "Ops per threads": 5578875.0, "Ops per procs": 27894379.0, "Ops/sec/procs": 5578591.58, "ns per ops/procs": 179.26}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5008.743463, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 32825528.0, "Total blocks": 32825527.0, "Ops per second": 6553645.29, "ns per ops": 152.59, "Ops per threads": 6565105.0, "Ops per procs": 32825528.0, "Ops/sec/procs": 6553645.29, "ns per ops/procs": 152.59}],["rdq-cycle-go", "./rdq-cycle-go -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 138213098.0, "Ops per second": 27640977.5, "ns per ops": 36.18, "Ops per threads": 1727663.0, "Ops per procs": 8638318.0, "Ops/sec/procs": 1727561.09, "ns per ops/procs": 578.85}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5007.914168, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 44109513.0, "Total blocks": 44109419.0, "Ops per second": 8807961.06, "ns per ops": 113.53, "Ops per threads": 2205475.0, "Ops per procs": 11027378.0, "Ops/sec/procs": 2201990.27, "ns per ops/procs": 454.13}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5012.121876, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 94130673.0, "Total blocks": 94130291.0, "Ops per second": 18780603.37, "ns per ops": 53.25, "Ops per threads": 1176633.0, "Ops per procs": 5883167.0, "Ops/sec/procs": 1173787.71, "ns per ops/procs": 851.94}],["rdq-cycle-go", "./rdq-cycle-go -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 140936367.0, "Ops per second": 28185668.38, "ns 
per ops": 35.48, "Ops per threads": 1761704.0, "Ops per procs": 8808522.0, "Ops/sec/procs": 1761604.27, "ns per ops/procs": 567.66}],["rdq-cycle-go", "./rdq-cycle-go -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 44279585.0, "Ops per second": 8855475.01, "ns per ops": 112.92, "Ops per threads": 2213979.0, "Ops per procs": 11069896.0, "Ops/sec/procs": 2213868.75, "ns per ops/procs": 451.7}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5008.37392, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 32227534.0, "Total blocks": 32227533.0, "Ops per second": 6434730.02, "ns per ops": 155.41, "Ops per threads": 6445506.0, "Ops per procs": 32227534.0, "Ops/sec/procs": 6434730.02, "ns per ops/procs": 155.41}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5011.019789, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 90600569.0, "Total blocks": 90600173.0, "Ops per second": 18080265.66, "ns per ops": 55.31, "Ops per threads": 1132507.0, "Ops per procs": 5662535.0, "Ops/sec/procs": 1130016.6, "ns per ops/procs": 884.94}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5008.52474, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 32861776.0, "Total blocks": 32861775.0, "Ops per second": 6561168.75, "ns per ops": 152.41, "Ops per threads": 6572355.0, "Ops per procs": 32861776.0, "Ops/sec/procs": 6561168.75, "ns per ops/procs": 152.41}],["rdq-cycle-go", "./rdq-cycle-go -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 28097680.0, "Ops per second": 5619274.9, "ns per ops": 177.96, "Ops per threads": 
5619536.0, "Ops per procs": 28097680.0, "Ops/sec/procs": 5619274.9, "ns per ops/procs": 177.96}]] -
doc/theses/thierry_delisle_PhD/thesis/local.bib
re5d9274 r015925a 701 701 note = "[Online; accessed 12-April-2022]" 702 702 } 703 704 % RMR notes :705 % [05/04, 12:36] Trevor Brown706 % i don't know where rmr complexity was first introduced, but there are many many many papers that use the term and define it707 % [05/04, 12:37] Trevor Brown708 % here's one paper that uses the term a lot and links to many others that use it... might trace it to something useful there https://drops.dagstuhl.de/opus/volltexte/2021/14832/pdf/LIPIcs-DISC-2021-30.pdf709 % [05/04, 12:37] Trevor Brown710 % another option might be to cite a textbook711 % [05/04, 12:42] Trevor Brown712 % but i checked two textbooks in the area i'm aware of and i don't see a definition of rmr complexity in either713 % [05/04, 12:42] Trevor Brown714 % this one has a nice statement about the prevelance of rmr complexity, as well as some rough definition715 % [05/04, 12:42] Trevor Brown716 % https://dl.acm.org/doi/pdf/10.1145/3465084.3467938717 718 % Race to idle notes :719 % [13/04, 16:56] Martin Karsten720 % I don't have a citation. Google brings up this one, which might be good:721 %722 % https://doi.org/10.1137/1.9781611973099.100 -
doc/theses/thierry_delisle_PhD/thesis/text/eval_macro.tex
re5d9274 r015925a 7 7 Networked ZIPF 8 8 9 Nginx : 5Gb still good, 4Gb starts to suffer10 11 Cforall : 10Gb too high, 4 Gb too low12 13 9 \section{Memcached} 14 10 15 \subsection{Benchmark Environment} 16 These experiments are run on a cluster of homogenous Supermicro SYS-6017R-TDF compute nodes with the following characteristics: 17 The server runs Ubuntu 20.04.3 LTS on top of Linux Kernel 5.11.0-34. 18 Each node has 2 Intel(R) Xeon(R) CPU E5-2620 v2 running at 2.10GHz. 19 These CPUs have 6 cores per CPUs and 2 \glspl{hthrd} per core, for a total of 24 \glspl{hthrd}. 20 The cpus each have 384 KB, 3 MB and 30 MB of L1, L2 and L3 caches respectively. 21 Each node is connected to the network through a Mellanox 10 Gigabit Ethernet port. 22 The network route uses 1 Mellanox SX1012 10/40 Gigabit Ethernet cluster switch. 11 In Memory 23 12 24 25 26 \begin{figure} 27 \centering 28 \input{result.memcd.updt.qps.pstex_t} 29 \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description} 30 \label{fig:memcd:updt:qps} 31 \end{figure} 32 33 \begin{figure} 34 \centering 35 \input{result.memcd.updt.lat.pstex_t} 36 \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description} 37 \label{fig:memcd:updt:lat} 38 \end{figure} 39 40 \begin{figure} 41 \centering 42 \input{result.memcd.rate.qps.pstex_t} 43 \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description} 44 \label{fig:memcd:rate:qps} 45 \end{figure} 46 47 \begin{figure} 48 \centering 49 \input{result.memcd.rate.99th.pstex_t} 50 \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description} 51 \label{fig:memcd:rate:tail} 52 \end{figure} 13 Networked -
doc/theses/thierry_delisle_PhD/thesis/text/eval_micro.tex
re5d9274 r015925a 6 6 \section{Benchmark Environment} 7 7 All of these benchmarks are run on two distinct hardware environment, an AMD and an INTEL machine. 8 9 For all benchmarks, \texttt{taskset} is used to limit the experiment to 1 NUMA Node with no hyper threading.10 If more \glspl{hthrd} are needed, then 1 NUMA Node with hyperthreading is used.11 If still more \glspl{hthrd} are needed then the experiment is limited to as few NUMA Nodes as needed.12 13 8 14 9 \paragraph{AMD} The AMD machine is a server with two AMD EPYC 7662 CPUs and 256GB of DDR4 RAM. … … 28 23 29 24 \section{Cycling latency} 30 \begin{figure}31 \centering32 \input{cycle.pstex_t}33 \caption[Cycle benchmark]{Cycle benchmark\smallskip\newline Each \gls{at} unparks the next \gls{at} in the cycle before parking itself.}34 \label{fig:cycle}35 \end{figure}36 25 The most basic evaluation of any ready queue is to evaluate the latency needed to push and pop one element from the ready-queue. 37 26 Since these two operation also describe a \texttt{yield} operation, many systems use this as the most basic benchmark. … … 53 42 Note that this problem is only present on SMP machines and is significantly mitigated by the fact that there are multiple rings in the system. 54 43 44 \begin{figure} 45 \centering 46 \input{cycle.pstex_t} 47 \caption[Cycle benchmark]{Cycle benchmark\smallskip\newline Each \gls{at} unparks the next \gls{at} in the cycle before parking itself.} 48 \label{fig:cycle} 49 \end{figure} 50 55 51 To avoid this benchmark from being dominated by the idle sleep handling, the number of rings is kept at least as high as the number of \glspl{proc} available. 56 52 Beyond this point, adding more rings serves to mitigate even more the idle sleep handling. 
… … 58 54 59 55 The actual benchmark is more complicated to handle termination, but that simply requires using a binary semphore or a channel instead of raw \texttt{park}/\texttt{unpark} and carefully picking the order of the \texttt{P} and \texttt{V} with respect to the loop condition. 60 Figure~\ref{fig:cycle:code} shows pseudo code for this benchmark. 56 57 \begin{lstlisting} 58 Thread.main() { 59 count := 0 60 for { 61 wait() 62 this.next.wake() 63 count ++ 64 if must_stop() { break } 65 } 66 global.count += count 67 } 68 \end{lstlisting} 61 69 62 70 \begin{figure} 63 \begin{lstlisting} 64 Thread.main() { 65 count := 0 66 for { 67 wait() 68 this.next.wake() 69 count ++ 70 if must_stop() { break } 71 } 72 global.count += count 73 } 74 \end{lstlisting} 75 \caption[Cycle Benchmark : Pseudo Code]{Cycle Benchmark : Pseudo Code} 76 \label{fig:cycle:code} 71 \centering 72 \input{result.cycle.jax.ops.pstex_t} 73 \vspace*{-10pt} 74 \label{fig:cycle:ns:jax} 77 75 \end{figure} 78 79 80 81 \subsection{Results}82 \begin{figure}83 \subfloat[][Throughput, 100 \ats per \proc]{84 \resizebox{0.5\linewidth}{!}{85 \input{result.cycle.jax.ops.pstex_t}86 }87 \label{fig:cycle:jax:ops}88 }89 \subfloat[][Throughput, 1 \ats per \proc]{90 \resizebox{0.5\linewidth}{!}{91 \input{result.cycle.low.jax.ops.pstex_t}92 }93 \label{fig:cycle:jax:low:ops}94 }95 96 \subfloat[][Latency, 100 \ats per \proc]{97 \resizebox{0.5\linewidth}{!}{98 \input{result.cycle.jax.ns.pstex_t}99 }100 101 }102 \subfloat[][Latency, 1 \ats per \proc]{103 \resizebox{0.5\linewidth}{!}{104 \input{result.cycle.low.jax.ns.pstex_t}105 }106 \label{fig:cycle:jax:low:ns}107 }108 \caption[Cycle Benchmark on Intel]{Cycle Benchmark on Intel\smallskip\newline Throughput as a function of \proc count, using 100 cycles per \proc, 5 \ats per cycle.}109 \label{fig:cycle:jax}110 \end{figure}111 Figure~\ref{fig:cycle:jax} shows the throughput as a function of \proc count, with the following constants:112 Each run uses 100 cycles per \proc, 
5 \ats per cycle.113 114 \todo{results discussion}115 76 116 77 \section{Yield} … … 120 81 Its only interesting variable is the number of \glspl{at} per \glspl{proc}, where ratios close to 1 means the ready queue(s) could be empty. 121 82 This sometimes puts more strain on the idle sleep handling, compared to scenarios where there is clearly plenty of work to be done. 122 Figure~\ref{fig:yield:code} shows pseudo code for this benchmark, the ``wait/wake-next'' is simply replaced by a yield.123 83 124 \begin{figure} 125 \begin{lstlisting} 126 Thread.main() { 127 count := 0 128 for { 129 yield() 130 count ++ 131 if must_stop() { break } 132 } 133 global.count += count 84 \todo{code, setup, results} 85 86 \begin{lstlisting} 87 Thread.main() { 88 count := 0 89 while !stop { 90 yield() 91 count ++ 134 92 } 135 \end{lstlisting} 136 \caption[Yield Benchmark : Pseudo Code]{Yield Benchmark : Pseudo Code} 137 \label{fig:yield:code} 138 \end{figure} 139 140 \subsection{Results} 141 \begin{figure} 142 \subfloat[][Throughput, 100 \ats per \proc]{ 143 \resizebox{0.5\linewidth}{!}{ 144 \input{result.yield.jax.ops.pstex_t} 145 } 146 \label{fig:yield:jax:ops} 93 global.count += count 147 94 } 148 \subfloat[][Throughput, 1 \ats per \proc]{ 149 \resizebox{0.5\linewidth}{!}{ 150 \input{result.yield.low.jax.ops.pstex_t} 151 } 152 \label{fig:yield:jax:low:ops} 153 } 154 155 \subfloat[][Latency, 100 \ats per \proc]{ 156 \resizebox{0.5\linewidth}{!}{ 157 \input{result.yield.jax.ns.pstex_t} 158 } 159 \label{fig:yield:jax:ns} 160 } 161 \subfloat[][Latency, 1 \ats per \proc]{ 162 \resizebox{0.5\linewidth}{!}{ 163 \input{result.yield.low.jax.ns.pstex_t} 164 } 165 \label{fig:yield:jax:low:ns} 166 } 167 \caption[Yield Benchmark on Intel]{Yield Benchmark on Intel\smallskip\newline Throughput as a function of \proc count, using 1 \ats per \proc.} 168 \label{fig:yield:jax} 169 \end{figure} 170 Figure~\ref{fig:yield:ops:jax} shows the throughput as a function of \proc count, with the following 
constants: 171 Each run uses 100 \ats per \proc. 172 173 \todo{results discussion} 95 \end{lstlisting} 174 96 175 97 … … 183 105 In either case, this benchmark aims to highlight how each scheduler handles these cases, since both cases can lead to performance degradation if they are not handled correctly. 184 106 185 To achieve this the benchmark uses a fixed size array of semaphores. 186 Each \gls{at} picks a random semaphore, \texttt{V}s it to unblock a \at waiting and then \texttt{P}s on the semaphore. 187 This creates a flow where \glspl{at} push each other out of the semaphores before being pushed out themselves. 188 For this benchmark to work however, the number of \glspl{at} must be equal or greater to the number of semaphores plus the number of \glspl{proc}. 189 Note that the nature of these semaphores mean the counter can go beyond 1, which could lead to calls to \texttt{P} not blocking. 107 To achieve this the benchmark uses a fixed size array of \newterm{chair}s, where a chair is a data structure that holds a single blocked \gls{at}. 108 When a \gls{at} attempts to block on the chair, it must first unblocked the \gls{at} currently blocked on said chair, if any. 109 This creates a flow where \glspl{at} push each other out of the chairs before being pushed out themselves. 110 For this benchmark to work however, the number of \glspl{at} must be equal or greater to the number of chairs plus the number of \glspl{proc}. 
190 111 191 112 \todo{code, setup, results} … … 195 116 for { 196 117 r := random() % len(spots) 197 spots[r].V() 198 spots[r].P() 118 next := xchg(spots[r], this) 119 if next { next.wake() } 120 wait() 199 121 count ++ 200 122 if must_stop() { break } … … 203 125 } 204 126 \end{lstlisting} 205 206 \begin{figure}207 \subfloat[][Throughput, 100 \ats per \proc]{208 \resizebox{0.5\linewidth}{!}{209 \input{result.churn.jax.ops.pstex_t}210 }211 \label{fig:churn:jax:ops}212 }213 \subfloat[][Throughput, 1 \ats per \proc]{214 \resizebox{0.5\linewidth}{!}{215 \input{result.churn.low.jax.ops.pstex_t}216 }217 \label{fig:churn:jax:low:ops}218 }219 220 \subfloat[][Latency, 100 \ats per \proc]{221 \resizebox{0.5\linewidth}{!}{222 \input{result.churn.jax.ns.pstex_t}223 }224 225 }226 \subfloat[][Latency, 1 \ats per \proc]{227 \resizebox{0.5\linewidth}{!}{228 \input{result.churn.low.jax.ns.pstex_t}229 }230 \label{fig:churn:jax:low:ns}231 }232 \caption[Churn Benchmark on Intel]{\centering Churn Benchmark on Intel\smallskip\newline Throughput and latency of the Churn on the benchmark on the Intel machine. Throughput is the total operation per second across all cores. Latency is the duration of each opeartion.}233 \label{fig:churn:jax}234 \end{figure}235 127 236 128 \section{Locality} -
doc/theses/thierry_delisle_PhD/thesis/text/intro.tex
re5d9274 r015925a 2 2 \todo{A proper intro} 3 3 4 The C programming language~\cite{C11}4 The C programming language\cit{C} 5 5 6 The \CFA programming language~\cite{cfa:frontpage,cfa:typesystem} extends the C programming language by adding modern safety and productivity features, while maintaining backwards compatibility. Among its productivity features, \CFA supports user-level threading~\cite{Delisle21} allowing programmers to write modern concurrent and parallel programs. 7 My previous master's thesis on concurrency in \CFA focused on features and interfaces. 8 This Ph.D.\ thesis focuses on performance, introducing \glsxtrshort{api} changes only when required by performance considerations. Specifically, this work concentrates on scheduling and \glsxtrshort{io}. Prior to this work, the \CFA runtime used a strict \glsxtrshort{fifo} \gls{rQ} and had no non-blocking I/O capabilities at the user-thread level. 6 The \CFA programming language\cite{cfa:frontpage,cfa:typesystem} extends the C programming language to add modern safety and productivity features while maintaining backwards compatibility. Among its productivity features, \CFA introduces support for threading\cit{CFA Concurrency}, to allow programmers to write modern concurrent and parallel programs. 7 While previous work on the concurrent package of \CFA focused on features and interfaces, this thesis focuses on performance, introducing \glsxtrshort{api} changes only when required by performance considerations. More specifically, this thesis concentrates on scheduling and \glsxtrshort{io}. Prior to this work, the \CFA runtime used a strictly \glsxtrshort{fifo} \gls{rQ}. 9 8 10 As a research project, this work builds exclusively on newer versions of the Linux operating-system and gcc/clang compilers.
While \CFA is released, supporting older versions of Linux ($<$~Ubuntu 16.04) and gcc/clang compilers ($<$~gcc 6.0) is not a goal of this work.9 This work exclusively concentrates on Linux as its operating system since the existing \CFA runtime and compiler do not already support other operating systems. Furthermore, as \CFA is yet to be released, supporting versions of Linux older than the latest version is not a goal of this work. -
doc/theses/thierry_delisle_PhD/thesis/text/practice.tex
re5d9274 r015925a 7 7 More precisely, \CFA supports adding \procs using the RAII object @processor@. 8 8 These objects can be created at any time and can be destroyed at any time. 9 They are normally created as automatic stack variables, but this is not a requirement.9 They are normally created as automatic stack variables, but this is not a requirement. 10 10 11 11 The consequence is that the scheduler and \io subsystems must support \procs coming in and out of existence. 12 12 13 13 \section{Manual Resizing} 14 Manual resizing is expected to be a rare operation. 15 Programmers are mostly expected to resize clusters on startup or teardown. 16 Therefore dynamically changing the number of \procs is an appropriate moment to allocate or free resources to match the new state. 17 As such all internal arrays that are sized based on the number of \procs need to be \texttt{realloc}ed. 18 This also means that any references into these arrays, pointers or indexes, may need to be fixed when shrinking\footnote{Indexes may still need fixing when shrinking because some indexes are expected to refer to dense contiguous resources and there is no guarantee the resource being removed has the highest index.}. 14 The consequence of dynamically changing the number of \procs is that all internal arrays that are sized based on the number of \procs need to be \texttt{realloc}ed. 15 This also means that any references into these arrays, pointers or indexes, may need to be fixed when shrinking\footnote{Indexes may still need fixing because there is no guarantee the \proc causing the shrink had the highest index. Therefore indexes need to be reassigned to preserve contiguous indexes.}. 19 16 20 There are no performance requirements, within reason, for resizing since it is expected to be rare.17 There are no performance requirements, within reason, for resizing since this is usually considered as part of setup and teardown.
21 18 However, this operation has strict correctness requirements since shrinking and idle sleep can easily lead to deadlocks. 22 19 It should also avoid as much as possible any effect on performance when the number of \procs remains constant. 23 This latter requirement prohibits naive solutions, like simply adding a global lock to the ready-queue arrays.20 This latter requirement prohibits simple solutions, like simply adding a global lock to these arrays. 24 21 25 22 \subsection{Read-Copy-Update} … … 27 24 In this pattern, resizing is done by creating a copy of the internal data structures, updating the copy with the desired changes, and then attempting an Indiana Jones Switch to replace the original with the copy. 28 25 This approach potentially has the advantage that it may not need any synchronization to do the switch. 29 However, there is a race where \procs could still use the previous, original, data structure after the copy was switched in. 30 This race not only requires some added memory reclamation scheme, it also requires that operations made on the stale original version be eventually moved to the copy. 26 The switch definitely implies a race where \procs could still use the previous, original, data structure after the copy was switched in. 27 The important question then becomes whether or not this race can be recovered from. 28 If the changes that arrived late can be transferred from the original to the copy then this solution works. 31 29 32 For linked-lists, enqueuing is only somewhat problematic, \ats enqueued to the original queues need to be transferred to the new, which might not preserve ordering. 33 Dequeuing is more challenging. 30 For linked-lists, dequeuing is somewhat of a problem. 34 31 Dequeuing from the original will not necessarily update the copy which could lead to multiple \procs dequeuing the same \at.
35 Fixing this requires more synchronization or more indirection on the queues.32 Fixing this requires making the array contain pointers to subqueues rather than the subqueues themselves. 36 33 37 34 Another challenge is that the original must be kept until all \procs have witnessed the change. … … 100 97 In addition to users manually changing the number of \procs, it is desirable to support ``removing'' \procs when there are not enough \ats for all the \procs to be useful. 101 98 While manual resizing is expected to be rare, the number of \ats is expected to vary much more which means \procs may need to be ``removed'' for only short periods of time. 102 Furthermore, race conditions that spuriously lead to the impression that no \ats are ready are actually common in practice.103 Therefore resources associated with \procs should not be freed but \procs simply put into an idle state where the \gls{kthrd} is blocked until more \ats become ready.99 Furthermore, race conditions that spuriously lead to the impression that no \ats are ready are actually common in practice. 100 Therefore \procs should not be actually \emph{removed} but simply put into an idle state where the \gls{kthrd} is blocked until more \ats become ready. 104 101 This state is referred to as \newterm{Idle-Sleep}. 105 102 … … 113 110 The \CFA scheduler simply follows the ``Race-to-Idle''\cit{https://doi.org/10.1137/1.9781611973099.100} approach where a sleeping \proc is woken any time an \at becomes ready and \procs go to idle sleep anytime they run out of work.
114 111 115 \section{Sleeping}116 As usual, the cornerstone of any feature related to the kernel is the choice of system call.117 In terms of blocking a \gls{kthrd} until some event occurs the Linux kernel has many available options:118 119 \paragraph{\texttt{pthread\_mutex}/\texttt{pthread\_cond}}120 The most classic option is to use some combination of \texttt{pthread\_mutex} and \texttt{pthread\_cond}.121 These serve as straightforward mutual exclusion and synchronization tools and allow a \gls{kthrd} to wait on a \texttt{pthread\_cond} until signalled.122 While this approach is generally perfectly appropriate for \glspl{kthrd} waiting on each other, \io operations do not signal \texttt{pthread\_cond}s.123 For \io results to wake a \proc waiting on a \texttt{pthread\_cond} means that a different \gls{kthrd} must be woken up first, and then the \proc can be signalled.124 125 \subsection{\texttt{io\_uring} and Epoll}126 An alternative is to flip the problem on its head and block waiting for \io, using \texttt{io\_uring} or even \texttt{epoll}.127 This creates the inverse situation, where \io operations directly wake sleeping \procs but waking a \proc from a running \gls{kthrd} must use an indirect scheme.128 This generally takes the form of creating a file descriptor, \eg, a dummy file, a pipe or an event fd, and using that file descriptor when \procs need to wake each other.129 This leads to additional complexity because there can be a race between these artificial \io operations and genuine \io operations.130 If not handled correctly, this can lead to the artificial files going out of sync.131 132 \subsection{Event FDs}133 Another interesting approach is to use an event file descriptor\cit{eventfd}.134 This is a Linux feature that is a file descriptor that behaves like \io, \ie, uses \texttt{read} and \texttt{write}, but also behaves like a semaphore.135 Indeed, all reads and writes must use 64-bit values\footnote{On 64-bit Linux, a 32-bit Linux would
use 32-bit values.}.136 Writes add their values to the buffer, that is arithmetic addition and not buffer append, and reads zero out the buffer and return the buffer values so far\footnote{This is without the \texttt{EFD\_SEMAPHORE} flag. This flag changes the behavior of \texttt{read} but is not needed for this work.}.137 If a read is made while the buffer is already 0, the read blocks until a non-0 value is added.138 What makes this feature particularly interesting is that \texttt{io\_uring} supports the \texttt{IORING\_REGISTER\_EVENTFD} command, to register an event fd to a particular instance.139 Once that instance is registered, any \io completion will result in \texttt{io\_uring} writing to the event FD.140 This means that a \proc waiting on the event FD can be \emph{directly} woken up by either other \procs or incoming \io.141 142 \begin{figure}143 \centering144 \input{idle1.pstex_t}145 \caption[Basic Idle Sleep Data Structure]{Basic Idle Sleep Data Structure \smallskip\newline Each idle \proc is put onto a doubly-linked stack protected by a lock.146 Each \proc has a private event FD.}147 \label{fig:idle1}148 \end{figure}149 150 112 151 113 \section{Tracking Sleepers} 152 114 Tracking which \procs are in idle sleep requires a data structure holding all the sleeping \procs, but more importantly it requires a concurrent \emph{handshake} so that no \at is stranded on a ready-queue with no active \proc. 153 115 The classic challenge is when a \at is made ready while a \proc is going to sleep, there is a race where the new \at may not see the sleeping \proc and the sleeping \proc may not see the ready \at. 
154 Since \ats can be made ready by timers, \io operations or other events outside a cluster, this race can occur even if the \proc going to sleep is the only \proc awake.155 As a result, improper handling of this race can lead to all \procs going to sleep and the system deadlocking.156 116 157 Furthermore, the ``Race-to-Idle'' approach means that there may be contention on the data structure tracking sleepers. 158 Contention slowing down \procs attempting to sleep or wake-up can be tolerated. 159 These \procs are not doing useful work and therefore not contributing to overall performance. 160 However, notifying, checking if a \proc must be woken-up and doing so if needed, can significantly affect overall performance and must be low cost. 117 Furthermore, the ``Race-to-Idle'' approach means that there is some 161 118 162 \subsection{Sleepers List} 163 Each cluster maintains a list of idle \procs, organized as a stack. 164 This ordering hopefully allows \proc at the tail to stay in idle sleep for extended periods of time. 165 Because of these unbalanced performance requirements, the algorithm tracking sleepers is designed to have idle \proc handle as much of the work as possible. 166 The idle \procs maintain the list of sleepers among themselves and notifying a sleeping \proc takes as little work as possible. 167 This approach means that maintaining the list is fairly straightforward. 168 The list can simply use a single lock per cluster and only \procs that are getting in and out of idle state will contend for that lock. 119 \section{Sleeping} 169 120 170 This approach also simplifies notification. 171 Indeed, \procs need to be notified when a new \at is readied, but they also must be notified during resizing, so the \gls{kthrd} can be joined. 172 This means that whichever entity removes idle \procs from the sleeper list must be able to do so in any order. 173 Using a simple lock over this data structure makes the removal much simpler than using a lock-free data structure. 
174 The notification process then simply needs to wake-up the desired idle \proc, using \texttt{pthread\_cond\_signal}, \texttt{write} on an fd, etc., and the \proc will handle the rest. 121 \subsection{Event FDs} 175 122 176 \subsection{Reducing Latency} 177 As mentioned in this section, \procs going idle for extremely short periods of time is likely in certain common scenarios. 178 Therefore, the latency of doing a system call to read from and write to the event fd can actually negatively affect overall performance in a notable way. 179 It is important to reduce latency and contention of the notification as much as possible. 180 Figure~\ref{fig:idle1} shows the basic idle sleep data structure. 181 For the notifiers, this data structure can cause contention on the lock and the event fd syscall can cause notable latency. 123 \subsection{Epoll} 182 124 183 \begin{figure} 184 \centering 185 \input{idle2.pstex_t} 186 \caption[Improved Idle Sleep Data Structure]{Improved Idle Sleep Data Structure \smallskip\newline An atomic pointer is added to the list, pointing to the Event FD of the first \proc on the list.} 187 \label{fig:idle2} 188 \end{figure} 125 \subsection{\texttt{io\_uring}} 189 126 190 The contention is mostly due to the lock on the list needing to be held to get to the head \proc. 191 That lock can be contended by \procs attempting to go to sleep, \procs waking or notification attempts. 192 The contention from the \procs attempting to go to sleep can be mitigated slightly by using \texttt{try\_acquire} instead, so the \procs simply continue searching for \ats if the lock is held. 193 This trick cannot be used for waking \procs since they are not in a state where they can run \ats. 194 However, it is worth noting that notification does not strictly require accessing the list or the head \proc. 
195 Therefore, contention can be reduced notably by having notifiers avoid the lock entirely and adding a pointer to the event fd of the first idle \proc, as in Figure~\ref{fig:idle2}. 196 To avoid contention between the notifiers, instead of simply reading the atomic pointer, notifiers atomically exchange it to \texttt{null} so only one notifier will contend on the system call. 197 198 \begin{figure} 199 \centering 200 \input{idle_state.pstex_t} 201 \caption[Improved Idle Sleep Data Structure]{Improved Idle Sleep Data Structure \smallskip\newline An atomic pointer is added to the list, pointing to the Event FD of the first \proc on the list.} 202 \label{fig:idle:state} 203 \end{figure} 204 205 The next optimization that can be done is to avoid the latency of the event fd when possible. 206 This can be done by adding what is effectively a benaphore\cit{benaphore} in front of the event fd. 207 A simple three state flag is added beside the event fd to avoid unnecessary system calls, as shown in Figure~\ref{fig:idle:state}. 208 The flag starts in state \texttt{SEARCH}, while the \proc is searching for \ats to run. 209 The \proc then confirms the sleep by atomically swapping the state to \texttt{SLEEP}. 210 If the previous state was still \texttt{SEARCH}, then the \proc does read the event fd. 211 Meanwhile, notifiers atomically exchange the state to \texttt{AWAKE}. 212 If the previous state was \texttt{SLEEP}, then the notifier must write to the event fd. 213 However, if the notification arrives almost immediately after the \proc marks itself idle, then both reads and writes on the event fd can be omitted, which reduces latency notably. 214 This leads to the final data structure shown in Figure~\ref{fig:idle}. 215 216 \begin{figure} 217 \centering 218 \input{idle.pstex_t} 219 \caption[Low-latency Idle Sleep Data Structure]{Low-latency Idle Sleep Data Structure \smallskip\newline Each idle \proc is put onto a doubly-linked stack protected by a lock. 
220 Each \proc has a private event FD with a benaphore in front of it. 221 The list also has an atomic pointer to the event fd and benaphore of the first \proc on the list.} 222 \label{fig:idle} 223 \end{figure} 127 \section{Reducing Latency} -
doc/theses/thierry_delisle_PhD/thesis/thesis.tex
re5d9274 r015925a 80 80 %\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org) 81 81 \usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments 82 \usepackage [dvipsnames]{xcolor}82 \usepackage{xcolor} 83 83 \usepackage{graphicx} % For including graphics 84 \usepackage{subcaption}85 84 86 85 % Hyperlinks make it very easy to navigate an electronic document. … … 105 104 colorlinks=true, % false: boxed links; true: colored links 106 105 linkcolor=blue, % color of internal links 107 citecolor= OliveGreen,% color of links to bibliography106 citecolor=green, % color of links to bibliography 108 107 filecolor=magenta, % color of file links 109 108 urlcolor=cyan % color of external links … … 205 204 \newcommand\at{\gls{at}\xspace}% 206 205 \newcommand\ats{\glspl{at}\xspace}% 207 \newcommand\Proc{\Pls{proc}\xspace}%208 206 \newcommand\proc{\gls{proc}\xspace}% 209 207 \newcommand\procs{\glspl{proc}\xspace}% -
libcfa/src/Makefile.am
re5d9274 r015925a 33 33 # The built sources must not depend on the installed inst_headers_src 34 34 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr -I$(srcdir)/concurrency $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@ 35 AM_CFLAGS = -g -Wall -Werror=return-type -Wno-unused-function -fPIC -fexceptions - fvisibility=hidden -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@35 AM_CFLAGS = -g -Wall -Werror=return-type -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@ 36 36 AM_CCASFLAGS = -g -Wall -Werror=return-type -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@ 37 37 CFACC = @CFACC@ … … 194 194 195 195 prelude.o : prelude.cfa extras.cf gcc-builtins.cf builtins.cf @LOCAL_CFACC@ @CFACPP@ 196 ${AM_V_GEN}$(CFACOMPILE) -quiet -XCFA,-l ${<} -c - fvisibility=default -o ${@}196 ${AM_V_GEN}$(CFACOMPILE) -quiet -XCFA,-l ${<} -c -o ${@} 197 197 198 198 prelude.lo: prelude.cfa extras.cf gcc-builtins.cf builtins.cf @LOCAL_CFACC@ @CFACPP@ 199 199 ${AM_V_GEN}$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile \ 200 $(CFACOMPILE) -quiet -XCFA,-l ${<} -c - fvisibility=default -o ${@}200 $(CFACOMPILE) -quiet -XCFA,-l ${<} -c -o ${@} 201 201 202 202 concurrency/io/call.cfa: $(srcdir)/concurrency/io/call.cfa.in -
libcfa/src/algorithms/range_iterator.cfa
re5d9274 r015925a 20 20 #include <fstream.hfa> 21 21 22 #include "bits/defs.hfa" 23 24 void main(RangeIter & this) libcfa_public { 22 void main(RangeIter & this) { 25 23 for() { 26 24 this._start = -1; -
libcfa/src/assert.cfa
re5d9274 r015925a 19 19 #include <unistd.h> // STDERR_FILENO 20 20 #include "bits/debug.hfa" 21 #include "bits/defs.hfa"22 21 23 22 extern "C" { … … 27 26 28 27 // called by macro assert in assert.h 29 // would be cool to remove libcfa_public but it's needed for libcfathread 30 void __assert_fail( const char assertion[], const char file[], unsigned int line, const char function[] ) libcfa_public { 28 void __assert_fail( const char assertion[], const char file[], unsigned int line, const char function[] ) { 31 29 __cfaabi_bits_print_safe( STDERR_FILENO, CFA_ASSERT_FMT ".\n", assertion, __progname, function, line, file ); 32 30 abort(); … … 34 32 35 33 // called by macro assertf 36 // would be cool to remove libcfa_public but it's needed for libcfathread 37 void __assert_fail_f( const char assertion[], const char file[], unsigned int line, const char function[], const char fmt[], ... ) libcfa_public { 34 void __assert_fail_f( const char assertion[], const char file[], unsigned int line, const char function[], const char fmt[], ... ) { 38 35 __cfaabi_bits_acquire(); 39 36 __cfaabi_bits_print_nolock( STDERR_FILENO, CFA_ASSERT_FMT ": ", assertion, __progname, function, line, file ); -
libcfa/src/bits/debug.cfa
re5d9274 r015925a 21 21 #include <unistd.h> 22 22 23 #include "bits/defs.hfa"24 25 23 enum { buffer_size = 4096 }; 26 24 static char buffer[ buffer_size ]; 27 25 28 26 extern "C" { 29 // would be cool to remove libcfa_public but it's needed for libcfathread 30 void __cfaabi_bits_write( int fd, const char in_buffer[], int len ) libcfa_public { 27 void __cfaabi_bits_write( int fd, const char in_buffer[], int len ) { 31 28 // ensure all data is written 32 29 for ( int count = 0, retcode; count < len; count += retcode ) { … … 47 44 void __cfaabi_bits_release() __attribute__((__weak__)) {} 48 45 49 // would be cool to remove libcfa_public but it's needed for libcfathread 50 int __cfaabi_bits_print_safe ( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) )) libcfa_public { 46 int __cfaabi_bits_print_safe ( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) )) { 51 47 va_list args; 52 48 -
libcfa/src/bits/defs.hfa
re5d9274 r015925a 36 36 #define __cfa_dlink(x) struct { struct x * next; struct x * back; } __dlink_substitute 37 37 #endif 38 39 #define libcfa_public __attribute__((visibility("default")))40 38 41 39 #ifdef __cforall -
libcfa/src/bits/weakso_locks.cfa
re5d9274 r015925a 18 18 #include "bits/weakso_locks.hfa" 19 19 20 #pragma GCC visibility push(default)21 22 20 void ?{}( blocking_lock &, bool, bool ) {} 23 21 void ^?{}( blocking_lock & ) {} -
libcfa/src/common.cfa
re5d9274 r015925a 18 18 #include <stdlib.h> // div_t, *div 19 19 20 #pragma GCC visibility push(default)21 22 20 //--------------------------------------- 23 21 -
libcfa/src/concurrency/alarm.cfa
re5d9274 r015925a 141 141 //============================================================================================= 142 142 143 void sleep( Duration duration ) libcfa_public{143 void sleep( Duration duration ) { 144 144 alarm_node_t node = { active_thread(), duration, 0`s }; 145 145 -
libcfa/src/concurrency/clib/cfathread.cfa
re5d9274 r015925a 237 237 238 238 typedef ThreadCancelled(cfathread_object) cfathread_exception; 239 typedef vtable(ThreadCancelled(cfathread_object)) cfathread_vtable;239 typedef ThreadCancelled_vtable(cfathread_object) cfathread_vtable; 240 240 241 241 void defaultResumptionHandler(ThreadCancelled(cfathread_object) & except) { … … 283 283 284 284 typedef ThreadCancelled(__cfainit) __cfainit_exception; 285 typedef vtable(ThreadCancelled(__cfainit)) __cfainit_vtable;285 typedef ThreadCancelled_vtable(__cfainit) __cfainit_vtable; 286 286 287 287 void defaultResumptionHandler(ThreadCancelled(__cfainit) & except) { … … 326 326 } 327 327 328 #pragma GCC visibility push(default)329 330 328 //================================================================================ 331 329 // Main Api 332 330 extern "C" { 333 int cfathread_cluster_create(cfathread_cluster_t * cl) __attribute__((nonnull(1))) libcfa_public{331 int cfathread_cluster_create(cfathread_cluster_t * cl) __attribute__((nonnull(1))) { 334 332 *cl = new(); 335 333 return 0; 336 334 } 337 335 338 cfathread_cluster_t cfathread_cluster_self(void) libcfa_public{336 cfathread_cluster_t cfathread_cluster_self(void) { 339 337 return active_cluster(); 340 338 } 341 339 342 int cfathread_cluster_print_stats( cfathread_cluster_t cl ) libcfa_public{340 int cfathread_cluster_print_stats( cfathread_cluster_t cl ) { 343 341 #if !defined(__CFA_NO_STATISTICS__) 344 342 print_stats_at_exit( *cl, CFA_STATS_READY_Q | CFA_STATS_IO ); -
libcfa/src/concurrency/coroutine.cfa
re5d9274 r015925a 48 48 //----------------------------------------------------------------------------- 49 49 forall(T &) 50 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) libcfa_public{50 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) { 51 51 dst->virtual_table = src->virtual_table; 52 52 dst->the_coroutine = src->the_coroutine; … … 55 55 56 56 forall(T &) 57 const char * msg(CoroutineCancelled(T) *) libcfa_public{57 const char * msg(CoroutineCancelled(T) *) { 58 58 return "CoroutineCancelled(...)"; 59 59 } … … 62 62 forall(T & | is_coroutine(T)) 63 63 void __cfaehm_cancelled_coroutine( 64 T & cor, coroutine$ * desc, EHM_DEFAULT_VTABLE(CoroutineCancelled (T)) ) libcfa_public{64 T & cor, coroutine$ * desc, EHM_DEFAULT_VTABLE(CoroutineCancelled, (T)) ) { 65 65 verify( desc->cancellation ); 66 66 desc->state = Cancelled; … … 89 89 90 90 void __stack_prepare( __stack_info_t * this, size_t create_size ); 91 staticvoid __stack_clean ( __stack_info_t * this );91 void __stack_clean ( __stack_info_t * this ); 92 92 93 93 //----------------------------------------------------------------------------- … … 114 114 } 115 115 116 void ?{}( coroutine$ & this, const char name[], void * storage, size_t storageSize ) libcfa_publicwith( this ) {116 void ?{}( coroutine$ & this, const char name[], void * storage, size_t storageSize ) with( this ) { 117 117 (this.context){0p, 0p}; 118 118 (this.stack){storage, storageSize}; … … 124 124 } 125 125 126 void ^?{}(coroutine$& this) libcfa_public{126 void ^?{}(coroutine$& this) { 127 127 if(this.state != Halted && this.state != Start && this.state != Primed) { 128 128 coroutine$ * src = active_coroutine(); … … 146 146 // Part of the Public API 147 147 // Not inline since only ever called once per coroutine 148 forall(T & | is_coroutine(T) | { EHM_DEFAULT_VTABLE(CoroutineCancelled (T)); })149 void prime(T& cor) libcfa_public{148 forall(T & | is_coroutine(T) | { EHM_DEFAULT_VTABLE(CoroutineCancelled, 
(T)); }) 149 void prime(T& cor) { 150 150 coroutine$* this = get_coroutine(cor); 151 151 assert(this->state == Start); … … 155 155 } 156 156 157 static[void *, size_t] __stack_alloc( size_t storageSize ) {157 [void *, size_t] __stack_alloc( size_t storageSize ) { 158 158 const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment 159 159 assert(__page_size != 0l); … … 193 193 } 194 194 195 staticvoid __stack_clean ( __stack_info_t * this ) {195 void __stack_clean ( __stack_info_t * this ) { 196 196 void * storage = this->storage->limit; 197 197 … … 215 215 } 216 216 217 void __stack_prepare( __stack_info_t * this, size_t create_size ) libcfa_public{217 void __stack_prepare( __stack_info_t * this, size_t create_size ) { 218 218 const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment 219 219 bool userStack; -
libcfa/src/concurrency/coroutine.hfa
re5d9274 r015925a 22 22 //----------------------------------------------------------------------------- 23 23 // Exception thrown from resume when a coroutine stack is cancelled. 24 forall(coroutine_t &) 25 exception CoroutineCancelled { 24 EHM_FORALL_EXCEPTION(CoroutineCancelled, (coroutine_t &), (coroutine_t)) ( 26 25 coroutine_t * the_coroutine; 27 26 exception_t * the_exception; 28 };27 ); 29 28 30 29 forall(T &) … … 38 37 // Anything that implements this trait can be resumed. 39 38 // Anything that is resumed is a coroutine. 40 trait is_coroutine(T & | IS_RESUMPTION_EXCEPTION(CoroutineCancelled (T))) {39 trait is_coroutine(T & | IS_RESUMPTION_EXCEPTION(CoroutineCancelled, (T))) { 41 40 void main(T & this); 42 41 coroutine$ * get_coroutine(T & this); … … 61 60 //----------------------------------------------------------------------------- 62 61 // Public coroutine API 63 forall(T & | is_coroutine(T) | { EHM_DEFAULT_VTABLE(CoroutineCancelled (T)); })62 forall(T & | is_coroutine(T) | { EHM_DEFAULT_VTABLE(CoroutineCancelled, (T)); }) 64 63 void prime(T & cor); 65 64 … … 114 113 115 114 extern void __stack_prepare( __stack_info_t * this, size_t size /* ignored if storage already allocated */); 115 extern void __stack_clean ( __stack_info_t * this ); 116 116 117 117 118 // Suspend implementation inlined for performance … … 140 141 forall(T & | is_coroutine(T)) 141 142 void __cfaehm_cancelled_coroutine( 142 T & cor, coroutine$ * desc, EHM_DEFAULT_VTABLE(CoroutineCancelled (T)) );143 T & cor, coroutine$ * desc, EHM_DEFAULT_VTABLE(CoroutineCancelled, (T)) ); 143 144 144 145 // Resume implementation inlined for performance 145 forall(T & | is_coroutine(T) | { EHM_DEFAULT_VTABLE(CoroutineCancelled (T)); })146 forall(T & | is_coroutine(T) | { EHM_DEFAULT_VTABLE(CoroutineCancelled, (T)); }) 146 147 static inline T & resume(T & cor) { 147 148 // optimization : read TLS once and reuse it -
libcfa/src/concurrency/exception.cfa
re5d9274 r015925a 64 64 extern "C" { 65 65 66 struct exception_context_t * this_exception_context(void) libcfa_public{66 struct exception_context_t * this_exception_context(void) { 67 67 return &__get_stack( active_coroutine() )->exception_context; 68 68 } 69 69 70 _Unwind_Reason_Code __cfaehm_cancellation_unwind( struct _Unwind_Exception * unwind_exception ) libcfa_public{70 _Unwind_Reason_Code __cfaehm_cancellation_unwind( struct _Unwind_Exception * unwind_exception ) { 71 71 _Unwind_Stop_Fn stop_func; 72 72 void * stop_param; -
libcfa/src/concurrency/invoke.c
re5d9274 r015925a 36 36 extern void enable_interrupts( _Bool poll ); 37 37 38 libcfa_publicvoid __cfactx_invoke_coroutine(38 void __cfactx_invoke_coroutine( 39 39 void (*main)(void *), 40 40 void *this … … 70 70 } 71 71 72 libcfa_publicvoid __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct coroutine$ * cor) __attribute__ ((__noreturn__));72 void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct coroutine$ * cor) __attribute__ ((__noreturn__)); 73 73 void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct coroutine$ * cor) { 74 74 _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, __cfactx_coroutine_unwindstop, cor ); … … 77 77 } 78 78 79 libcfa_publicvoid __cfactx_invoke_thread(79 void __cfactx_invoke_thread( 80 80 void (*main)(void *), 81 81 void *this … … 98 98 } 99 99 100 libcfa_publicvoid __cfactx_start(100 void __cfactx_start( 101 101 void (*main)(void *), 102 102 struct coroutine$ * cor, -
libcfa/src/concurrency/io.cfa
re5d9274 r015925a 221 221 const unsigned long long ctsc = rdtscl(); 222 222 223 if(proc->io.target == UINT_MAX) {223 if(proc->io.target == MAX) { 224 224 uint64_t chaos = __tls_rand(); 225 225 unsigned ext = chaos & 0xff; … … 232 232 else { 233 233 const unsigned target = proc->io.target; 234 /* paranoid */ verify( io.tscs[target].tv != ULLONG_MAX );234 /* paranoid */ verify( io.tscs[target].tv != MAX ); 235 235 HELP: if(target < ctxs_count) { 236 236 const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io); … … 246 246 __STATS__( true, io.calls.helped++; ) 247 247 } 248 proc->io.target = UINT_MAX;248 proc->io.target = MAX; 249 249 } 250 250 } … … 340 340 // for convenience, return both the index and the pointer to the sqe 341 341 // sqe == &sqes[idx] 342 struct $io_context * cfa_io_allocate(struct io_uring_sqe * sqes[], __u32 idxs[], __u32 want) libcfa_public{342 struct $io_context * cfa_io_allocate(struct io_uring_sqe * sqes[], __u32 idxs[], __u32 want) { 343 343 // __cfadbg_print_safe(io, "Kernel I/O : attempting to allocate %u\n", want); 344 344 … … 419 419 } 420 420 421 void cfa_io_submit( struct $io_context * inctx, __u32 idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1))) libcfa_public{421 void cfa_io_submit( struct $io_context * inctx, __u32 idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1))) { 422 422 // __cfadbg_print_safe(io, "Kernel I/O : attempting to submit %u (%s)\n", have, lazy ? "lazy" : "eager"); 423 423 -
libcfa/src/concurrency/io/call.cfa.in
re5d9274 r015925a 139 139 // I/O Interface 140 140 //============================================================================================= 141 #pragma GCC visibility push(default)142 141 """ 143 142 -
libcfa/src/concurrency/io/setup.cfa
re5d9274 r015925a 26 26 27 27 #if !defined(CFA_HAVE_LINUX_IO_URING_H) 28 void ?{}(io_context_params & this) libcfa_public{}28 void ?{}(io_context_params & this) {} 29 29 30 30 void ?{}($io_context & this, struct cluster & cl) {} … … 66 66 #pragma GCC diagnostic pop 67 67 68 void ?{}(io_context_params & this) libcfa_public{68 void ?{}(io_context_params & this) { 69 69 this.num_entries = 256; 70 70 } -
libcfa/src/concurrency/io/types.hfa
re5d9274 r015925a 17 17 #pragma once 18 18 19 #include <limits.h>20 21 19 extern "C" { 22 20 #include <linux/types.h> … … 27 25 #include "iofwd.hfa" 28 26 #include "kernel/fwd.hfa" 27 #include "limits.hfa" 29 28 30 29 #if defined(CFA_HAVE_LINUX_IO_URING_H) … … 141 140 const __u32 tail = *this->cq.tail; 142 141 143 if(head == tail) return ULLONG_MAX;142 if(head == tail) return MAX; 144 143 145 144 return this->cq.ts; -
libcfa/src/concurrency/kernel.cfa
re5d9274 r015925a 389 389 390 390 // KERNEL_ONLY 391 staticvoid returnToKernel() {391 void returnToKernel() { 392 392 /* paranoid */ verify( ! __preemption_enabled() ); 393 393 coroutine$ * proc_cor = get_coroutine(kernelTLS().this_processor->runner); … … 547 547 } 548 548 549 void unpark( thread$ * thrd, unpark_hint hint ) libcfa_public{549 void unpark( thread$ * thrd, unpark_hint hint ) { 550 550 if( !thrd ) return; 551 551 … … 558 558 } 559 559 560 void park( void ) libcfa_public{560 void park( void ) { 561 561 __disable_interrupts_checked(); 562 562 /* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION ); … … 601 601 602 602 // KERNEL ONLY 603 bool force_yield( __Preemption_Reason reason ) libcfa_public{603 bool force_yield( __Preemption_Reason reason ) { 604 604 __disable_interrupts_checked(); 605 605 thread$ * thrd = kernelTLS().this_thread; … … 849 849 //----------------------------------------------------------------------------- 850 850 // Debug 851 bool threading_enabled(void) __attribute__((const)) libcfa_public{851 bool threading_enabled(void) __attribute__((const)) { 852 852 return true; 853 853 } … … 856 856 // Statistics 857 857 #if !defined(__CFA_NO_STATISTICS__) 858 void print_halts( processor & this ) libcfa_public{858 void print_halts( processor & this ) { 859 859 this.print_halts = true; 860 860 } … … 873 873 } 874 874 875 staticvoid crawl_cluster_stats( cluster & this ) {875 void crawl_cluster_stats( cluster & this ) { 876 876 // Stop the world, otherwise stats could get really messed-up 877 877 // this doesn't solve all problems but does solve many … … 889 889 890 890 891 void print_stats_now( cluster & this, int flags ) libcfa_public{891 void print_stats_now( cluster & this, int flags ) { 892 892 crawl_cluster_stats( this ); 893 893 __print_stats( this.stats, flags, "Cluster", this.name, (void*)&this ); -
libcfa/src/concurrency/kernel.hfa
re5d9274 r015925a 49 49 50 50 // Coroutine used py processors for the 2-step context switch 51 52 struct processorCtx_t { 53 struct coroutine$ self; 51 coroutine processorCtx_t { 54 52 struct processor * proc; 55 53 }; -
libcfa/src/concurrency/kernel/cluster.cfa
re5d9274 r015925a 49 49 50 50 // returns the maximum number of processors the RWLock support 51 __attribute__((weak)) unsigned __max_processors() libcfa_public{51 __attribute__((weak)) unsigned __max_processors() { 52 52 const char * max_cores_s = getenv("CFA_MAX_PROCESSORS"); 53 53 if(!max_cores_s) { … … 233 233 if(is_empty(sl)) { 234 234 assert( sl.anchor.next == 0p ); 235 assert( sl.anchor.ts == MAX);235 assert( sl.anchor.ts == -1llu ); 236 236 assert( mock_head(sl) == sl.prev ); 237 237 } else { 238 238 assert( sl.anchor.next != 0p ); 239 assert( sl.anchor.ts != MAX);239 assert( sl.anchor.ts != -1llu ); 240 240 assert( mock_head(sl) != sl.prev ); 241 241 } … … 259 259 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count); 260 260 it->rdq.id = valrq; 261 it->rdq.target = UINT_MAX;261 it->rdq.target = MAX; 262 262 valrq += __shard_factor.readyq; 263 263 #if defined(CFA_HAVE_LINUX_IO_URING_H) 264 264 it->io.ctx->cq.id = valio; 265 it->io.target = UINT_MAX;265 it->io.target = MAX; 266 266 valio += __shard_factor.io; 267 267 #endif … … 472 472 this.prev = mock_head(this); 473 473 this.anchor.next = 0p; 474 this.anchor.ts = MAX;474 this.anchor.ts = -1llu; 475 475 #if !defined(__CFA_NO_STATISTICS__) 476 476 this.cnt = 0; … … 484 484 /* paranoid */ verify( &mock_head(this)->link.ts == &this.anchor.ts ); 485 485 /* paranoid */ verify( mock_head(this)->link.next == 0p ); 486 /* paranoid */ verify( mock_head(this)->link.ts == MAX);486 /* paranoid */ verify( mock_head(this)->link.ts == -1llu ); 487 487 /* paranoid */ verify( mock_head(this) == this.prev ); 488 488 /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 ); … … 495 495 // Make sure the list is empty 496 496 /* paranoid */ verify( this.anchor.next == 0p ); 497 /* paranoid */ verify( this.anchor.ts == MAX);497 /* paranoid */ verify( this.anchor.ts == -1llu ); 498 498 /* paranoid */ verify( mock_head(this) == this.prev ); 499 499 } -
libcfa/src/concurrency/kernel/cluster.hfa
re5d9274 r015925a 19 19 #include "kernel/private.hfa" 20 20 21 #include <limits.h>21 #include "limits.hfa" 22 22 23 23 //----------------------------------------------------------------------- … … 37 37 38 38 static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next) { 39 if (ts_next == ULLONG_MAX) return;39 if (ts_next == MAX) return; 40 40 unsigned long long now = rdtscl(); 41 41 unsigned long long pma = __atomic_load_n(&tscs[ idx ].ma, __ATOMIC_RELAXED); … … 59 59 for(i; shard_factor) { 60 60 unsigned long long ptsc = ts(data[start + i]); 61 if(ptsc != ULLONG_MAX) {61 if(ptsc != -1ull) { 62 62 /* paranoid */ verify( start + i < count ); 63 63 unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].ma); -
libcfa/src/concurrency/kernel/private.hfa
re5d9274 r015925a 109 109 //----------------------------------------------------------------------------- 110 110 // Processor 111 void main(processorCtx_t &); 112 static inline coroutine$* get_coroutine(processorCtx_t & this) { return &this.self; } 111 void main(processorCtx_t *); 113 112 114 113 void * __create_pthread( pthread_t *, void * (*)(void *), void * ); -
libcfa/src/concurrency/kernel/startup.cfa
re5d9274 r015925a 120 120 #endif 121 121 122 cluster * mainCluster libcfa_public;122 cluster * mainCluster; 123 123 processor * mainProcessor; 124 124 thread$ * mainThread; … … 169 169 }; 170 170 171 staticvoid ?{}( current_stack_info_t & this ) {171 void ?{}( current_stack_info_t & this ) { 172 172 __stack_context_t ctx; 173 173 CtxGet( ctx ); … … 209 209 // Construct the processor context of the main processor 210 210 void ?{}(processorCtx_t & this, processor * proc) { 211 (this. self){ "Processor" };212 this. self.starter = 0p;211 (this.__cor){ "Processor" }; 212 this.__cor.starter = 0p; 213 213 this.proc = proc; 214 214 } … … 507 507 self_mon_p = &self_mon; 508 508 link.next = 0p; 509 link.ts = MAX;509 link.ts = -1llu; 510 510 preferred = ready_queue_new_preferred(); 511 511 last_proc = 0p; … … 526 526 // Construct the processor context of non-main processors 527 527 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info) { 528 (this. self){ info };528 (this.__cor){ info }; 529 529 this.proc = proc; 530 530 } … … 578 578 } 579 579 580 void ?{}(processor & this, const char name[], cluster & _cltr, thread$ * initT) libcfa_public{580 void ?{}(processor & this, const char name[], cluster & _cltr, thread$ * initT) { 581 581 ( this.terminated ){}; 582 582 ( this.runner ){}; … … 591 591 } 592 592 593 void ?{}(processor & this, const char name[], cluster & _cltr) libcfa_public{593 void ?{}(processor & this, const char name[], cluster & _cltr) { 594 594 (this){name, _cltr, 0p}; 595 595 } 596 596 597 597 extern size_t __page_size; 598 void ^?{}(processor & this) libcfa_public with( this ){598 void ^?{}(processor & this) with( this ){ 599 599 /* paranoid */ verify( !__atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ); 600 600 __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this); … … 623 623 } 624 624 625 void ?{}(cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const 
io_context_params & io_params) libcfa_publicwith( this ) {625 void ?{}(cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params) with( this ) { 626 626 this.name = name; 627 627 this.preemption_rate = preemption_rate; … … 658 658 } 659 659 660 void ^?{}(cluster & this) libcfa_public{660 void ^?{}(cluster & this) { 661 661 destroy(this.io.arbiter); 662 662 -
libcfa/src/concurrency/locks.cfa
re5d9274 r015925a 24 24 #include <stdlib.hfa> 25 25 26 #pragma GCC visibility push(default)27 28 26 //----------------------------------------------------------------------------- 29 27 // info_thread … … 118 116 } 119 117 120 staticvoid pop_and_set_new_owner( blocking_lock & this ) with( this ) {118 void pop_and_set_new_owner( blocking_lock & this ) with( this ) { 121 119 thread$ * t = &try_pop_front( blocked_threads ); 122 120 owner = t; … … 266 264 void ^?{}( alarm_node_wrap(L) & this ) { } 267 265 268 staticvoid timeout_handler ( alarm_node_wrap(L) & this ) with( this ) {266 void timeout_handler ( alarm_node_wrap(L) & this ) with( this ) { 269 267 // This condition_variable member is called from the kernel, and therefore, cannot block, but it can spin. 270 268 lock( cond->lock __cfaabi_dbg_ctx2 ); … … 290 288 291 289 // this casts the alarm node to our wrapped type since we used type erasure 292 staticvoid alarm_node_wrap_cast( alarm_node_t & a ) { timeout_handler( (alarm_node_wrap(L) &)a ); }290 void alarm_node_wrap_cast( alarm_node_t & a ) { timeout_handler( (alarm_node_wrap(L) &)a ); } 293 291 } 294 292 … … 307 305 void ^?{}( condition_variable(L) & this ){ } 308 306 309 staticvoid process_popped( condition_variable(L) & this, info_thread(L) & popped ) with( this ) {307 void process_popped( condition_variable(L) & this, info_thread(L) & popped ) with( this ) { 310 308 if(&popped != 0p) { 311 309 popped.signalled = true; … … 352 350 int counter( condition_variable(L) & this ) with(this) { return count; } 353 351 354 s tatic size_t queue_and_get_recursion( condition_variable(L) & this, info_thread(L) * i ) with(this) {352 size_t queue_and_get_recursion( condition_variable(L) & this, info_thread(L) * i ) with(this) { 355 353 // add info_thread to waiting queue 356 354 insert_last( blocked_threads, *i ); … … 365 363 366 364 // helper for wait()'s' with no timeout 367 staticvoid queue_info_thread( condition_variable(L) & this, info_thread(L) & i ) with(this) {365 
void queue_info_thread( condition_variable(L) & this, info_thread(L) & i ) with(this) { 368 366 lock( lock __cfaabi_dbg_ctx2 ); 369 367 size_t recursion_count = queue_and_get_recursion(this, &i); … … 382 380 383 381 // helper for wait()'s' with a timeout 384 staticvoid queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) {382 void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) { 385 383 lock( lock __cfaabi_dbg_ctx2 ); 386 384 size_t recursion_count = queue_and_get_recursion(this, &info); … … 417 415 // fast_cond_var 418 416 void ?{}( fast_cond_var(L) & this ){ 419 this.blocked_threads{}; 417 this.blocked_threads{}; 420 418 #ifdef __CFA_DEBUG__ 421 419 this.lock_used = 0p; -
libcfa/src/concurrency/monitor.cfa
re5d9274 r015925a 44 44 static inline void restore( monitor$ * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] ); 45 45 46 static inline void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info );47 static inline void ?{}(__condition_criterion_t & this );48 static inline void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t * owner );49 50 46 static inline void init ( __lock_size_t count, monitor$ * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ); 51 47 static inline void init_push( __lock_size_t count, monitor$ * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ); … … 247 243 248 244 // Leave single monitor 249 staticvoid __leave( monitor$ * this ) {245 void __leave( monitor$ * this ) { 250 246 // Lock the monitor spinlock 251 247 lock( this->lock __cfaabi_dbg_ctx2 ); … … 282 278 283 279 // Leave single monitor for the last time 284 staticvoid __dtor_leave( monitor$ * this, bool join ) {280 void __dtor_leave( monitor$ * this, bool join ) { 285 281 __cfaabi_dbg_debug_do( 286 282 if( active_thread() != this->owner ) { … … 348 344 // Ctor for monitor guard 349 345 // Sorts monitors before entering 350 void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count, fptr_t func ) libcfa_public{346 void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count, fptr_t func ) { 351 347 thread$ * thrd = active_thread(); 352 348 … … 373 369 } 374 370 375 void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count ) libcfa_public{371 void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count ) { 376 372 this{ m, count, 0p }; 377 373 } … … 379 375 380 376 // Dtor for monitor guard 381 void ^?{}( monitor_guard_t & this ) libcfa_public{377 void ^?{}( monitor_guard_t & this ) { 382 378 // __cfaabi_dbg_print_safe( "MGUARD : leaving 
%d\n", this.count); 383 379 … … 393 389 // Ctor for monitor guard 394 390 // Sorts monitors before entering 395 void ?{}( monitor_dtor_guard_t & this, monitor$ * m [], fptr_t func, bool join ) libcfa_public{391 void ?{}( monitor_dtor_guard_t & this, monitor$ * m [], fptr_t func, bool join ) { 396 392 // optimization 397 393 thread$ * thrd = active_thread(); … … 413 409 414 410 // Dtor for monitor guard 415 void ^?{}( monitor_dtor_guard_t & this ) libcfa_public{411 void ^?{}( monitor_dtor_guard_t & this ) { 416 412 // Leave the monitors in order 417 413 __dtor_leave( this.m, this.join ); … … 423 419 //----------------------------------------------------------------------------- 424 420 // Internal scheduling types 425 staticvoid ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info ) {421 void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info ) { 426 422 this.waiting_thread = waiting_thread; 427 423 this.count = count; … … 430 426 } 431 427 432 staticvoid ?{}(__condition_criterion_t & this ) with( this ) {428 void ?{}(__condition_criterion_t & this ) with( this ) { 433 429 ready = false; 434 430 target = 0p; … … 437 433 } 438 434 439 staticvoid ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t & owner ) {435 void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t & owner ) { 440 436 this.ready = false; 441 437 this.target = target; … … 446 442 //----------------------------------------------------------------------------- 447 443 // Internal scheduling 448 void wait( condition & this, uintptr_t user_info = 0 ) libcfa_public{444 void wait( condition & this, uintptr_t user_info = 0 ) { 449 445 brand_condition( this ); 450 446 … … 500 496 } 501 497 502 bool signal( condition & this ) libcfa_public{498 bool signal( condition & this ) { 503 499 if( is_empty( this ) ) { return false; } 504 500 … … 542 538 } 543 539 544 bool 
signal_block( condition & this ) libcfa_public{540 bool signal_block( condition & this ) { 545 541 if( !this.blocked.head ) { return false; } 546 542 … … 590 586 591 587 // Access the user_info of the thread waiting at the front of the queue 592 uintptr_t front( condition & this ) libcfa_public{588 uintptr_t front( condition & this ) { 593 589 verifyf( !is_empty(this), 594 590 "Attempt to access user data on an empty condition.\n" … … 612 608 // setup mask 613 609 // block 614 void __waitfor_internal( const __waitfor_mask_t & mask, int duration ) libcfa_public{610 void __waitfor_internal( const __waitfor_mask_t & mask, int duration ) { 615 611 // This statment doesn't have a contiguous list of monitors... 616 612 // Create one! … … 998 994 // Can't be accepted since a mutex stmt is effectively an anonymous routine 999 995 // Thus we do not need a monitor group 1000 void lock( monitor$ * this ) libcfa_public{996 void lock( monitor$ * this ) { 1001 997 thread$ * thrd = active_thread(); 1002 998 … … 1050 1046 // Leave routine for mutex stmt 1051 1047 // Is just a wrapper around __leave for the is_lock trait to see 1052 void unlock( monitor$ * this ) libcfa_public{ __leave( this ); }1048 void unlock( monitor$ * this ) { __leave( this ); } 1053 1049 1054 1050 // Local Variables: // -
libcfa/src/concurrency/monitor.hfa
re5d9274 r015925a 119 119 } 120 120 121 //void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info );122 //void ?{}(__condition_criterion_t & this );123 //void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t * owner );121 void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info ); 122 void ?{}(__condition_criterion_t & this ); 123 void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t * owner ); 124 124 125 125 struct condition { -
libcfa/src/concurrency/preemption.cfa
re5d9274 r015925a 38 38 #endif 39 39 40 __attribute__((weak)) Duration default_preemption() libcfa_public{40 __attribute__((weak)) Duration default_preemption() { 41 41 const char * preempt_rate_s = getenv("CFA_DEFAULT_PREEMPTION"); 42 42 if(!preempt_rate_s) { … … 238 238 //---------- 239 239 // special case for preemption since used often 240 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() libcfa_public{240 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() { 241 241 // create a assembler label before 242 242 // marked as clobber all to avoid movement … … 276 276 // Get data from the TLS block 277 277 // struct asm_region __cfaasm_get; 278 uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__ , visibility("default"))); //no inline to avoid problems278 uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems 279 279 uintptr_t __cfatls_get( unsigned long int offset ) { 280 280 // create a assembler label before … … 295 295 extern "C" { 296 296 // Disable interrupts by incrementing the counter 297 __attribute__((__noinline__, visibility("default"))) void disable_interrupts() libcfa_public{297 void disable_interrupts() { 298 298 // create a assembler label before 299 299 // marked as clobber all to avoid movement … … 326 326 // Enable interrupts by decrementing the counter 327 327 // If counter reaches 0, execute any pending __cfactx_switch 328 void enable_interrupts( bool poll ) libcfa_public{328 void enable_interrupts( bool poll ) { 329 329 // Cache the processor now since interrupts can start happening after the atomic store 330 330 processor * proc = __cfaabi_tls.this_processor; … … 362 362 //----------------------------------------------------------------------------- 363 363 // Kernel Signal Debug 364 void __cfaabi_check_preemption() libcfa_public{364 void __cfaabi_check_preemption() { 365 365 bool ready = __preemption_enabled(); 366 
366 if(!ready) { abort("Preemption should be ready"); } -
libcfa/src/concurrency/ready_queue.cfa
re5d9274 r015925a 125 125 const unsigned long long ctsc = rdtscl(); 126 126 127 if(proc->rdq.target == UINT_MAX) {127 if(proc->rdq.target == MAX) { 128 128 uint64_t chaos = __tls_rand(); 129 129 unsigned ext = chaos & 0xff; … … 137 137 const unsigned target = proc->rdq.target; 138 138 __cfadbg_print_safe(ready_queue, "Kernel : %u considering helping %u, tcsc %llu\n", this, target, readyQ.tscs[target].tv); 139 /* paranoid */ verify( readyQ.tscs[target].tv != ULLONG_MAX );139 /* paranoid */ verify( readyQ.tscs[target].tv != MAX ); 140 140 if(target < lanes_count) { 141 141 const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq); … … 147 147 } 148 148 } 149 proc->rdq.target = UINT_MAX;149 proc->rdq.target = MAX; 150 150 } 151 151 … … 245 245 // get preferred ready for new thread 246 246 unsigned ready_queue_new_preferred() { 247 unsigned pref = UINT_MAX;247 unsigned pref = MAX; 248 248 if(struct thread$ * thrd = publicTLS_get( this_thread )) { 249 249 pref = thrd->preferred; -
libcfa/src/concurrency/ready_subqueue.hfa
re5d9274 r015925a 32 32 /* paranoid */ verify( this.lock ); 33 33 /* paranoid */ verify( node->link.next == 0p ); 34 /* paranoid */ verify( __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED)== MAX );34 /* paranoid */ verify( node->link.ts == MAX ); 35 35 /* paranoid */ verify( this.prev->link.next == 0p ); 36 /* paranoid */ verify( __atomic_load_n(&this.prev->link.ts, __ATOMIC_RELAXED)== MAX );36 /* paranoid */ verify( this.prev->link.ts == MAX ); 37 37 if( this.anchor.next == 0p ) { 38 38 /* paranoid */ verify( this.anchor.next == 0p ); 39 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED)== MAX );40 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED)!= 0 );39 /* paranoid */ verify( this.anchor.ts == MAX ); 40 /* paranoid */ verify( this.anchor.ts != 0 ); 41 41 /* paranoid */ verify( this.prev == mock_head( this ) ); 42 42 } else { 43 43 /* paranoid */ verify( this.anchor.next != 0p ); 44 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED)!= MAX );45 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED)!= 0 );44 /* paranoid */ verify( this.anchor.ts != MAX ); 45 /* paranoid */ verify( this.anchor.ts != 0 ); 46 46 /* paranoid */ verify( this.prev != mock_head( this ) ); 47 47 } … … 62 62 /* paranoid */ verify( this.lock ); 63 63 /* paranoid */ verify( this.anchor.next != 0p ); 64 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED)!= MAX );65 /* paranoid */ verify( __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED) != 0);64 /* paranoid */ verify( this.anchor.ts != MAX ); 65 /* paranoid */ verify( this.anchor.ts != 0 ); 66 66 67 67 // Get the relevant nodes locally 68 68 thread$ * node = this.anchor.next; 69 69 this.anchor.next = node->link.next; 70 __atomic_store_n(&this.anchor.ts, __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED), __ATOMIC_RELAXED);70 this.anchor.ts = node->link.ts; 71 71 bool is_empty = this.anchor.next == 0p; 72 72 node->link.next = 0p; 73 
__atomic_store_n(&node->link.ts, ULLONG_MAX, __ATOMIC_RELAXED);73 node->link.ts = MAX; 74 74 #if !defined(__CFA_NO_STATISTICS__) 75 75 this.cnt--; … … 79 79 if(is_empty) this.prev = mock_head( this ); 80 80 81 unsigned long long ats = __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED);82 81 /* paranoid */ verify( node->link.next == 0p ); 83 /* paranoid */ verify( __atomic_load_n(&node->link.ts , __ATOMIC_RELAXED) == MAX);84 /* paranoid */ verify( __atomic_load_n(&node->link.ts , __ATOMIC_RELAXED) != 0);85 /* paranoid */ verify( ats != 0);86 /* paranoid */ verify( ( ats== MAX) == is_empty );87 return [node, ats];82 /* paranoid */ verify( node->link.ts == MAX ); 83 /* paranoid */ verify( node->link.ts != 0 ); 84 /* paranoid */ verify( this.anchor.ts != 0 ); 85 /* paranoid */ verify( (this.anchor.ts == MAX) == is_empty ); 86 return [node, this.anchor.ts]; 88 87 } 89 88 … … 97 96 // Cannot verify 'emptiness' here since it may not be locked 98 97 /* paranoid */ verify(this.anchor.ts != 0); 99 /* paranoid */ static_assert(__atomic_always_lock_free(sizeof(this.anchor.ts), &this.anchor.ts)); 100 return __atomic_load_n(&this.anchor.ts, __ATOMIC_RELAXED); 98 return this.anchor.ts; 101 99 } -
libcfa/src/concurrency/thread.cfa
re5d9274 r015925a 19 19 #include "thread.hfa" 20 20 21 #include "kernel/private.hfa" 21 22 #include "exception.hfa" 22 #include "kernel/private.hfa"23 #include "limits.hfa"24 23 25 24 #define __CFA_INVOKE_PRIVATE__ … … 27 26 28 27 extern uint32_t __global_random_seed, __global_random_prime, __global_random_mask; 29 30 #pragma GCC visibility push(default)31 28 32 29 //----------------------------------------------------------------------------- … … 45 42 curr_cluster = &cl; 46 43 link.next = 0p; 47 link.ts = MAX;44 link.ts = -1llu; 48 45 preferred = ready_queue_new_preferred(); 49 46 last_proc = 0p; … … 90 87 } 91 88 92 forall(T & | is_thread(T) | IS_EXCEPTION(ThreadCancelled (T))93 | { EHM_DEFAULT_VTABLE(ThreadCancelled (T)); })89 forall(T & | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) 90 | { EHM_DEFAULT_VTABLE(ThreadCancelled, (T)); }) 94 91 void ?{}( thread_dtor_guard_t & this, 95 92 T & thrd, void(*cancelHandler)(ThreadCancelled(T) &)) { … … 169 166 170 167 //----------------------------------------------------------------------------- 171 forall(T & | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled (T))172 | { EHM_DEFAULT_VTABLE(ThreadCancelled(T)); })168 forall(T & | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) 169 | { EHM_DEFAULT_VTABLE(ThreadCancelled, (T)); }) 173 170 T & join( T & this ) { 174 171 thread_dtor_guard_t guard = { this, defaultResumptionHandler }; -
libcfa/src/concurrency/thread.hfa
re5d9274 r015925a 32 32 }; 33 33 34 forall(thread_t &) 35 exception ThreadCancelled { 34 EHM_FORALL_EXCEPTION(ThreadCancelled, (thread_t &), (thread_t)) ( 36 35 thread_t * the_thread; 37 36 exception_t * the_exception; 38 };37 ); 39 38 40 39 forall(T &) … … 80 79 }; 81 80 82 forall( T & | is_thread(T) | IS_EXCEPTION(ThreadCancelled (T))83 | { EHM_DEFAULT_VTABLE(ThreadCancelled(T)); })81 forall( T & | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) 82 | { EHM_DEFAULT_VTABLE(ThreadCancelled, (T)); }) 84 83 void ?{}( thread_dtor_guard_t & this, T & thrd, void(*)(ThreadCancelled(T) &) ); 85 84 void ^?{}( thread_dtor_guard_t & this ); … … 127 126 //---------- 128 127 // join 129 forall( T & | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled (T))130 | { EHM_DEFAULT_VTABLE(ThreadCancelled(T)); })128 forall( T & | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) 129 | { EHM_DEFAULT_VTABLE(ThreadCancelled, (T)); }) 131 130 T & join( T & this ); 132 131 -
libcfa/src/containers/maybe.cfa
re5d9274 r015925a 17 17 #include <assert.h> 18 18 19 #pragma GCC visibility push(default)20 19 21 20 forall(T) -
libcfa/src/containers/result.cfa
re5d9274 r015925a 17 17 #include <assert.h> 18 18 19 #pragma GCC visibility push(default)20 19 21 20 forall(T, E) -
libcfa/src/containers/string.cfa
re5d9274 r015925a 18 18 #include <stdlib.hfa> 19 19 20 #pragma GCC visibility push(default)21 20 22 21 /* -
libcfa/src/containers/string_sharectx.hfa
re5d9274 r015925a 16 16 #pragma once 17 17 18 #pragma GCC visibility push(default)19 20 18 //######################### String Sharing Context ######################### 21 19 22 20 struct VbyteHeap; 23 21 24 // A string_sharectx 22 // A string_sharectx 25 23 // 26 24 // Usage: -
libcfa/src/containers/vector.cfa
re5d9274 r015925a 18 18 #include <stdlib.hfa> 19 19 20 #pragma GCC visibility push(default)21 22 20 forall(T, allocator_t | allocator_c(T, allocator_t)) 23 staticvoid copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other);21 void copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other); 24 22 25 23 //------------------------------------------------------------------------------ … … 85 83 86 84 forall(T, allocator_t | allocator_c(T, allocator_t)) 87 staticvoid copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other)85 void copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other) 88 86 { 89 87 this->size = other->size; -
libcfa/src/device/cpu.cfa
re5d9274 r015925a 31 31 } 32 32 33 #include "bits/defs.hfa"34 33 #include "algorithms/range_iterator.hfa" 35 34 … … 457 456 } 458 457 459 libcfa_publiccpu_info_t cpu_info;458 cpu_info_t cpu_info; -
libcfa/src/exception.c
re5d9274 r015925a 27 27 #include "stdhdr/assert.h" 28 28 #include "virtual.h" 29 30 #pragma GCC visibility push(default)31 32 29 #include "lsda.h" 33 30 … … 264 261 #else // defined( __ARM_ARCH ) 265 262 // The return code from _Unwind_RaiseException seems to be corrupt on ARM at end of stack. 266 // This workaround tries to keep default exception handling working. 263 // This workaround tries to keep default exception handling working. 267 264 if ( ret == _URC_FATAL_PHASE1_ERROR || ret == _URC_FATAL_PHASE2_ERROR ) { 268 265 #endif -
libcfa/src/exception.hfa
re5d9274 r015925a 10 10 // Created On : Thu Apr 7 10:25:00 2020 11 11 // Last Modified By : Andrew Beach 12 // Last Modified On : Wed May 25 17:20:00 202213 // Update Count : 512 // Last Modified On : Thr Apr 8 15:16:00 2021 13 // Update Count : 4 14 14 // 15 15 … … 18 18 // ----------------------------------------------------------------------------------------------- 19 19 20 // EHM_DEFAULT_VTABLE(exception_type) 20 // EHM_EXCEPTION(exception_name)(fields...); 21 // Create an exception (a virtual structure that inherits from exception_t) 22 // with the given name and fields. 23 #define EHM_EXCEPTION(exception_name) \ 24 _EHM_TYPE_ID_STRUCT(exception_name, ); \ 25 _EHM_TYPE_ID_VALUE(exception_name, ); \ 26 _EHM_VIRTUAL_TABLE_STRUCT(exception_name, , ); \ 27 _EHM_EXCEPTION_STRUCT(exception_name, , ) 28 29 // EHM_EXTERN_VTABLE(exception_name, table_name); 30 // Forward declare a virtual table called table_name for exception_name type. 31 #define EHM_EXTERN_VTABLE(exception_name, table_name) \ 32 _EHM_EXTERN_VTABLE(exception_name, , table_name) 33 34 // EHM_VIRTUAL_TABLE(exception_name, table_name); 35 // Define a virtual table called table_name for exception_name type. 36 #define EHM_VIRTUAL_TABLE(exception_name, table_name) \ 37 _EHM_DEFINE_COPY(exception_name, ) \ 38 _EHM_DEFINE_MSG(exception_name, ) \ 39 _EHM_VIRTUAL_TABLE(exception_name, , table_name) 40 41 // EHM_FORALL_EXCEPTION(exception_name, (assertions), (parameters))(fields...); 42 // As EHM_EXCEPTION but for polymorphic types instead of monomorphic ones. 43 // The assertions list should include all polymorphic parameters and 44 // assertions inside a parentisized list. Parameters should include all the 45 // polymorphic parameter names inside a parentisized list (same order). 
46 #define EHM_FORALL_EXCEPTION(exception_name, assertions, parameters) \ 47 _EHM_TYPE_ID_STRUCT(exception_name, forall assertions); \ 48 _EHM_VIRTUAL_TABLE_STRUCT(exception_name, forall assertions, parameters); \ 49 _EHM_EXCEPTION_STRUCT(exception_name, forall assertions, parameters) 50 51 // EHM_FORALL_EXTERN_VTABLE(exception_name, (arguments), table_name); 52 // As EHM_EXTERN_VTABLE but for polymorphic types instead of monomorphic ones. 53 // Arguments should be the parenthesized list of polymorphic arguments. 54 #define EHM_FORALL_EXTERN_VTABLE(exception_name, arguments, table_name) \ 55 _EHM_EXTERN_VTABLE(exception_name, arguments, table_name) 56 57 // EHM_FORALL_VIRTUAL_TABLE(exception_name, (arguments), table_name); 58 // As EHM_VIRTUAL_TABLE but for polymorphic types instead of monomorphic ones. 59 // Arguments should be the parenthesized list of polymorphic arguments. 60 #define EHM_FORALL_VIRTUAL_TABLE(exception_name, arguments, table_name) \ 61 _EHM_TYPE_ID_VALUE(exception_name, arguments); \ 62 _EHM_DEFINE_COPY(exception_name, arguments) \ 63 _EHM_DEFINE_MSG(exception_name, arguments) \ 64 _EHM_VIRTUAL_TABLE(exception_name, arguments, table_name) 65 66 // EHM_DEFAULT_VTABLE(exception_name, (arguments)) 21 67 // Create a declaration for a (possibly polymorphic) default vtable. 22 // Mostly used by and for the currency module. 23 #define EHM_DEFAULT_VTABLE(type) vtable(type)& const _default_vtable68 #define EHM_DEFAULT_VTABLE(exception_name, arguments) \ 69 _EHM_VTABLE_TYPE(exception_name) arguments & const _default_vtable 24 70 25 // IS_EXCEPTION(exception_type) 26 // IS_RESUMPTION_EXCEPTION(exception_type) 27 // IS_TERMINATION_EXCEPTION(exception_type) 28 // Create an assertion that exception_type is the given kind of exception. 29 // This is used to mimic associated types so the vtable type is unmentioned. 
30 #define IS_EXCEPTION(type) is_exception(type, vtable(type)) 31 #define IS_RESUMPTION_EXCEPTION(type) is_resumption_exception(type, vtable(type)) 32 #define IS_TERMINATION_EXCEPTION(type) is_termination_exception(type, vtable(type)) 71 // IS_EXCEPTION(exception_name [, (...parameters)]) 72 // IS_RESUMPTION_EXCEPTION(exception_name [, (parameters...)]) 73 // IS_TERMINATION_EXCEPTION(exception_name [, (parameters...)]) 74 // Create an assertion that exception_name, possibly with the qualifying parameters, is the given 75 // kind of exception with the standard vtable with the same parameters if applicable. 76 #define IS_EXCEPTION(...) _IS_EXCEPTION(is_exception, __VA_ARGS__, , ~) 77 #define IS_RESUMPTION_EXCEPTION(...) _IS_EXCEPTION(is_resumption_exception, __VA_ARGS__, , ~) 78 #define IS_TERMINATION_EXCEPTION(...) _IS_EXCEPTION(is_termination_exception, __VA_ARGS__, , ~) 79 80 // Macros starting with a leading underscore are internal. 81 82 // Create an exception type definition. Must be trailing, can be polymorphic. 83 #define _EHM_EXCEPTION_STRUCT(exception_name, forall_clause, parameters) \ 84 forall_clause struct exception_name { \ 85 _EHM_VTABLE_TYPE(exception_name) parameters const * virtual_table; \ 86 _CLOSE 87 88 // Create a (possibly polymorphic) virtual table forward declaration. 89 #define _EHM_EXTERN_VTABLE(exception_name, arguments, table_name) \ 90 extern const _EHM_VTABLE_TYPE(exception_name) arguments table_name 91 92 // Create a (possibly polymorphic) virtual table definition. 93 #define _EHM_VIRTUAL_TABLE(exception_type, arguments, table_name) \ 94 const _EHM_VTABLE_TYPE(exception_type) arguments table_name @= { \ 95 .__cfavir_typeid : &_EHM_TYPE_ID_NAME(exception_type), \ 96 .size : sizeof(struct exception_type arguments), \ 97 .copy : copy, \ 98 .^?{} : ^?{}, \ 99 .msg : msg, \ 100 } 101 102 // Create a (possibly polymorphic) copy function from an assignment operator. 
103 #define _EHM_DEFINE_FORALL_COPY(exception_name, forall_clause, parameters) \ 104 forall_clause void copy(exception_name parameters * this, \ 105 exception_name parameters * that) { \ 106 *this = *that; \ 107 } 108 109 #define _EHM_DEFINE_COPY(exception_name, arguments) \ 110 void copy(exception_name arguments * this, exception_name arguments * that) { \ 111 *this = *that; \ 112 } 113 114 // Create a (possibly polymorphic) msg function 115 #define _EHM_DEFINE_FORALL_MSG(exception_name, forall_clause, parameters) \ 116 forall_clause const char * msg(exception_name parameters * this) { \ 117 return #exception_name #parameters; \ 118 } 119 120 #define _EHM_DEFINE_MSG(exception_name, arguments) \ 121 const char * msg(exception_name arguments * this) { \ 122 return #exception_name #arguments; \ 123 } 124 125 // Produces the C compatible name of the virtual table type for a virtual type. 126 #define _EHM_VTABLE_TYPE(type_name) struct _GLUE2(type_name,_vtable) 127 128 // Create the vtable type for exception name. 129 #define _EHM_VIRTUAL_TABLE_STRUCT(exception_name, forall_clause, parameters) \ 130 forall_clause struct exception_name; \ 131 forall_clause _EHM_VTABLE_TYPE(exception_name) { \ 132 _EHM_TYPE_ID_TYPE(exception_name) parameters const * __cfavir_typeid; \ 133 size_t size; \ 134 void (*copy)(exception_name parameters * this, exception_name parameters * other); \ 135 void (*^?{})(exception_name parameters & this); \ 136 const char * (*msg)(exception_name parameters * this); \ 137 } 138 139 // Define the function required to satisfy the trait for exceptions. 
140 #define _EHM_TRAIT_FUNCTION(exception_name, forall_clause, parameters) \ 141 forall_clause inline void mark_exception( \ 142 exception_name parameters const &, \ 143 _EHM_VTABLE_TYPE(exception_name) parameters const &) {} \ 144 145 #define __EHM_TRAIT_FUNCTION(exception_name, forall_clause, parameters) \ 146 forall_clause inline _EHM_VTABLE_TYPE(exception_name) parameters const & \ 147 get_exception_vtable(exception_name parameters const & this) { \ 148 /* This comes before the structure definition, but we know the offset. */ \ 149 /* return (_EHM_VTABLE_TYPE(exception_name) parameters const &)this; */ \ 150 assert(false); \ 151 } 152 153 // Generates a new type-id structure. This is used to mangle the name of the 154 // type-id instance so it also includes polymorphic information. Must be the 155 // direct descendant of exception_t. 156 // The second field is used to recover type information about the exception. 157 #define _EHM_TYPE_ID_STRUCT(exception_name, forall_clause) \ 158 forall_clause _EHM_TYPE_ID_TYPE(exception_name) { \ 159 __cfavir_type_info const * parent; \ 160 } 161 162 // Generate a new type-id value. 163 #define _EHM_TYPE_ID_VALUE(exception_name, arguments) \ 164 __attribute__((cfa_linkonce)) \ 165 _EHM_TYPE_ID_TYPE(exception_name) arguments const \ 166 _EHM_TYPE_ID_NAME(exception_name) = { \ 167 &__cfatid_exception_t, \ 168 } 169 170 // _EHM_TYPE_ID_STRUCT and _EHM_TYPE_ID_VALUE are the two that would need to 171 // be updated to extend the hierarchy if we are still using macros when that 172 // is added. 173 174 // Produce the C compatible name of the type-id type for an exception type. 175 #define _EHM_TYPE_ID_TYPE(exception_name) \ 176 struct _GLUE2(__cfatid_struct_, exception_name) 177 178 // Produce the name of the instance of the type-id for an exception type. 179 #define _EHM_TYPE_ID_NAME(exception_name) _GLUE2(__cfatid_,exception_name) 180 181 #define _IS_EXCEPTION(kind, exception_name, parameters, ...) 
\ 182 kind(exception_name parameters, _EHM_VTABLE_TYPE(exception_name) parameters) 183 184 // Internal helper macros: 185 #define _CLOSE(...) __VA_ARGS__ } 186 #define _GLUE2(left, right) left##right -
libcfa/src/fstream.cfa
re5d9274 r015925a 22 22 #include <assert.h> 23 23 #include <errno.h> // errno 24 25 #pragma GCC visibility push(default)26 24 27 25 // *********************************** ofstream *********************************** … … 120 118 // abort | IO_MSG "open output file \"" | name | "\"" | nl | strerror( errno ); 121 119 } // if 122 (os){ file }; // initialize 120 (os){ file }; // initialize 123 121 } // open 124 122 … … 159 157 va_list args; 160 158 va_start( args, format ); 161 159 162 160 int len; 163 161 for ( cnt; 10 ) { … … 243 241 // abort | IO_MSG "open input file \"" | name | "\"" | nl | strerror( errno ); 244 242 } // if 245 (is){ file }; // initialize 243 (is){ file }; // initialize 246 244 } // open 247 245 -
libcfa/src/fstream.hfa
re5d9274 r015925a 18 18 #include "bits/weakso_locks.hfa" // mutex_lock 19 19 #include "iostream.hfa" 20 #include <exception.hfa> 20 21 21 22 -
libcfa/src/heap.cfa
re5d9274 r015925a 36 36 static bool traceHeap = false; 37 37 38 inline bool traceHeap() libcfa_public{ return traceHeap; }39 40 bool traceHeapOn() libcfa_public{38 inline bool traceHeap() { return traceHeap; } 39 40 bool traceHeapOn() { 41 41 bool temp = traceHeap; 42 42 traceHeap = true; … … 44 44 } // traceHeapOn 45 45 46 bool traceHeapOff() libcfa_public{46 bool traceHeapOff() { 47 47 bool temp = traceHeap; 48 48 traceHeap = false; … … 50 50 } // traceHeapOff 51 51 52 bool traceHeapTerm() libcfa_public{ return false; }52 bool traceHeapTerm() { return false; } 53 53 54 54 55 55 static bool prtFree = false; 56 56 57 staticbool prtFree() {57 bool prtFree() { 58 58 return prtFree; 59 59 } // prtFree 60 60 61 staticbool prtFreeOn() {61 bool prtFreeOn() { 62 62 bool temp = prtFree; 63 63 prtFree = true; … … 65 65 } // prtFreeOn 66 66 67 staticbool prtFreeOff() {67 bool prtFreeOff() { 68 68 bool temp = prtFree; 69 69 prtFree = false; … … 388 388 static unsigned int maxBucketsUsed; // maximum number of buckets in use 389 389 // extern visibility, used by runtime kernel 390 // would be cool to remove libcfa_public but it's needed for libcfathread 391 libcfa_public size_t __page_size; // architecture pagesize 392 libcfa_public int __map_prot; // common mmap/mprotect protection 390 size_t __page_size; // architecture pagesize 391 int __map_prot; // common mmap/mprotect protection 393 392 394 393 … … 728 727 729 728 730 s tatic size_t prtFree( Heap & manager ) with( manager ) {729 size_t prtFree( Heap & manager ) with( manager ) { 731 730 size_t total = 0; 732 731 #ifdef __STATISTICS__ … … 880 879 // Allocates size bytes and returns a pointer to the allocated memory. The contents are undefined. If size is 0, 881 880 // then malloc() returns a unique pointer value that can later be successfully passed to free(). 
882 void * malloc( size_t size ) libcfa_public{881 void * malloc( size_t size ) { 883 882 #ifdef __STATISTICS__ 884 883 if ( likely( size > 0 ) ) { … … 895 894 896 895 // Same as malloc() except size bytes is an array of dim elements each of elemSize bytes. 897 void * aalloc( size_t dim, size_t elemSize ) libcfa_public{896 void * aalloc( size_t dim, size_t elemSize ) { 898 897 size_t size = dim * elemSize; 899 898 #ifdef __STATISTICS__ … … 911 910 912 911 // Same as aalloc() with memory set to zero. 913 void * calloc( size_t dim, size_t elemSize ) libcfa_public{912 void * calloc( size_t dim, size_t elemSize ) { 914 913 size_t size = dim * elemSize; 915 914 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER … … 952 951 // not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier 953 952 // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done. 954 void * resize( void * oaddr, size_t size ) libcfa_public{953 void * resize( void * oaddr, size_t size ) { 955 954 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 956 955 if ( unlikely( size == 0 ) ) { // special cases … … 997 996 // Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of 998 997 // the old and new sizes. 999 void * realloc( void * oaddr, size_t size ) libcfa_public{998 void * realloc( void * oaddr, size_t size ) { 1000 999 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1001 1000 if ( unlikely( size == 0 ) ) { // special cases … … 1061 1060 1062 1061 // Same as realloc() except the new allocation size is large enough for an array of nelem elements of size elsize. 
1063 void * reallocarray( void * oaddr, size_t dim, size_t elemSize ) libcfa_public{1062 void * reallocarray( void * oaddr, size_t dim, size_t elemSize ) { 1064 1063 return realloc( oaddr, dim * elemSize ); 1065 1064 } // reallocarray … … 1067 1066 1068 1067 // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete) 1069 void * memalign( size_t alignment, size_t size ) libcfa_public{1068 void * memalign( size_t alignment, size_t size ) { 1070 1069 #ifdef __STATISTICS__ 1071 1070 if ( likely( size > 0 ) ) { … … 1082 1081 1083 1082 // Same as aalloc() with memory alignment. 1084 void * amemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public{1083 void * amemalign( size_t alignment, size_t dim, size_t elemSize ) { 1085 1084 size_t size = dim * elemSize; 1086 1085 #ifdef __STATISTICS__ … … 1098 1097 1099 1098 // Same as calloc() with memory alignment. 1100 void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public{1099 void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) { 1101 1100 size_t size = dim * elemSize; 1102 1101 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER … … 1137 1136 // Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple 1138 1137 // of alignment. This requirement is universally ignored. 1139 void * aligned_alloc( size_t alignment, size_t size ) libcfa_public{1138 void * aligned_alloc( size_t alignment, size_t size ) { 1140 1139 return memalign( alignment, size ); 1141 1140 } // aligned_alloc … … 1146 1145 // is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later be successfully passed to 1147 1146 // free(3). 1148 int posix_memalign( void ** memptr, size_t alignment, size_t size ) libcfa_public{1147 int posix_memalign( void ** memptr, size_t alignment, size_t size ) { 1149 1148 if ( unlikely( alignment < libAlign() || ! 
is_pow2( alignment ) ) ) return EINVAL; // check alignment 1150 1149 *memptr = memalign( alignment, size ); … … 1155 1154 // Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the 1156 1155 // page size. It is equivalent to memalign(sysconf(_SC_PAGESIZE),size). 1157 void * valloc( size_t size ) libcfa_public{1156 void * valloc( size_t size ) { 1158 1157 return memalign( __page_size, size ); 1159 1158 } // valloc … … 1161 1160 1162 1161 // Same as valloc but rounds size to multiple of page size. 1163 void * pvalloc( size_t size ) libcfa_public{1162 void * pvalloc( size_t size ) { 1164 1163 return memalign( __page_size, ceiling2( size, __page_size ) ); // round size to multiple of page size 1165 1164 } // pvalloc … … 1169 1168 // or realloc(). Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is 1170 1169 // 0p, no operation is performed. 1171 void free( void * addr ) libcfa_public{1170 void free( void * addr ) { 1172 1171 if ( unlikely( addr == 0p ) ) { // special case 1173 1172 #ifdef __STATISTICS__ … … 1190 1189 1191 1190 // Returns the alignment of an allocation. 1192 size_t malloc_alignment( void * addr ) libcfa_public{1191 size_t malloc_alignment( void * addr ) { 1193 1192 if ( unlikely( addr == 0p ) ) return libAlign(); // minimum alignment 1194 1193 Heap.Storage.Header * header = HeaderAddr( addr ); … … 1202 1201 1203 1202 // Returns true if the allocation is zero filled, e.g., allocated by calloc(). 1204 bool malloc_zero_fill( void * addr ) libcfa_public{1203 bool malloc_zero_fill( void * addr ) { 1205 1204 if ( unlikely( addr == 0p ) ) return false; // null allocation is not zero fill 1206 1205 Heap.Storage.Header * header = HeaderAddr( addr ); … … 1213 1212 1214 1213 // Returns original total allocation size (not bucket size) => array size is dimension * sizeof(T). 
1215 size_t malloc_size( void * addr ) libcfa_public{1214 size_t malloc_size( void * addr ) { 1216 1215 if ( unlikely( addr == 0p ) ) return 0; // null allocation has zero size 1217 1216 Heap.Storage.Header * header = HeaderAddr( addr ); … … 1225 1224 // Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by 1226 1225 // malloc or a related function. 1227 size_t malloc_usable_size( void * addr ) libcfa_public{1226 size_t malloc_usable_size( void * addr ) { 1228 1227 if ( unlikely( addr == 0p ) ) return 0; // null allocation has 0 size 1229 1228 Heap.Storage.Header * header; … … 1237 1236 1238 1237 // Prints (on default standard error) statistics about memory allocated by malloc and related functions. 1239 void malloc_stats( void ) libcfa_public{1238 void malloc_stats( void ) { 1240 1239 #ifdef __STATISTICS__ 1241 1240 printStats(); … … 1246 1245 1247 1246 // Changes the file descriptor where malloc_stats() writes statistics. 1248 int malloc_stats_fd( int fd __attribute__(( unused )) ) libcfa_public{1247 int malloc_stats_fd( int fd __attribute__(( unused )) ) { 1249 1248 #ifdef __STATISTICS__ 1250 1249 int temp = stats_fd; … … 1260 1259 // The string is printed on the file stream stream. The exported string includes information about all arenas (see 1261 1260 // malloc). 1262 int malloc_info( int options, FILE * stream __attribute__(( unused )) ) libcfa_public{1261 int malloc_info( int options, FILE * stream __attribute__(( unused )) ) { 1263 1262 if ( options != 0 ) { errno = EINVAL; return -1; } 1264 1263 #ifdef __STATISTICS__ … … 1272 1271 // Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument 1273 1272 // specifies the parameter to be modified, and value specifies the new value for that parameter. 
1274 int mallopt( int option, int value ) libcfa_public{1273 int mallopt( int option, int value ) { 1275 1274 if ( value < 0 ) return 0; 1276 1275 choose( option ) { … … 1286 1285 1287 1286 // Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument). 1288 int malloc_trim( size_t ) libcfa_public{1287 int malloc_trim( size_t ) { 1289 1288 return 0; // => impossible to release memory 1290 1289 } // malloc_trim … … 1295 1294 // structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function 1296 1295 // result. (The caller must free this memory.) 1297 void * malloc_get_state( void ) libcfa_public{1296 void * malloc_get_state( void ) { 1298 1297 return 0p; // unsupported 1299 1298 } // malloc_get_state … … 1302 1301 // Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data 1303 1302 // structure pointed to by state. 1304 int malloc_set_state( void * ) libcfa_public{1303 int malloc_set_state( void * ) { 1305 1304 return 0; // unsupported 1306 1305 } // malloc_set_state … … 1308 1307 1309 1308 // Sets the amount (bytes) to extend the heap when there is insufficent free storage to service an allocation. 1310 __attribute__((weak)) size_t malloc_expansion() libcfa_public{ return __CFA_DEFAULT_HEAP_EXPANSION__; }1309 __attribute__((weak)) size_t malloc_expansion() { return __CFA_DEFAULT_HEAP_EXPANSION__; } 1311 1310 1312 1311 // Sets the crossover point between allocations occuring in the sbrk area or separately mmapped. 1313 __attribute__((weak)) size_t malloc_mmap_start() libcfa_public{ return __CFA_DEFAULT_MMAP_START__; }1312 __attribute__((weak)) size_t malloc_mmap_start() { return __CFA_DEFAULT_MMAP_START__; } 1314 1313 1315 1314 // Amount subtracted to adjust for unfreed program storage (debug only). 
1316 __attribute__((weak)) size_t malloc_unfreed() libcfa_public{ return __CFA_DEFAULT_HEAP_UNFREED__; }1315 __attribute__((weak)) size_t malloc_unfreed() { return __CFA_DEFAULT_HEAP_UNFREED__; } 1317 1316 } // extern "C" 1318 1317 1319 1318 1320 1319 // Must have CFA linkage to overload with C linkage realloc. 1321 void * resize( void * oaddr, size_t nalign, size_t size ) libcfa_public{1320 void * resize( void * oaddr, size_t nalign, size_t size ) { 1322 1321 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1323 1322 if ( unlikely( size == 0 ) ) { // special cases … … 1381 1380 1382 1381 1383 void * realloc( void * oaddr, size_t nalign, size_t size ) libcfa_public{1382 void * realloc( void * oaddr, size_t nalign, size_t size ) { 1384 1383 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1385 1384 if ( unlikely( size == 0 ) ) { // special cases -
libcfa/src/interpose.cfa
re5d9274 r015925a 36 36 //============================================================================================= 37 37 38 staticvoid preload_libgcc(void) {38 void preload_libgcc(void) { 39 39 dlopen( "libgcc_s.so.1", RTLD_NOW ); 40 40 if ( const char * error = dlerror() ) abort( "interpose_symbol : internal error pre-loading libgcc, %s\n", error ); … … 42 42 43 43 typedef void (* generic_fptr_t)(void); 44 staticgeneric_fptr_t interpose_symbol( const char symbol[], const char version[] ) {44 generic_fptr_t interpose_symbol( const char symbol[], const char version[] ) { 45 45 const char * error; 46 46 … … 83 83 //============================================================================================= 84 84 85 staticvoid sigHandler_segv( __CFA_SIGPARMS__ );86 staticvoid sigHandler_ill ( __CFA_SIGPARMS__ );87 staticvoid sigHandler_fpe ( __CFA_SIGPARMS__ );88 staticvoid sigHandler_abrt( __CFA_SIGPARMS__ );89 staticvoid sigHandler_term( __CFA_SIGPARMS__ );90 91 st atic struct {85 void sigHandler_segv( __CFA_SIGPARMS__ ); 86 void sigHandler_ill ( __CFA_SIGPARMS__ ); 87 void sigHandler_fpe ( __CFA_SIGPARMS__ ); 88 void sigHandler_abrt( __CFA_SIGPARMS__ ); 89 void sigHandler_term( __CFA_SIGPARMS__ ); 90 91 struct { 92 92 void (* exit)( int ) __attribute__(( __noreturn__ )); 93 93 void (* abort)( void ) __attribute__(( __noreturn__ )); 94 94 } __cabi_libc; 95 95 96 libcfa_publicint cfa_main_returned;96 int cfa_main_returned; 97 97 98 98 extern "C" { … … 148 148 149 149 // Forward declare abort after the __typeof__ call to avoid ambiguities 150 libcfa_publicvoid exit( int status, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));151 libcfa_publicvoid abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));152 libcfa_publicvoid abort( bool signalAbort, const char fmt[], ... 
) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));153 libcfa_publicvoid __abort( bool signalAbort, const char fmt[], va_list args ) __attribute__(( __nothrow__, __leaf__, __noreturn__ ));150 void exit( int status, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ )); 151 void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ )); 152 void abort( bool signalAbort, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ )); 153 void __abort( bool signalAbort, const char fmt[], va_list args ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )); 154 154 155 155 extern "C" { 156 libcfa_publicvoid abort( void ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) {156 void abort( void ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) { 157 157 abort( false, "%s", "" ); 158 158 } 159 159 160 libcfa_publicvoid __cabi_abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ )) {160 void __cabi_abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ )) { 161 161 va_list argp; 162 162 va_start( argp, fmt ); … … 165 165 } 166 166 167 libcfa_publicvoid exit( int status ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) {167 void exit( int status ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) { 168 168 __cabi_libc.exit( status ); 169 169 } -
libcfa/src/iostream.cfa
re5d9274 r015925a 32 32 #include "bitmanip.hfa" // high1 33 33 34 #pragma GCC visibility push(default)35 34 36 35 // *********************************** ostream *********************************** -
libcfa/src/limits.cfa
re5d9274 r015925a 20 20 #include <complex.h> 21 21 #include "limits.hfa" 22 23 #pragma GCC visibility push(default)24 22 25 23 // Integral Constants -
libcfa/src/memory.cfa
re5d9274 r015925a 16 16 #include "memory.hfa" 17 17 #include "stdlib.hfa" 18 19 #pragma GCC visibility push(default)20 18 21 19 // Internal data object. -
libcfa/src/parseargs.cfa
re5d9274 r015925a 24 24 #include "common.hfa" 25 25 #include "limits.hfa" 26 27 #pragma GCC visibility push(default)28 26 29 27 extern int cfa_args_argc __attribute__((weak)); -
libcfa/src/parseconfig.cfa
re5d9274 r015925a 14 14 15 15 16 #pragma GCC visibility push(default)17 18 16 // *********************************** exceptions *********************************** 19 17 20 18 21 19 // TODO: Add names of missing config entries to exception (see further below) 22 vtable(Missing_Config_Entries) Missing_Config_Entries_vt;20 static vtable(Missing_Config_Entries) Missing_Config_Entries_vt; 23 21 24 22 [ void ] ?{}( & Missing_Config_Entries this, unsigned int num_missing ) { … … 33 31 34 32 35 vtable(Parse_Failure) Parse_Failure_vt;33 static vtable(Parse_Failure) Parse_Failure_vt; 36 34 37 35 [ void ] ?{}( & Parse_Failure this, [] char failed_key, [] char failed_value ) { … … 55 53 56 54 57 vtable(Validation_Failure) Validation_Failure_vt;55 static vtable(Validation_Failure) Validation_Failure_vt; 58 56 59 57 [ void ] ?{}( & Validation_Failure this, [] char failed_key, [] char failed_value ) { … … 112 110 113 111 114 static[ bool ] comments( & ifstream in, [] char name ) {112 [ bool ] comments( & ifstream in, [] char name ) { 115 113 while () { 116 114 in | name; -
libcfa/src/rational.cfa
re5d9274 r015925a 17 17 #include "fstream.hfa" 18 18 #include "stdlib.hfa" 19 20 #pragma GCC visibility push(default)21 19 22 20 forall( T | Arithmetic( T ) ) { -
libcfa/src/startup.cfa
re5d9274 r015925a 41 41 } // __cfaabi_appready_shutdown 42 42 43 void disable_interrupts() __attribute__(( weak )) libcfa_public{}44 void enable_interrupts() __attribute__(( weak )) libcfa_public{}43 void disable_interrupts() __attribute__(( weak )) {} 44 void enable_interrupts() __attribute__(( weak )) {} 45 45 46 46 … … 64 64 struct __spinlock_t; 65 65 extern "C" { 66 void __cfaabi_dbg_record_lock(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) libcfa_public{}66 void __cfaabi_dbg_record_lock(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {} 67 67 } 68 68 -
libcfa/src/stdlib.cfa
re5d9274 r015925a 25 25 #include <complex.h> // _Complex_I 26 26 #include <assert.h> 27 28 #pragma GCC visibility push(default)29 27 30 28 //--------------------------------------- … … 227 225 #define GENERATOR LCG 228 226 229 // would be cool to make hidden but it's needed for libcfathread 230 __attribute__((visibility("default"))) uint32_t __global_random_seed; // sequential/concurrent 231 __attribute__((visibility("hidden"))) uint32_t __global_random_state; // sequential only 227 uint32_t __global_random_seed; // sequential/concurrent 228 uint32_t __global_random_state; // sequential only 232 229 233 230 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; GENERATOR( state ); } // set seed -
libcfa/src/strstream.cfa
re5d9274 r015925a 1 // 1 // 2 2 // Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo 3 // 3 // 4 4 // The contents of this file are covered under the licence agreement in the 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // strstream.cfa -- 8 // 7 // strstream.cfa -- 8 // 9 9 // Author : Peter A. Buhr 10 10 // Created On : Thu Apr 22 22:24:35 2021 … … 12 12 // Last Modified On : Sun Oct 10 16:13:20 2021 13 13 // Update Count : 101 14 // 14 // 15 15 16 16 #include "strstream.hfa" … … 24 24 #include <unistd.h> // sbrk, sysconf 25 25 26 #pragma GCC visibility push(default)27 26 28 27 // *********************************** strstream *********************************** -
libcfa/src/time.cfa
re5d9274 r015925a 18 18 #include <stdio.h> // snprintf 19 19 #include <assert.h> 20 21 #pragma GCC visibility push(default)22 20 23 21 static char * nanomsd( long int ns, char * buf ) { // most significant digits -
libcfa/src/virtual.c
re5d9274 r015925a 16 16 #include "virtual.h" 17 17 #include "assert.h" 18 19 #pragma GCC visibility push(default)20 18 21 19 int __cfavir_is_parent( -
src/AST/Expr.cpp
re5d9274 r015925a 10 10 // Created On : Wed May 15 17:00:00 2019 11 11 // Last Modified By : Andrew Beach 12 // Created On : Wed May 18 13:56:00 202213 // Update Count : 812 // Created On : Tue Nov 30 14:23:00 2021 13 // Update Count : 7 14 14 // 15 15 … … 21 21 22 22 #include "Copy.hpp" // for shallowCopy 23 #include "Eval.hpp" // for call 23 24 #include "GenericSubstitution.hpp" 24 25 #include "LinkageSpec.hpp" … … 66 67 // --- UntypedExpr 67 68 68 bool UntypedExpr::get_lvalue() const {69 std::string fname = InitTweak::getFunctionName( this );70 return lvalueFunctionNames.count( fname );71 }72 73 69 UntypedExpr * UntypedExpr::createDeref( const CodeLocation & loc, const Expr * arg ) { 74 70 assert( arg ); 75 71 76 UntypedExpr * ret = c reateCall( loc, "*?", { arg });72 UntypedExpr * ret = call( loc, "*?", arg ); 77 73 if ( const Type * ty = arg->result ) { 78 74 const Type * base = InitTweak::getPointerBase( ty ); … … 91 87 } 92 88 89 bool UntypedExpr::get_lvalue() const { 90 std::string fname = InitTweak::getFunctionName( this ); 91 return lvalueFunctionNames.count( fname ); 92 } 93 93 94 UntypedExpr * UntypedExpr::createAssign( const CodeLocation & loc, const Expr * lhs, const Expr * rhs ) { 94 95 assert( lhs && rhs ); 95 96 96 UntypedExpr * ret = c reateCall( loc, "?=?", { lhs, rhs });97 UntypedExpr * ret = call( loc, "?=?", lhs, rhs ); 97 98 if ( lhs->result && rhs->result ) { 98 99 // if both expressions are typed, assumes that this assignment is a C bitwise assignment, … … 101 102 } 102 103 return ret; 103 }104 105 UntypedExpr * UntypedExpr::createCall( const CodeLocation & loc,106 const std::string & name, std::vector<ptr<Expr>> && args ) {107 return new UntypedExpr( loc,108 new NameExpr( loc, name ), std::move( args ) );109 104 } 110 105 -
src/AST/Expr.hpp
re5d9274 r015925a 230 230 /// Creates a new assignment expression 231 231 static UntypedExpr * createAssign( const CodeLocation & loc, const Expr * lhs, const Expr * rhs ); 232 /// Creates a new call of a variable.233 static UntypedExpr * createCall( const CodeLocation & loc,234 const std::string & name, std::vector<ptr<Expr>> && args );235 232 236 233 const Expr * accept( Visitor & v ) const override { return v.visit( this ); } -
src/AST/module.mk
re5d9274 r015925a 29 29 AST/DeclReplacer.cpp \ 30 30 AST/DeclReplacer.hpp \ 31 AST/Eval.hpp \ 31 32 AST/Expr.cpp \ 32 33 AST/Expr.hpp \ -
src/CodeGen/CodeGenerator.cc
re5d9274 r015925a 1238 1238 } // namespace CodeGen 1239 1239 1240 1241 unsigned Indenter::tabsize = 2; 1242 1243 std::ostream & operator<<( std::ostream & out, const BaseSyntaxNode * node ) { 1244 if ( node ) { 1245 node->print( out ); 1246 } else { 1247 out << "nullptr"; 1248 } 1249 return out; 1250 } 1251 1240 1252 // Local Variables: // 1241 1253 // tab-width: 4 // -
src/CodeGen/FixMain.cc
re5d9274 r015925a 49 49 50 50 } 51 52 bool FixMain::replace_main = false; 51 53 52 54 template<typename container> -
src/CodeGen/GenType.cc
re5d9274 r015925a 10 10 // Created On : Mon May 18 07:44:20 2015 11 11 // Last Modified By : Andrew Beach 12 // Last Modified On : Fri May 20 11:18:00 202213 // Update Count : 2 412 // Last Modified On : Wed May 1 15:24:00 2019 13 // Update Count : 23 14 14 // 15 15 #include "GenType.h" … … 50 50 void postvisit( TraitInstType * inst ); 51 51 void postvisit( TypeofType * typeof ); 52 void postvisit( VTableType * vtable );53 52 void postvisit( QualifiedType * qualType ); 54 53 … … 260 259 if ( options.genC ) { 261 260 typeString = "enum " + typeString; 262 } 263 } 261 } 262 } 264 263 handleQualifiers( enumInst ); 265 264 } 266 265 267 266 void GenType::postvisit( TypeInstType * typeInst ) { 268 assertf( ! options.genC, "Type instance types should not reach code generation." );269 267 typeString = typeInst->name + " " + typeString; 270 268 handleQualifiers( typeInst ); … … 322 320 } 323 321 324 void GenType::postvisit( VTableType * vtable ) {325 assertf( ! options.genC, "Virtual table types should not reach code generation." );326 std::ostringstream os;327 os << "vtable(" << genType( vtable->base, "", options ) << ") " << typeString;328 typeString = os.str();329 handleQualifiers( vtable );330 }331 332 322 void GenType::postvisit( QualifiedType * qualType ) { 333 323 assertf( ! options.genC, "Qualified types should not reach code generation." ); -
src/CodeGen/LinkOnce.cc
re5d9274 r015925a 53 53 new ConstantExpr( Constant::from_string( section_name ) ) 54 54 ); 55 56 // Unconditionnaly add "visibility(default)" to anything with gnu.linkonce57 // visibility is a mess otherwise58 attributes.push_back(new Attribute("visibility", {new ConstantExpr( Constant::from_string( "default" ) )}));59 60 55 } 61 56 visit_children = false; -
src/CodeGen/module.mk
re5d9274 r015925a 10 10 ## Author : Richard C. Bilson 11 11 ## Created On : Mon Jun 1 17:49:17 2015 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Tue May 17 14:26:00 202214 ## Update Count : 512 ## Last Modified By : Peter A. Buhr 13 ## Last Modified On : Sat Dec 14 07:29:42 2019 14 ## Update Count : 4 15 15 ############################################################################### 16 16 17 #SRC += ArgTweak/Rewriter.cc \ 18 # ArgTweak/Mutate.cc 19 17 20 SRC_CODEGEN = \ 18 CodeGen/FixMain2.cc \19 CodeGen/FixMain.h \20 CodeGen/OperatorTable.cc \21 CodeGen/OperatorTable.h22 23 SRC += $(SRC_CODEGEN) \24 21 CodeGen/CodeGenerator.cc \ 25 22 CodeGen/CodeGenerator.h \ 26 CodeGen/Generate.cc \27 CodeGen/Generate.h \28 23 CodeGen/FixMain.cc \ 29 CodeGen/FixNames.cc \ 30 CodeGen/FixNames.h \ 24 CodeGen/FixMain.h \ 31 25 CodeGen/GenType.cc \ 32 26 CodeGen/GenType.h \ 33 27 CodeGen/LinkOnce.cc \ 34 28 CodeGen/LinkOnce.h \ 29 CodeGen/OperatorTable.cc \ 30 CodeGen/OperatorTable.h \ 35 31 CodeGen/Options.h 36 32 33 SRC += $(SRC_CODEGEN) CodeGen/Generate.cc CodeGen/Generate.h CodeGen/FixNames.cc CodeGen/FixNames.h 37 34 SRCDEMANGLE += $(SRC_CODEGEN) -
src/CodeTools/ResolvProtoDump.cc
re5d9274 r015925a 304 304 305 305 // replace enums with int 306 void previsit( EnumInstType* ) { 307 // TODO: add the meaningful representation of typed int 308 ss << (int)BasicType::SignedInt; 309 } 306 void previsit( EnumInstType* ) { ss << (int)BasicType::SignedInt; } 310 307 311 308 void previsit( TypeInstType* vt ) { -
src/Common/Indenter.h
re5d9274 r015925a 10 10 // Created On : Fri Jun 30 16:55:23 2017 11 11 // Last Modified By : Andrew Beach 12 // Last Modified On : Fri May 13 14:10:00 202213 // Update Count : 212 // Last Modified On : Fri Aug 11 11:15:00 2017 13 // Update Count : 1 14 14 // 15 15 16 #pragma once 17 18 #include <ostream> 16 #ifndef INDENTER_H 17 #define INDENTER_H 19 18 20 19 struct Indenter { … … 38 37 return out << std::string(indent.indent * indent.amt, ' '); 39 38 } 39 40 #endif // INDENTER_H -
src/Common/ResolvProtoDump.cpp
re5d9274 r015925a 227 227 } 228 228 229 void previsit( const ast::EnumInstType * enumInst) { 230 // TODO: Add the meaningful text representation of typed enum 229 void previsit( const ast::EnumInstType * ) { 231 230 ss << (int)ast::BasicType::SignedInt; 232 231 } -
src/Common/module.mk
re5d9274 r015925a 10 10 ## Author : Richard C. Bilson 11 11 ## Created On : Mon Jun 1 17:49:17 2015 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Tue May 17 14:27:00 202214 ## Update Count : 512 ## Last Modified By : Peter A. Buhr 13 ## Last Modified On : Tue Sep 27 11:06:38 2016 14 ## Update Count : 4 15 15 ############################################################################### 16 16 17 17 SRC_COMMON = \ 18 Common/Assert.cc \ 19 Common/CodeLocation.h \ 20 Common/CodeLocationTools.hpp \ 21 Common/CodeLocationTools.cpp \ 22 Common/CompilerError.h \ 23 Common/Debug.h \ 24 Common/DeclStats.hpp \ 25 Common/DeclStats.cpp \ 26 Common/ErrorObjects.h \ 27 Common/Eval.cc \ 28 Common/Examine.cc \ 29 Common/Examine.h \ 30 Common/FilterCombos.h \ 31 Common/Indenter.h \ 32 Common/Indenter.cc \ 33 Common/PassVisitor.cc \ 34 Common/PassVisitor.h \ 35 Common/PassVisitor.impl.h \ 36 Common/PassVisitor.proto.h \ 37 Common/PersistentMap.h \ 38 Common/ResolvProtoDump.hpp \ 39 Common/ResolvProtoDump.cpp \ 40 Common/ScopedMap.h \ 41 Common/SemanticError.cc \ 42 Common/SemanticError.h \ 43 Common/Stats.h \ 44 Common/Stats/Base.h \ 45 Common/Stats/Counter.cc \ 46 Common/Stats/Counter.h \ 47 Common/Stats/Heap.cc \ 48 Common/Stats/Heap.h \ 49 Common/Stats/ResolveTime.cc \ 50 Common/Stats/ResolveTime.h \ 51 Common/Stats/Stats.cc \ 52 Common/Stats/Time.cc \ 53 Common/Stats/Time.h \ 54 Common/UnimplementedError.h \ 55 Common/UniqueName.cc \ 56 Common/UniqueName.h \ 57 Common/utility.h \ 58 Common/VectorMap.h 18 Common/Assert.cc \ 19 Common/CodeLocation.h \ 20 Common/CodeLocationTools.hpp \ 21 Common/CodeLocationTools.cpp \ 22 Common/CompilerError.h \ 23 Common/Debug.h \ 24 Common/DeclStats.hpp \ 25 Common/DeclStats.cpp \ 26 Common/ErrorObjects.h \ 27 Common/Eval.cc \ 28 Common/Examine.cc \ 29 Common/Examine.h \ 30 Common/FilterCombos.h \ 31 Common/Indenter.h \ 32 Common/PassVisitor.cc \ 33 Common/PassVisitor.h \ 34 Common/PassVisitor.impl.h \ 35 
Common/PassVisitor.proto.h \ 36 Common/PersistentMap.h \ 37 Common/ResolvProtoDump.hpp \ 38 Common/ResolvProtoDump.cpp \ 39 Common/ScopedMap.h \ 40 Common/SemanticError.cc \ 41 Common/SemanticError.h \ 42 Common/Stats.h \ 43 Common/Stats/Base.h \ 44 Common/Stats/Counter.cc \ 45 Common/Stats/Counter.h \ 46 Common/Stats/Heap.cc \ 47 Common/Stats/Heap.h \ 48 Common/Stats/ResolveTime.cc \ 49 Common/Stats/ResolveTime.h \ 50 Common/Stats/Stats.cc \ 51 Common/Stats/Time.cc \ 52 Common/Stats/Time.h \ 53 Common/UnimplementedError.h \ 54 Common/UniqueName.cc \ 55 Common/UniqueName.h \ 56 Common/utility.h \ 57 Common/VectorMap.h 59 58 60 SRC += $(SRC_COMMON) \ 61 Common/DebugMalloc.cc 62 59 SRC += $(SRC_COMMON) Common/DebugMalloc.cc 63 60 SRCDEMANGLE += $(SRC_COMMON) -
src/Concurrency/module.mk
re5d9274 r015925a 10 10 ## Author : Thierry Delisle 11 11 ## Created On : Mon Mar 13 12:48:40 2017 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Tue May 17 13:28:00 202214 ## Update Count : 112 ## Last Modified By : 13 ## Last Modified On : 14 ## Update Count : 0 15 15 ############################################################################### 16 16 17 SRC += \17 SRC_CONCURRENCY = \ 18 18 Concurrency/KeywordsNew.cpp \ 19 Concurrency/Keywords.cc \ 19 Concurrency/Keywords.cc 20 21 SRC += $(SRC_CONCURRENCY) \ 20 22 Concurrency/Keywords.h \ 21 23 Concurrency/Waitfor.cc \ 22 24 Concurrency/Waitfor.h 25 26 SRCDEMANGLE += $(SRC_CONCURRENCY) 27 -
src/ControlStruct/ExceptDecl.cc
re5d9274 r015925a 9 9 // Author : Henry Xue 10 10 // Created On : Tue Jul 20 04:10:50 2021 11 // Last Modified By : Andrew Beach12 // Last Modified On : Wed May 25 16:43:00 202213 // Update Count : 511 // Last Modified By : Henry Xue 12 // Last Modified On : Tue Aug 03 10:42:26 2021 13 // Update Count : 4 14 14 // 15 15 … … 39 39 } 40 40 41 StructInstType * makeExceptInstType(42 const std::string & exceptionName, 43 const std::list< Expression *> & parameters 44 ) { 45 StructInstType * exceptInstType = new StructInstType(41 TypeInstType * makeExceptInstType( 42 const std::string & exceptionName, 43 const std::list< Expression *> & parameters 44 ) { 45 TypeInstType * exceptInstType = new TypeInstType( 46 46 noQualifiers, 47 exceptionName 47 exceptionName, 48 false 48 49 ); 49 50 cloneAll( parameters, exceptInstType->parameters ); … … 150 151 nullptr, 151 152 new PointerType( noQualifiers, 152 new StructInstType( Type::Const, "__cfavir_type_info") ),153 new TypeInstType( Type::Const, "__cfavir_type_info", false ) ), 153 154 nullptr 154 155 ) ); … … 256 257 const std::string & exceptionName, 257 258 const std::list< TypeDecl *> & forallClause, 258 const std::list< Expression *> & parameters, 259 const std::list< Expression *> & parameters, 259 260 const std::list< Declaration *> & members 260 261 ) { … … 301 302 ObjectDecl * ehmExternVtable( 302 303 const std::string & exceptionName, 303 const std::list< Expression *> & parameters, 304 const std::list< Expression *> & parameters, 304 305 const std::string & tableName 305 306 ) { … … 456 457 } 457 458 458 class VTableCore : public WithDeclsToAdd {459 public:460 // Remove any remaining vtable type nodes in the tree.461 Type * postmutate( VTableType * vtableType );462 };463 464 Type * VTableCore::postmutate( VTableType * vtableType ) {465 auto inst = strict_dynamic_cast<ReferenceToType *>( vtableType->base );466 467 std::string vtableName = Virtual::vtableTypeName( inst->name );468 StructInstType * newType = new 
StructInstType( noQualifiers, vtableName );469 cloneAll( inst->parameters, newType->parameters );470 471 delete vtableType;472 return newType;473 }474 475 459 void translateExcept( std::list< Declaration *> & translationUnit ) { 476 460 PassVisitor<ExceptDeclCore> translator; 477 461 mutateAll( translationUnit, translator ); 478 PassVisitor<VTableCore> typeTranslator; 479 mutateAll( translationUnit, typeTranslator ); 480 } 481 482 } 462 } 463 464 } -
src/ControlStruct/module.mk
re5d9274 r015925a 10 10 ## Author : Richard C. Bilson 11 11 ## Created On : Mon Jun 1 17:49:17 2015 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Tue May 17 14:30:00202214 ## Update Count : 812 ## Last Modified By : Peter A. Buhr 13 ## Last Modified On : Sat Jan 29 12:04:19 2022 14 ## Update Count : 7 15 15 ############################################################################### 16 16 17 SRC += \17 SRC_CONTROLSTRUCT = \ 18 18 ControlStruct/ExceptDecl.cc \ 19 19 ControlStruct/ExceptDecl.h \ 20 ControlStruct/ExceptTranslateNew.cpp \21 ControlStruct/ExceptTranslate.cc \22 ControlStruct/ExceptTranslate.h \23 20 ControlStruct/FixLabels.cpp \ 24 21 ControlStruct/FixLabels.hpp \ … … 40 37 ControlStruct/Mutate.h 41 38 39 SRC += $(SRC_CONTROLSTRUCT) \ 40 ControlStruct/ExceptTranslateNew.cpp \ 41 ControlStruct/ExceptTranslate.cc \ 42 ControlStruct/ExceptTranslate.h 43 44 SRCDEMANGLE += $(SRC_CONTROLSTRUCT) 45 -
src/GenPoly/Lvalue.cc
re5d9274 r015925a 9 9 // Author : Richard C. Bilson 10 10 // Created On : Mon May 18 07:44:20 2015 11 // Last Modified By : Andrew Beach12 // Last Modified On : Mon May 16 14:09:00 202213 // Update Count : 811 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Dec 13 23:14:38 2019 13 // Update Count : 7 14 14 // 15 15 … … 125 125 } // namespace 126 126 127 // Stored elsewhere (Lvalue2, initially false). 128 extern bool referencesEliminated; 127 static bool referencesEliminated = false; 128 // used by UntypedExpr::createDeref to determine whether result type of dereference should be ReferenceType or value type. 129 bool referencesPermissable() { 130 return ! referencesEliminated; 131 } 129 132 130 133 void convertLvalue( std::list< Declaration* > & translationUnit ) { -
src/GenPoly/module.mk
re5d9274 r015925a 10 10 ## Author : Richard C. Bilson 11 11 ## Created On : Mon Jun 1 17:49:17 2015 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Tue May 17 14:31:00 202214 ## Update Count : 212 ## Last Modified By : Peter A. Buhr 13 ## Last Modified On : Mon Jun 1 17:52:30 2015 14 ## Update Count : 1 15 15 ############################################################################### 16 16 17 SRC_GENPOLY = \ 18 GenPoly/GenPoly.cc \ 19 GenPoly/GenPoly.h \ 20 GenPoly/Lvalue2.cc \ 21 GenPoly/Lvalue.h 17 SRC += GenPoly/Box.cc \ 18 GenPoly/Box.h \ 19 GenPoly/ErasableScopedMap.h \ 20 GenPoly/FindFunction.cc \ 21 GenPoly/FindFunction.h \ 22 GenPoly/GenPoly.cc \ 23 GenPoly/GenPoly.h \ 24 GenPoly/InstantiateGeneric.cc \ 25 GenPoly/InstantiateGeneric.h \ 26 GenPoly/Lvalue.cc \ 27 GenPoly/Lvalue.h \ 28 GenPoly/ScopedSet.h \ 29 GenPoly/ScrubTyVars.cc \ 30 GenPoly/ScrubTyVars.h \ 31 GenPoly/Specialize.cc \ 32 GenPoly/Specialize.h 22 33 23 SRC += $(SRC_GENPOLY) \ 24 GenPoly/Box.cc \ 25 GenPoly/Box.h \ 26 GenPoly/ErasableScopedMap.h \ 27 GenPoly/FindFunction.cc \ 28 GenPoly/FindFunction.h \ 29 GenPoly/InstantiateGeneric.cc \ 30 GenPoly/InstantiateGeneric.h \ 31 GenPoly/Lvalue.cc \ 32 GenPoly/ScopedSet.h \ 33 GenPoly/ScrubTyVars.cc \ 34 GenPoly/ScrubTyVars.h \ 35 GenPoly/Specialize.cc \ 36 GenPoly/Specialize.h 34 SRCDEMANGLE += GenPoly/GenPoly.cc GenPoly/GenPoly.h GenPoly/Lvalue.cc GenPoly/Lvalue.h 37 35 38 SRCDEMANGLE += $(SRC_GENPOLY) -
src/InitTweak/FixInitNew.cpp
re5d9274 r015925a 454 454 455 455 auto expr = new ast::ImplicitCopyCtorExpr( appExpr->location, mutExpr ); 456 // Move the type substitution to the new top-level. The substitution 457 // is needed to obtain the type of temporary variables so that copy 458 // constructor calls can be resolved. 456 // Move the type substitution to the new top-level, if it is attached to the appExpr. 457 // Ensure it is not deleted with the ImplicitCopyCtorExpr by removing it before deletion. 458 // The substitution is needed to obtain the type of temporary variables so that copy constructor 459 // calls can be resolved. 459 460 assert( typeSubs ); 461 // assert (mutExpr->env); 460 462 expr->env = tmp; 463 // mutExpr->env = nullptr; 464 //std::swap( expr->env, appExpr->env ); 461 465 return expr; 462 466 } 463 467 464 468 void ResolveCopyCtors::previsit(const ast::Expr * expr) { 465 if ( nullptr == expr->env ) { 466 return; 467 } 468 GuardValue( env ) = expr->env->clone(); 469 GuardValue( envModified ) = false; 469 if (expr->env) { 470 GuardValue(env); 471 GuardValue(envModified); 472 env = expr->env->clone(); 473 envModified = false; 474 } 470 475 } 471 476 472 477 const ast::Expr * ResolveCopyCtors::postvisit(const ast::Expr * expr) { 473 // No local environment, skip. 474 if ( nullptr == expr->env ) { 475 return expr; 476 // Environment was modified, mutate and replace. 477 } else if ( envModified ) { 478 auto mutExpr = mutate(expr); 479 mutExpr->env = env; 480 return mutExpr; 481 // Environment was not mutated, delete the shallow copy before guard. 
482 } else { 483 delete env; 478 if (expr->env) { 479 if (envModified) { 480 auto mutExpr = mutate(expr); 481 mutExpr->env = env; 482 return mutExpr; 483 } 484 else { 485 // env was not mutated, skip and delete the shallow copy 486 delete env; 487 return expr; 488 } 489 } 490 else { 484 491 return expr; 485 492 } … … 490 497 const ast::Expr * ResolveCopyCtors::makeCtorDtor( const std::string & fname, const ast::ObjectDecl * var, const ast::Expr * cpArg ) { 491 498 assert( var ); 492 assert ( var->isManaged());493 assert ( !cpArg || cpArg->isManaged());499 assert (var->isManaged()); 500 assert (!cpArg || cpArg->isManaged()); 494 501 // arrays are not copy constructed, so this should always be an ExprStmt 495 502 ast::ptr< ast::Stmt > stmt = genCtorDtor(var->location, fname, var, cpArg ); … … 497 504 auto exprStmt = stmt.strict_as<ast::ImplicitCtorDtorStmt>()->callStmt.strict_as<ast::ExprStmt>(); 498 505 ast::ptr<ast::Expr> untyped = exprStmt->expr; // take ownership of expr 506 // exprStmt->expr = nullptr; 499 507 500 508 // resolve copy constructor … … 508 516 env->add( *resolved->env ); 509 517 envModified = true; 518 // delete resolved->env; 510 519 auto mut = mutate(resolved.get()); 511 520 assertf(mut == resolved.get(), "newly resolved expression must be unique"); 512 521 mut->env = nullptr; 513 522 } // if 523 // delete stmt; 514 524 if ( auto assign = resolved.as<ast::TupleAssignExpr>() ) { 515 525 // fix newly generated StmtExpr -
src/InitTweak/GenInit.cc
re5d9274 r015925a 368 368 369 369 struct ReturnFixer_New final : 370 public ast::WithStmtsToAdd<>, ast::WithGuards , ast::WithShortCircuiting{370 public ast::WithStmtsToAdd<>, ast::WithGuards { 371 371 void previsit( const ast::FunctionDecl * decl ); 372 372 const ast::ReturnStmt * previsit( const ast::ReturnStmt * stmt ); … … 376 376 377 377 void ReturnFixer_New::previsit( const ast::FunctionDecl * decl ) { 378 if (decl->linkage == ast::Linkage::Intrinsic) visit_children = false;379 378 GuardValue( funcDecl ) = decl; 380 379 } -
src/InitTweak/module.mk
re5d9274 r015925a 10 10 ## Author : Richard C. Bilson 11 11 ## Created On : Mon Jun 1 17:49:17 2015 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Tue May 17 14:31:00 202214 ## Update Count : 412 ## Last Modified By : Rob Schluntz 13 ## Last Modified On : Fri May 13 11:36:24 2016 14 ## Update Count : 3 15 15 ############################################################################### 16 16 17 SRC_INITTWEAK = \ 17 SRC += \ 18 InitTweak/FixGlobalInit.cc \ 19 InitTweak/FixGlobalInit.h \ 20 InitTweak/FixInit.cc \ 21 InitTweak/FixInit.h \ 22 InitTweak/GenInit.cc \ 23 InitTweak/GenInit.h \ 24 InitTweak/InitTweak.cc \ 25 InitTweak/InitTweak.h \ 26 InitTweak/FixInitNew.cpp 27 28 SRCDEMANGLE += \ 18 29 InitTweak/GenInit.cc \ 19 30 InitTweak/GenInit.h \ … … 21 32 InitTweak/InitTweak.h 22 33 23 SRC += $(SRC_INITTWEAK) \24 InitTweak/FixGlobalInit.cc \25 InitTweak/FixGlobalInit.h \26 InitTweak/FixInit.cc \27 InitTweak/FixInit.h \28 InitTweak/FixInitNew.cpp29 30 SRCDEMANGLE += $(SRC_INITTWEAK) -
src/Parser/parser.yy
re5d9274 r015925a 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sat May 14 09:16:22202213 // Update Count : 5 40112 // Last Modified On : Wed May 4 17:22:48 2022 13 // Update Count : 5279 14 14 // 15 15 … … 54 54 #include "Common/SemanticError.h" // error_str 55 55 #include "Common/utility.h" // for maybeMoveBuild, maybeBuild, CodeLo... 56 57 #include "SynTree/Attribute.h" // for Attribute58 56 59 57 extern DeclarationNode * parseTree; … … 95 93 } // appendStr 96 94 97 DeclarationNode * distAttr( DeclarationNode * typeSpec, DeclarationNode * declList ) { 98 // distribute declaration_specifier across all declared variables, e.g., static, const, but not __attribute__. 99 assert( declList ); 100 // printf( "distAttr1 typeSpec %p\n", typeSpec ); typeSpec->print( std::cout ); 101 DeclarationNode * cur = declList, * cl = (new DeclarationNode)->addType( typeSpec ); 102 // printf( "distAttr2 cl %p\n", cl ); cl->type->print( std::cout ); 103 // cl->type->aggregate.name = cl->type->aggInst.aggregate->aggregate.name; 104 95 DeclarationNode * distAttr( DeclarationNode * specifier, DeclarationNode * declList ) { 96 // distribute declaration_specifier across all declared variables, e.g., static, const, __attribute__. 97 DeclarationNode * cur = declList, * cl = (new DeclarationNode)->addType( specifier ); 105 98 for ( cur = dynamic_cast<DeclarationNode *>( cur->get_next() ); cur != nullptr; cur = dynamic_cast<DeclarationNode *>( cur->get_next() ) ) { 106 99 cl->cloneBaseType( cur ); 107 100 } // for 108 101 declList->addType( cl ); 109 // printf( "distAttr3 declList %p\n", declList ); declList->print( std::cout, 0 );110 102 return declList; 111 103 } // distAttr … … 179 171 if ( ! 
( typeSpec->type && (typeSpec->type->kind == TypeData::Aggregate || typeSpec->type->kind == TypeData::Enum) ) ) { 180 172 stringstream ss; 181 // printf( "fieldDecl1 typeSpec %p\n", typeSpec ); typeSpec->type->print( std::cout);173 typeSpec->type->print( ss ); 182 174 SemanticWarning( yylloc, Warning::SuperfluousDecl, ss.str().c_str() ); 183 175 return nullptr; 184 176 } // if 185 // printf( "fieldDecl2 typeSpec %p\n", typeSpec ); typeSpec->type->print( std::cout );186 177 fieldList = DeclarationNode::newName( nullptr ); 187 178 } // if 188 // return distAttr( typeSpec, fieldList ); // mark all fields in list 189 190 // printf( "fieldDecl3 typeSpec %p\n", typeSpec ); typeSpec->print( std::cout, 0 ); 191 DeclarationNode * temp = distAttr( typeSpec, fieldList ); // mark all fields in list 192 // printf( "fieldDecl4 temp %p\n", temp ); temp->print( std::cout, 0 ); 193 return temp; 179 return distAttr( typeSpec, fieldList ); // mark all fields in list 194 180 } // fieldDecl 195 181 … … 1634 1620 declaration: // old & new style declarations 1635 1621 c_declaration ';' 1636 {1637 // printf( "C_DECLARATION1 %p %s\n", $$, $$->name ? $$->name->c_str() : "(nil)" );1638 // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {1639 // printf( "\tattr %s\n", attr->name.c_str() );1640 // } // for1641 }1642 1622 | cfa_declaration ';' // CFA 1643 1623 | static_assert // C11 … … 1845 1825 basic_type_specifier 1846 1826 | sue_type_specifier 1847 {1848 // printf( "sue_type_specifier2 %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );1849 // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {1850 // printf( "\tattr %s\n", attr->name.c_str() );1851 // } // for1852 }1853 1827 | type_type_specifier 1854 1828 ; … … 2067 2041 sue_declaration_specifier: // struct, union, enum + storage class + type specifier 2068 2042 sue_type_specifier 2069 {2070 // printf( "sue_declaration_specifier %p %s\n", $$, $$->type->aggregate.name ? 
$$->type->aggregate.name->c_str() : "(nil)" );2071 // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {2072 // printf( "\tattr %s\n", attr->name.c_str() );2073 // } // for2074 }2075 2043 | declaration_qualifier_list sue_type_specifier 2076 2044 { $$ = $2->addQualifiers( $1 ); } … … 2083 2051 sue_type_specifier: // struct, union, enum + type specifier 2084 2052 elaborated_type 2085 {2086 // printf( "sue_type_specifier %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );2087 // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {2088 // printf( "\tattr %s\n", attr->name.c_str() );2089 // } // for2090 }2091 2053 | type_qualifier_list 2092 2054 { if ( $1->type != nullptr && $1->type->forall ) forall = true; } // remember generic type … … 2161 2123 elaborated_type: // struct, union, enum 2162 2124 aggregate_type 2163 {2164 // printf( "elaborated_type %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );2165 // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {2166 // printf( "\tattr %s\n", attr->name.c_str() );2167 // } // for2168 }2169 2125 | enum_type 2170 2126 ; … … 2186 2142 } 2187 2143 '{' field_declaration_list_opt '}' type_parameters_opt 2188 { 2189 // printf( "aggregate_type1 %s\n", $3.str->c_str() ); 2190 // if ( $2 ) 2191 // for ( Attribute * attr: reverseIterate( $2->attributes ) ) { 2192 // printf( "copySpecifiers12 %s\n", attr->name.c_str() ); 2193 // } // for 2194 $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 ); 2195 // printf( "aggregate_type2 %p %s\n", $$, $$->type->aggregate.name ? 
$$->type->aggregate.name->c_str() : "(nil)" ); 2196 // for ( Attribute * attr: reverseIterate( $$->attributes ) ) { 2197 // printf( "aggregate_type3 %s\n", attr->name.c_str() ); 2198 // } // for 2199 } 2144 { $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 ); } 2200 2145 | aggregate_key attribute_list_opt TYPEDEFname // unqualified type name 2201 2146 { … … 2205 2150 '{' field_declaration_list_opt '}' type_parameters_opt 2206 2151 { 2207 // printf( "AGG3\n" );2208 2152 DeclarationNode::newFromTypedef( $3 ); 2209 2153 $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 ); … … 2216 2160 '{' field_declaration_list_opt '}' type_parameters_opt 2217 2161 { 2218 // printf( "AGG4\n" );2219 2162 DeclarationNode::newFromTypeGen( $3, nullptr ); 2220 2163 $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 ); … … 2293 2236 field_declaration: 2294 2237 type_specifier field_declaring_list_opt ';' 2295 { 2296 // printf( "type_specifier1 %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" ); 2297 $$ = fieldDecl( $1, $2 ); 2298 // printf( "type_specifier2 %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" ); 2299 // for ( Attribute * attr: reverseIterate( $$->attributes ) ) { 2300 // printf( "\tattr %s\n", attr->name.c_str() ); 2301 // } // for 2302 } 2238 { $$ = fieldDecl( $1, $2 ); } 2303 2239 | EXTENSION type_specifier field_declaring_list_opt ';' // GCC 2304 2240 { $$ = fieldDecl( $2, $3 ); distExt( $$ ); } … … 2909 2845 // empty 2910 2846 { $$ = nullptr; forall = false; } 2911 | WITH '(' tuple_expression_list ')' attribute_list_opt 2912 { 2913 $$ = $3; forall = false; 2914 if ( $5 ) { 2915 SemanticError( yylloc, "Attributes cannot be associated with function body. Move attribute(s) before \"with\" clause." 
); 2916 $$ = nullptr; 2917 } // if 2918 } 2847 | WITH '(' tuple_expression_list ')' 2848 { $$ = $3; forall = false; } 2919 2849 ; 2920 2850 -
src/ResolvExpr/AlternativeFinder.cc
re5d9274 r015925a 42 42 #include "SymTab/Indexer.h" // for Indexer 43 43 #include "SymTab/Mangler.h" // for Mangler 44 #include "SymTab/Validate Type.h"// for validateType44 #include "SymTab/Validate.h" // for validateType 45 45 #include "SynTree/Constant.h" // for Constant 46 46 #include "SynTree/Declaration.h" // for DeclarationWithType, TypeDecl, Dec... -
src/ResolvExpr/CandidateFinder.cpp
re5d9274 r015925a 899 899 900 900 if (argType.as<ast::PointerType>()) funcFinder.otypeKeys.insert(Mangle::Encoding::pointer); 901 else if (const ast::EnumInstType * enumInst = argType.as<ast::EnumInstType>()) {902 const ast::EnumDecl * enumDecl = enumInst->base;903 if ( const ast::Type* enumType = enumDecl->base ) {904 // instance of enum (T) is a instance of type (T)905 funcFinder.otypeKeys.insert(Mangle::mangle(enumType, Mangle::NoGenericParams | Mangle::Type));906 } else {907 // instance of an untyped enum is techically int908 funcFinder.otypeKeys.insert(Mangle::mangle(enumDecl, Mangle::NoGenericParams | Mangle::Type));909 }910 }911 901 else funcFinder.otypeKeys.insert(Mangle::mangle(argType, Mangle::NoGenericParams | Mangle::Type)); 912 902 } … … 928 918 929 919 // find function operators 930 ast::ptr< ast::Expr > opExpr = new ast::NameExpr{ untypedExpr->location, "?()" }; // ??? why not ?{}920 ast::ptr< ast::Expr > opExpr = new ast::NameExpr{ untypedExpr->location, "?()" }; 931 921 CandidateFinder opFinder( context, tenv ); 932 922 // okay if there aren't any function operations -
src/ResolvExpr/CommonType.cc
re5d9274 r015925a 497 497 result = new BasicType( basicType->tq | otherBasic->tq, newType ); 498 498 } // if 499 } else if ( dynamic_cast< ZeroType * >( type2 ) || dynamic_cast< OneType * >( type2 ) ) {499 } else if ( dynamic_cast< EnumInstType * > ( type2 ) || dynamic_cast< ZeroType * >( type2 ) || dynamic_cast< OneType * >( type2 ) ) { 500 500 // use signed int in lieu of the enum/zero/one type 501 501 BasicType::Kind newType = commonTypes[ basicType->get_kind() ][ BasicType::SignedInt ]; … … 503 503 result = new BasicType( basicType->tq | type2->tq, newType ); 504 504 } // if 505 } else if ( const EnumInstType * enumInst = dynamic_cast< const EnumInstType * > ( type2 ) ) { 506 const EnumDecl* enumDecl = enumInst->baseEnum; 507 if ( const Type* baseType = enumDecl->base ) { 508 result = baseType->clone(); 509 } else { 510 BasicType::Kind newType = commonTypes[ basicType->get_kind() ][ BasicType::SignedInt ]; 511 if ( ( ( newType == basicType->get_kind() && basicType->tq >= type2->tq ) || widenFirst ) && ( ( newType != basicType->get_kind() && basicType->tq <= type2->tq ) || widenSecond ) ) { 512 result = new BasicType( basicType->tq | type2->tq, newType ); 513 } // if 514 } 515 } 505 } // if 516 506 } 517 507 … … 701 691 } 702 692 } else if ( 703 dynamic_cast< const ast::ZeroType * >( type2 ) 693 dynamic_cast< const ast::EnumInstType * >( type2 ) 694 || dynamic_cast< const ast::ZeroType * >( type2 ) 704 695 || dynamic_cast< const ast::OneType * >( type2 ) 705 696 ) { … … 714 705 result = new ast::BasicType{ kind, basic->qualifiers | type2->qualifiers }; 715 706 } 716 } else if ( const ast::EnumInstType * enumInst = dynamic_cast< const ast::EnumInstType * >( type2 ) ) {717 #warning remove casts when `commonTypes` moved to new AST718 const ast::EnumDecl* enumDecl = enumInst->base;719 if ( enumDecl->base ) {720 result = enumDecl->base.get();721 } else {722 ast::BasicType::Kind kind = (ast::BasicType::Kind)(int)commonTypes[ (BasicType::Kind)(int)basic->kind ][ 
(BasicType::Kind)(int)ast::BasicType::SignedInt ];723 if (724 ( ( kind == basic->kind && basic->qualifiers >= type2->qualifiers )725 || widen.first )726 && ( ( kind != basic->kind && basic->qualifiers <= type2->qualifiers )727 || widen.second )728 ) {729 result = new ast::BasicType{ kind, basic->qualifiers | type2->qualifiers };730 }731 }732 707 } 733 708 } … … 748 723 result = voidPtr; 749 724 add_qualifiers( result, oPtr->qualifiers ); 750 }751 752 // For a typed enum, we want to unify type1 with the base type of the enum753 bool tryResolveWithTypedEnum( const ast::Type * type1 ) {754 if (auto enumInst = dynamic_cast<const ast::EnumInstType *> (type2) ) {755 ast::AssertionSet have, need; // unused756 ast::OpenVarSet newOpen{ open };757 if (enumInst->base->base758 && unifyExact(type1, enumInst->base->base, tenv, need, have, newOpen, widen, symtab)) {759 result = type1;760 return true;761 }762 }763 return false;764 725 } 765 726 … … 807 768 result = pointer; 808 769 add_qualifiers( result, type2->qualifiers ); 809 } else { 810 tryResolveWithTypedEnum( pointer ); 811 } 812 } 813 814 void postvisit( const ast::ArrayType * arr ) { 815 // xxx - does it make sense? 816 tryResolveWithTypedEnum( arr ); 817 } 770 } 771 } 772 773 void postvisit( const ast::ArrayType * ) {} 818 774 819 775 void postvisit( const ast::ReferenceType * ref ) { … … 854 810 result = ref; 855 811 add_qualifiers( result, type2->qualifiers ); 856 } else { 857 // xxx - does unifying a ref with typed enumInst makes sense? 
858 if (!dynamic_cast<const ast::EnumInstType *>(type2)) 859 result = commonType( type2, ref, widen, symtab, tenv, open ); 860 } 861 } 862 863 void postvisit( const ast::FunctionType * func) { 864 tryResolveWithTypedEnum( func ); 865 } 866 867 void postvisit( const ast::StructInstType * inst ) { 868 tryResolveWithTypedEnum( inst ); 869 } 870 871 void postvisit( const ast::UnionInstType * inst ) { 872 tryResolveWithTypedEnum( inst ); 873 } 812 } 813 } 814 815 void postvisit( const ast::FunctionType * ) {} 816 817 void postvisit( const ast::StructInstType * ) {} 818 819 void postvisit( const ast::UnionInstType * ) {} 874 820 875 821 void postvisit( const ast::EnumInstType * enumInst ) { 876 // reuse BasicType/EnumInstType common type by swapping 877 // xxx - is this already handled by unify? 878 if (!dynamic_cast<const ast::EnumInstType *>(type2)) 822 if ( 823 dynamic_cast< const ast::BasicType * >( type2 ) 824 || dynamic_cast< const ast::ZeroType * >( type2 ) 825 || dynamic_cast< const ast::OneType * >( type2 ) 826 ) { 827 // reuse BasicType/EnumInstType common type by swapping 879 828 result = commonType( type2, enumInst, widen, symtab, tenv, open ); 829 } 880 830 } 881 831 … … 900 850 result = type2; 901 851 reset_qualifiers( result, q1 | q2 ); 902 } else {903 tryResolveWithTypedEnum( t1 );904 852 } 905 853 } … … 907 855 } 908 856 909 void postvisit( const ast::TupleType * tuple) { 910 tryResolveWithTypedEnum( tuple ); 911 } 857 void postvisit( const ast::TupleType * ) {} 912 858 913 859 void postvisit( const ast::VarArgsType * ) {} … … 915 861 void postvisit( const ast::ZeroType * zero ) { 916 862 if ( ! 
widen.first ) return; 917 if ( dynamic_cast< const ast::BasicType * >( type2 ) 918 || dynamic_cast< const ast::PointerType * >( type2 ) ) { 863 if ( 864 dynamic_cast< const ast::BasicType * >( type2 ) 865 || dynamic_cast< const ast::PointerType * >( type2 ) 866 || dynamic_cast< const ast::EnumInstType * >( type2 ) 867 ) { 919 868 if ( widen.second || zero->qualifiers <= type2->qualifiers ) { 920 869 result = type2; … … 924 873 result = new ast::BasicType{ 925 874 ast::BasicType::SignedInt, zero->qualifiers | type2->qualifiers }; 926 } else if ( const ast::EnumInstType * enumInst = dynamic_cast< const ast::EnumInstType * >( type2 ) ) {927 const ast::EnumDecl * enumDecl = enumInst->base;928 if ( enumDecl->base ) {929 if ( tryResolveWithTypedEnum( zero ) )930 add_qualifiers( result, zero->qualifiers );931 } else {932 if ( widen.second || zero->qualifiers <= type2->qualifiers ) {933 result = type2;934 add_qualifiers( result, zero->qualifiers );935 }936 }937 875 } 938 876 } … … 940 878 void postvisit( const ast::OneType * one ) { 941 879 if ( ! widen.first ) return; 942 if ( dynamic_cast< const ast::BasicType * >( type2 ) ) { 880 if ( 881 dynamic_cast< const ast::BasicType * >( type2 ) 882 || dynamic_cast< const ast::EnumInstType * >( type2 ) 883 ) { 943 884 if ( widen.second || one->qualifiers <= type2->qualifiers ) { 944 885 result = type2; … … 948 889 result = new ast::BasicType{ 949 890 ast::BasicType::SignedInt, one->qualifiers | type2->qualifiers }; 950 } else if ( const ast::EnumInstType * enumInst = dynamic_cast< const ast::EnumInstType * >( type2 ) ) {951 const ast::EnumDecl * enumBase = enumInst->base;952 if ( enumBase->base ) {953 if ( tryResolveWithTypedEnum( one ))954 add_qualifiers( result, one->qualifiers );955 } else {956 if ( widen.second || one->qualifiers <= type2->qualifiers ) {957 result = type2;958 add_qualifiers( result, one->qualifiers );959 }960 }961 891 } 962 892 } -
src/ResolvExpr/ConversionCost.cc
re5d9274 r015925a 321 321 } 322 322 323 // refactor for code resue324 void ConversionCost::conversionCostFromBasicToBasic(const BasicType * src, const BasicType * dest) {325 int tableResult = costMatrix[ src->kind ][ dest->kind ];326 if ( tableResult == -1 ) {327 cost = Cost::unsafe;328 } else {329 cost = Cost::zero;330 cost.incSafe( tableResult );331 cost.incSign( signMatrix[ src->kind ][ dest->kind ] );332 } // if333 } // ConversionCost::conversionCostFromBasicToBasic334 335 323 void ConversionCost::postvisit(const BasicType * basicType) { 336 324 if ( const BasicType * destAsBasic = dynamic_cast< const BasicType * >( dest ) ) { 337 conversionCostFromBasicToBasic(basicType, destAsBasic); 338 } else if ( const EnumInstType * enumInst = dynamic_cast< const EnumInstType * >( dest ) ) { 339 const EnumDecl * base_enum = enumInst->baseEnum; 340 if ( const Type * base = base_enum->base ) { // if the base enum has a base (if it is typed) 341 if ( const BasicType * enumBaseAstBasic = dynamic_cast< const BasicType *> (base) ) { 342 conversionCostFromBasicToBasic(basicType, enumBaseAstBasic); 343 } else { 344 cost = Cost::infinity; 345 } // if 325 int tableResult = costMatrix[ basicType->kind ][ destAsBasic->kind ]; 326 if ( tableResult == -1 ) { 327 cost = Cost::unsafe; 328 } else { 329 cost = Cost::zero; 330 cost.incSafe( tableResult ); 331 cost.incSign( signMatrix[ basicType->kind ][ destAsBasic->kind ] ); 332 } // if 333 } else if ( dynamic_cast< const EnumInstType * >( dest ) ) { 334 // xxx - not positive this is correct, but appears to allow casting int => enum 335 // TODO 336 EnumDecl * decl = dynamic_cast< const EnumInstType * >( dest )->baseEnum; 337 if ( decl->base ) { 338 cost = Cost::infinity; 346 339 } else { 347 340 cost = Cost::unsafe; … … 405 398 void ConversionCost::postvisit( const FunctionType * ) {} 406 399 407 void ConversionCost::postvisit( const EnumInstType * enumInst) { 408 const EnumDecl * enumDecl = enumInst -> baseEnum; 409 if ( const Type * 
enumType = enumDecl -> base ) { // if it is a typed enum 410 cost = costFunc( enumType, dest, srcIsLvalue, indexer, env ); 411 } else { 412 static Type::Qualifiers q; 413 static BasicType integer( q, BasicType::SignedInt ); 414 cost = costFunc( &integer, dest, srcIsLvalue, indexer, env ); // safe if dest >= int 415 } // if 400 void ConversionCost::postvisit( const EnumInstType * ) { 401 static Type::Qualifiers q; 402 static BasicType integer( q, BasicType::SignedInt ); 403 cost = costFunc( &integer, dest, srcIsLvalue, indexer, env ); // safe if dest >= int 416 404 if ( cost < Cost::unsafe ) { 417 cost.incSafe();405 cost.incSafe(); 418 406 } // if 419 407 } … … 616 604 } 617 605 618 void ConversionCost_new::conversionCostFromBasicToBasic( const ast::BasicType * src, const ast::BasicType* dest ) {619 int tableResult = costMatrix[ src->kind ][ dest->kind ];620 if ( tableResult == -1 ) {621 cost = Cost::unsafe;622 } else {623 cost = Cost::zero;624 cost.incSafe( tableResult );625 cost.incSign( signMatrix[ src->kind ][ dest->kind ] );626 }627 }628 629 606 void ConversionCost_new::postvisit( const ast::BasicType * basicType ) { 630 607 if ( const ast::BasicType * dstAsBasic = dynamic_cast< const ast::BasicType * >( dst ) ) { 631 conversionCostFromBasicToBasic( basicType, dstAsBasic ); 632 } else if ( const ast::EnumInstType * enumInst = dynamic_cast< const ast::EnumInstType * >( dst ) ) { 633 const ast::EnumDecl * enumDecl = enumInst->base.get(); 634 if ( const ast::Type * enumType = enumDecl->base.get() ) { 635 if ( const ast::BasicType * enumTypeAsBasic = dynamic_cast<const ast::BasicType *>(enumType) ) { 636 conversionCostFromBasicToBasic( basicType, enumTypeAsBasic ); 637 } else { 638 cost = Cost::infinity; 639 } 640 } else { 641 cost = Cost::unsafe; 642 } 608 int tableResult = costMatrix[ basicType->kind ][ dstAsBasic->kind ]; 609 if ( tableResult == -1 ) { 610 cost = Cost::unsafe; 611 } else { 612 cost = Cost::zero; 613 cost.incSafe( tableResult ); 614 cost.incSign( 
signMatrix[ basicType->kind ][ dstAsBasic->kind ] ); 615 } 616 } else if ( dynamic_cast< const ast::EnumInstType * >( dst ) ) { 617 // xxx - not positive this is correct, but appears to allow casting int => enum 618 const ast::EnumDecl * decl = (dynamic_cast< const ast::EnumInstType * >( dst ))->base.get(); 619 if ( decl->base ) { 620 cost = Cost::infinity; 621 } else { 622 cost = Cost::unsafe; 623 } // if 643 624 } 644 625 } … … 692 673 693 674 void ConversionCost_new::postvisit( const ast::EnumInstType * enumInstType ) { 694 const ast::EnumDecl * baseEnum = enumInstType->base; 695 if ( const ast::Type * baseType = baseEnum->base ) { 696 cost = costCalc( baseType, dst, srcIsLvalue, symtab, env ); 697 } else { 698 (void)enumInstType; 699 static ast::ptr<ast::BasicType> integer = { new ast::BasicType( ast::BasicType::SignedInt ) }; 700 cost = costCalc( integer, dst, srcIsLvalue, symtab, env ); 701 } 675 (void)enumInstType; 676 static ast::ptr<ast::BasicType> integer = { new ast::BasicType( ast::BasicType::SignedInt ) }; 677 cost = costCalc( integer, dst, srcIsLvalue, symtab, env ); 702 678 if ( cost < Cost::unsafe ) { 703 679 cost.incSafe(); -
src/ResolvExpr/ConversionCost.h
re5d9274 r015925a 65 65 const TypeEnvironment &env; 66 66 CostFunction costFunc; 67 private:68 // refactor for code resue69 void conversionCostFromBasicToBasic( const BasicType * src, const BasicType* dest );70 67 }; 71 68 … … 114 111 void postvisit( const ast::ZeroType * zeroType ); 115 112 void postvisit( const ast::OneType * oneType ); 116 private:117 // refactor for code resue118 void conversionCostFromBasicToBasic( const ast::BasicType * src, const ast::BasicType* dest );119 113 }; 120 114 -
src/SymTab/Autogen.h
re5d9274 r015925a 21 21 22 22 #include "AST/Decl.hpp" 23 #include "AST/Eval.hpp" 23 24 #include "AST/Expr.hpp" 24 25 #include "AST/Init.hpp" … … 70 71 template< typename OutIter > 71 72 ast::ptr< ast::Stmt > genCall( 72 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 73 const CodeLocation & loc, const std::string & fname, OutIter && out, 73 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 74 const CodeLocation & loc, const std::string & fname, OutIter && out, 74 75 const ast::Type * type, const ast::Type * addCast, LoopDirection forward = LoopForward ); 75 76 … … 127 128 } 128 129 129 /// inserts into out a generated call expression to function fname with arguments dstParam and 130 /// inserts into out a generated call expression to function fname with arguments dstParam and 130 131 /// srcParam. Should only be called with non-array types. 131 /// optionally returns a statement which must be inserted prior to the containing loop, if 132 /// optionally returns a statement which must be inserted prior to the containing loop, if 132 133 /// there is one 133 134 template< typename OutIter > 134 ast::ptr< ast::Stmt > genScalarCall( 135 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 136 const CodeLocation & loc, std::string fname, OutIter && out, const ast::Type * type, 135 ast::ptr< ast::Stmt > genScalarCall( 136 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 137 const CodeLocation & loc, std::string fname, OutIter && out, const ast::Type * type, 137 138 const ast::Type * addCast = nullptr 138 139 ) { … … 152 153 153 154 if ( addCast ) { 154 // cast to T& with qualifiers removed, so that qualified objects can be constructed and 155 // destructed with the same functions as non-qualified objects. 
Unfortunately, lvalue 156 // is considered a qualifier - for AddressExpr to resolve, its argument must have an 155 // cast to T& with qualifiers removed, so that qualified objects can be constructed and 156 // destructed with the same functions as non-qualified objects. Unfortunately, lvalue 157 // is considered a qualifier - for AddressExpr to resolve, its argument must have an 157 158 // lvalue-qualified type, so remove all qualifiers except lvalue. 158 159 // xxx -- old code actually removed lvalue too... 159 160 ast::ptr< ast::Type > guard = addCast; // prevent castType from mutating addCast 160 161 ast::ptr< ast::Type > castType = addCast; 161 ast::remove_qualifiers( 162 castType, 162 ast::remove_qualifiers( 163 castType, 163 164 ast::CV::Const | ast::CV::Volatile | ast::CV::Restrict | ast::CV::Atomic ); 164 165 dstParam = new ast::CastExpr{ dstParam, new ast::ReferenceType{ castType } }; … … 180 181 181 182 srcParam.clearArrayIndices(); 182 183 183 184 return listInit; 184 185 } … … 248 249 } 249 250 250 /// Store in out a loop which calls fname on each element of the array with srcParam and 251 /// Store in out a loop which calls fname on each element of the array with srcParam and 251 252 /// dstParam as arguments. 
If forward is true, loop goes from 0 to N-1, else N-1 to 0 252 253 template< typename OutIter > 253 254 void genArrayCall( 254 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 255 const CodeLocation & loc, const std::string & fname, OutIter && out, 256 const ast::ArrayType * array, const ast::Type * addCast = nullptr, 257 LoopDirection forward = LoopForward 255 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 256 const CodeLocation & loc, const std::string & fname, OutIter && out, 257 const ast::ArrayType * array, const ast::Type * addCast = nullptr, 258 LoopDirection forward = LoopForward 258 259 ) { 259 260 static UniqueName indexName( "_index" ); … … 278 279 } else { 279 280 // generate: for ( int i = N-1; i >= 0; --i ) 280 begin = ast:: UntypedExpr::createCall( loc, "?-?",281 { array->dimension, ast::ConstantExpr::from_int( loc, 1 ) });281 begin = ast::call( 282 loc, "?-?", array->dimension, ast::ConstantExpr::from_int( loc, 1 ) ); 282 283 end = ast::ConstantExpr::from_int( loc, 0 ); 283 284 cmp = "?>=?"; … … 285 286 } 286 287 287 ast::ptr< ast::DeclWithType > index = new ast::ObjectDecl{ 288 loc, indexName.newName(), new ast::BasicType{ ast::BasicType::SignedInt }, 288 ast::ptr< ast::DeclWithType > index = new ast::ObjectDecl{ 289 loc, indexName.newName(), new ast::BasicType{ ast::BasicType::SignedInt }, 289 290 new ast::SingleInit{ loc, begin } }; 290 291 ast::ptr< ast::Expr > indexVar = new ast::VariableExpr{ loc, index }; 291 292 ast::ptr< ast::Expr > cond = ast::UntypedExpr::createCall( 293 loc, cmp, { indexVar, end } ); 294 295 ast::ptr< ast::Expr > inc = ast::UntypedExpr::createCall( 296 loc, update, { indexVar } ); 297 298 ast::ptr< ast::Expr > dstIndex = ast::UntypedExpr::createCall( 299 loc, "?[?]", { dstParam, indexVar } ); 300 301 // srcParam must keep track of the array indices to build the source parameter and/or 292 293 ast::ptr< ast::Expr > cond = ast::call( loc, cmp, indexVar, end ); 294 295 ast::ptr< 
ast::Expr > inc = ast::call( loc, update, indexVar ); 296 297 ast::ptr< ast::Expr > dstIndex = ast::call( loc, "?[?]", dstParam, indexVar ); 298 299 // srcParam must keep track of the array indices to build the source parameter and/or 302 300 // array list initializer 303 301 srcParam.addArrayIndex( indexVar, array->dimension ); … … 305 303 // for stmt's body, eventually containing call 306 304 ast::CompoundStmt * body = new ast::CompoundStmt{ loc }; 307 ast::ptr< ast::Stmt > listInit = genCall( 308 srcParam, dstIndex, loc, fname, std::back_inserter( body->kids ), array->base, addCast, 305 ast::ptr< ast::Stmt > listInit = genCall( 306 srcParam, dstIndex, loc, fname, std::back_inserter( body->kids ), array->base, addCast, 309 307 forward ); 310 308 311 309 // block containing the stmt and index variable 312 310 ast::CompoundStmt * block = new ast::CompoundStmt{ loc }; … … 330 328 template< typename OutIter > 331 329 ast::ptr< ast::Stmt > genCall( 332 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 333 const CodeLocation & loc, const std::string & fname, OutIter && out, 330 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 331 const CodeLocation & loc, const std::string & fname, OutIter && out, 334 332 const ast::Type * type, const ast::Type * addCast, LoopDirection forward 335 333 ) { 336 334 if ( auto at = dynamic_cast< const ast::ArrayType * >( type ) ) { 337 genArrayCall( 338 srcParam, dstParam, loc, fname, std::forward< OutIter >(out), at, addCast, 335 genArrayCall( 336 srcParam, dstParam, loc, fname, std::forward< OutIter >(out), at, addCast, 339 337 forward ); 340 338 return {}; 341 339 } else { 342 return genScalarCall( 340 return genScalarCall( 343 341 srcParam, dstParam, loc, fname, std::forward< OutIter >( out ), type, addCast ); 344 342 } … … 379 377 } 380 378 381 static inline ast::ptr< ast::Stmt > genImplicitCall( 382 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 383 const CodeLocation & loc, 
const std::string & fname, const ast::ObjectDecl * obj, 384 LoopDirection forward = LoopForward 379 static inline ast::ptr< ast::Stmt > genImplicitCall( 380 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam, 381 const CodeLocation & loc, const std::string & fname, const ast::ObjectDecl * obj, 382 LoopDirection forward = LoopForward 385 383 ) { 386 384 // unnamed bit fields are not copied as they cannot be accessed … … 394 392 395 393 std::vector< ast::ptr< ast::Stmt > > stmts; 396 genCall( 394 genCall( 397 395 srcParam, dstParam, loc, fname, back_inserter( stmts ), obj->type, addCast, forward ); 398 396 … … 402 400 const ast::Stmt * callStmt = stmts.front(); 403 401 if ( addCast ) { 404 // implicitly generated ctor/dtor calls should be wrapped so that later passes are 402 // implicitly generated ctor/dtor calls should be wrapped so that later passes are 405 403 // aware they were generated. 406 404 callStmt = new ast::ImplicitCtorDtorStmt{ callStmt->location, callStmt }; … … 419 417 // compile-command: "make install" // 420 418 // End: // 419 -
src/SymTab/Demangle.cc
re5d9274 r015925a 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // Demangle .cc -- Convert a mangled name into a human readable name.7 // Demangler.cc -- 8 8 // 9 9 // Author : Rob Schluntz -
src/SymTab/Mangler.h
re5d9274 r015925a 111 111 } 112 112 113 extern "C" { 114 char * cforall_demangle(const char *, int); 115 } 116 113 117 // Local Variables: // 114 118 // tab-width: 4 // -
src/SymTab/Validate.cc
re5d9274 r015925a 10 10 // Created On : Sun May 17 21:50:04 2015 11 11 // Last Modified By : Andrew Beach 12 // Last Modified On : Tue May 17 14:36:00 202213 // Update Count : 36 612 // Last Modified On : Fri Apr 29 9:45:00 2022 13 // Update Count : 365 14 14 // 15 15 … … 74 74 #include "ResolvExpr/ResolveTypeof.h" // for resolveTypeof 75 75 #include "SymTab/Autogen.h" // for SizeType 76 #include "SymTab/ValidateType.h" // for decayEnumsAndPointers, decayFo...77 76 #include "SynTree/LinkageSpec.h" // for C 78 77 #include "SynTree/Attribute.h" // for noAttributes, Attribute … … 135 134 }; 136 135 136 /// Replaces enum types by int, and function or array types in function parameter and return lists by appropriate pointers. 137 struct EnumAndPointerDecay_old { 138 void previsit( EnumDecl * aggregateDecl ); 139 void previsit( FunctionType * func ); 140 }; 141 142 /// Associates forward declarations of aggregates with their definitions 143 struct LinkReferenceToTypes_old final : public WithIndexer, public WithGuards, public WithVisitorRef<LinkReferenceToTypes_old>, public WithShortCircuiting { 144 LinkReferenceToTypes_old( const Indexer * indexer ); 145 146 void postvisit( TypeInstType * typeInst ); 147 148 void postvisit( EnumInstType * enumInst ); 149 void postvisit( StructInstType * structInst ); 150 void postvisit( UnionInstType * unionInst ); 151 void postvisit( TraitInstType * traitInst ); 152 void previsit( QualifiedType * qualType ); 153 void postvisit( QualifiedType * qualType ); 154 155 void postvisit( EnumDecl * enumDecl ); 156 void postvisit( StructDecl * structDecl ); 157 void postvisit( UnionDecl * unionDecl ); 158 void postvisit( TraitDecl * traitDecl ); 159 160 void previsit( StructDecl * structDecl ); 161 void previsit( UnionDecl * unionDecl ); 162 163 void renameGenericParams( std::list< TypeDecl * > & params ); 164 165 private: 166 const Indexer * local_indexer; 167 168 typedef std::map< std::string, std::list< EnumInstType * > > ForwardEnumsType; 169 
typedef std::map< std::string, std::list< StructInstType * > > ForwardStructsType; 170 typedef std::map< std::string, std::list< UnionInstType * > > ForwardUnionsType; 171 ForwardEnumsType forwardEnums; 172 ForwardStructsType forwardStructs; 173 ForwardUnionsType forwardUnions; 174 /// true if currently in a generic type body, so that type parameter instances can be renamed appropriately 175 bool inGeneric = false; 176 }; 177 137 178 /// Does early resolution on the expressions that give enumeration constants their values 138 179 struct ResolveEnumInitializers final : public WithIndexer, public WithGuards, public WithVisitorRef<ResolveEnumInitializers>, public WithShortCircuiting { … … 152 193 void previsit( StructDecl * aggrDecl ); 153 194 void previsit( UnionDecl * aggrDecl ); 195 }; 196 197 // These structs are the sub-sub-passes of ForallPointerDecay_old. 198 199 struct TraitExpander_old final { 200 void previsit( FunctionType * ); 201 void previsit( StructDecl * ); 202 void previsit( UnionDecl * ); 203 }; 204 205 struct AssertionFixer_old final { 206 void previsit( FunctionType * ); 207 void previsit( StructDecl * ); 208 void previsit( UnionDecl * ); 209 }; 210 211 struct CheckOperatorTypes_old final { 212 void previsit( ObjectDecl * ); 213 }; 214 215 struct FixUniqueIds_old final { 216 void previsit( DeclarationWithType * ); 154 217 }; 155 218 … … 295 358 296 359 void validate_A( std::list< Declaration * > & translationUnit ) { 360 PassVisitor<EnumAndPointerDecay_old> epc; 297 361 PassVisitor<HoistTypeDecls> hoistDecls; 298 362 { … … 303 367 ReplaceTypedef::replaceTypedef( translationUnit ); 304 368 ReturnTypeFixer::fix( translationUnit ); // must happen before autogen 305 decayEnumsAndPointers( translationUnit ); // must happen before VerifyCtorDtorAssign, because void return objects should not exist; before LinkReferenceToTypes_old because it is an indexer and needs correct types for mangling 306 } 369 acceptAll( translationUnit, epc ); // must happen 
before VerifyCtorDtorAssign, because void return objects should not exist; before LinkReferenceToTypes_old because it is an indexer and needs correct types for mangling 370 } 371 } 372 373 void linkReferenceToTypes( std::list< Declaration * > & translationUnit ) { 374 PassVisitor<LinkReferenceToTypes_old> lrt( nullptr ); 375 acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions 307 376 } 308 377 … … 343 412 }); 344 413 } 414 } 415 416 static void decayForallPointers( std::list< Declaration * > & translationUnit ) { 417 PassVisitor<TraitExpander_old> te; 418 acceptAll( translationUnit, te ); 419 PassVisitor<AssertionFixer_old> af; 420 acceptAll( translationUnit, af ); 421 PassVisitor<CheckOperatorTypes_old> cot; 422 acceptAll( translationUnit, cot ); 423 PassVisitor<FixUniqueIds_old> fui; 424 acceptAll( translationUnit, fui ); 345 425 } 346 426 … … 421 501 } 422 502 503 void validateType( Type * type, const Indexer * indexer ) { 504 PassVisitor<EnumAndPointerDecay_old> epc; 505 PassVisitor<LinkReferenceToTypes_old> lrt( indexer ); 506 PassVisitor<TraitExpander_old> te; 507 PassVisitor<AssertionFixer_old> af; 508 PassVisitor<CheckOperatorTypes_old> cot; 509 PassVisitor<FixUniqueIds_old> fui; 510 type->accept( epc ); 511 type->accept( lrt ); 512 type->accept( te ); 513 type->accept( af ); 514 type->accept( cot ); 515 type->accept( fui ); 516 } 517 423 518 void HoistTypeDecls::handleType( Type * type ) { 424 519 // some type declarations are buried in expressions and not easy to hoist during parsing; hoist them here … … 613 708 } 614 709 710 void EnumAndPointerDecay_old::previsit( EnumDecl * enumDecl ) { 711 // Set the type of each member of the enumeration to be EnumConstant 712 for ( std::list< Declaration * >::iterator i = enumDecl->members.begin(); i != enumDecl->members.end(); ++i ) { 713 ObjectDecl * obj = dynamic_cast< ObjectDecl * >( * i ); 714 assert( obj ); 715 obj->set_type( new 
EnumInstType( Type::Qualifiers( Type::Const ), enumDecl->name ) ); 716 } // for 717 } 718 719 namespace { 720 template< typename DWTList > 721 void fixFunctionList( DWTList & dwts, bool isVarArgs, FunctionType * func ) { 722 auto nvals = dwts.size(); 723 bool containsVoid = false; 724 for ( auto & dwt : dwts ) { 725 // fix each DWT and record whether a void was found 726 containsVoid |= fixFunction( dwt ); 727 } 728 729 // the only case in which "void" is valid is where it is the only one in the list 730 if ( containsVoid && ( nvals > 1 || isVarArgs ) ) { 731 SemanticError( func, "invalid type void in function type " ); 732 } 733 734 // one void is the only thing in the list; remove it. 735 if ( containsVoid ) { 736 delete dwts.front(); 737 dwts.clear(); 738 } 739 } 740 } 741 742 void EnumAndPointerDecay_old::previsit( FunctionType * func ) { 743 // Fix up parameters and return types 744 fixFunctionList( func->parameters, func->isVarArgs, func ); 745 fixFunctionList( func->returnVals, false, func ); 746 } 747 748 LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) : WithIndexer( false ) { 749 if ( other_indexer ) { 750 local_indexer = other_indexer; 751 } else { 752 local_indexer = &indexer; 753 } // if 754 } 755 756 void LinkReferenceToTypes_old::postvisit( EnumInstType * enumInst ) { 757 const EnumDecl * st = local_indexer->lookupEnum( enumInst->name ); 758 // it's not a semantic error if the enum is not found, just an implicit forward declaration 759 if ( st ) { 760 enumInst->baseEnum = const_cast<EnumDecl *>(st); // Just linking in the node 761 } // if 762 if ( ! st || ! 
st->body ) { 763 // use of forward declaration 764 forwardEnums[ enumInst->name ].push_back( enumInst ); 765 } // if 766 } 767 void LinkReferenceToTypes_old::postvisit( StructInstType * structInst ) { 768 const StructDecl * st = local_indexer->lookupStruct( structInst->name ); 769 // it's not a semantic error if the struct is not found, just an implicit forward declaration 770 if ( st ) { 771 structInst->baseStruct = const_cast<StructDecl *>(st); // Just linking in the node 772 } // if 773 if ( ! st || ! st->body ) { 774 // use of forward declaration 775 forwardStructs[ structInst->name ].push_back( structInst ); 776 } // if 777 } 778 779 void LinkReferenceToTypes_old::postvisit( UnionInstType * unionInst ) { 780 const UnionDecl * un = local_indexer->lookupUnion( unionInst->name ); 781 // it's not a semantic error if the union is not found, just an implicit forward declaration 782 if ( un ) { 783 unionInst->baseUnion = const_cast<UnionDecl *>(un); // Just linking in the node 784 } // if 785 if ( ! un || ! 
un->body ) { 786 // use of forward declaration 787 forwardUnions[ unionInst->name ].push_back( unionInst ); 788 } // if 789 } 790 791 void LinkReferenceToTypes_old::previsit( QualifiedType * ) { 792 visit_children = false; 793 } 794 795 void LinkReferenceToTypes_old::postvisit( QualifiedType * qualType ) { 796 // linking only makes sense for the 'oldest ancestor' of the qualified type 797 qualType->parent->accept( * visitor ); 798 } 799 800 template< typename Decl > 801 void normalizeAssertions( std::list< Decl * > & assertions ) { 802 // ensure no duplicate trait members after the clone 803 auto pred = [](Decl * d1, Decl * d2) { 804 // only care if they're equal 805 DeclarationWithType * dwt1 = dynamic_cast<DeclarationWithType *>( d1 ); 806 DeclarationWithType * dwt2 = dynamic_cast<DeclarationWithType *>( d2 ); 807 if ( dwt1 && dwt2 ) { 808 if ( dwt1->name == dwt2->name && ResolvExpr::typesCompatible( dwt1->get_type(), dwt2->get_type(), SymTab::Indexer() ) ) { 809 // std::cerr << "=========== equal:" << std::endl; 810 // std::cerr << "d1: " << d1 << std::endl; 811 // std::cerr << "d2: " << d2 << std::endl; 812 return false; 813 } 814 } 815 return d1 < d2; 816 }; 817 std::set<Decl *, decltype(pred)> unique_members( assertions.begin(), assertions.end(), pred ); 818 // if ( unique_members.size() != assertions.size() ) { 819 // std::cerr << "============different" << std::endl; 820 // std::cerr << unique_members.size() << " " << assertions.size() << std::endl; 821 // } 822 823 std::list< Decl * > order; 824 order.splice( order.end(), assertions ); 825 std::copy_if( order.begin(), order.end(), back_inserter( assertions ), [&]( Decl * decl ) { 826 return unique_members.count( decl ); 827 }); 828 } 829 615 830 // expand assertions from trait instance, performing the appropriate type variable substitutions 616 831 template< typename Iterator > … … 623 838 // substitute trait decl parameters for instance parameters 624 839 applySubstitution( 
inst->baseTrait->parameters.begin(), inst->baseTrait->parameters.end(), inst->parameters.begin(), asserts.begin(), asserts.end(), out ); 840 } 841 842 void LinkReferenceToTypes_old::postvisit( TraitDecl * traitDecl ) { 843 if ( traitDecl->name == "sized" ) { 844 // "sized" is a special trait - flick the sized status on for the type variable 845 assertf( traitDecl->parameters.size() == 1, "Built-in trait 'sized' has incorrect number of parameters: %zd", traitDecl->parameters.size() ); 846 TypeDecl * td = traitDecl->parameters.front(); 847 td->set_sized( true ); 848 } 849 850 // move assertions from type parameters into the body of the trait 851 for ( TypeDecl * td : traitDecl->parameters ) { 852 for ( DeclarationWithType * assert : td->assertions ) { 853 if ( TraitInstType * inst = dynamic_cast< TraitInstType * >( assert->get_type() ) ) { 854 expandAssertions( inst, back_inserter( traitDecl->members ) ); 855 } else { 856 traitDecl->members.push_back( assert->clone() ); 857 } 858 } 859 deleteAll( td->assertions ); 860 td->assertions.clear(); 861 } // for 862 } 863 864 void LinkReferenceToTypes_old::postvisit( TraitInstType * traitInst ) { 865 // handle other traits 866 const TraitDecl * traitDecl = local_indexer->lookupTrait( traitInst->name ); 867 if ( ! traitDecl ) { 868 SemanticError( traitInst->location, "use of undeclared trait " + traitInst->name ); 869 } // if 870 if ( traitDecl->parameters.size() != traitInst->parameters.size() ) { 871 SemanticError( traitInst, "incorrect number of trait parameters: " ); 872 } // if 873 traitInst->baseTrait = const_cast<TraitDecl *>(traitDecl); // Just linking in the node 874 875 // need to carry over the 'sized' status of each decl in the instance 876 for ( auto p : group_iterate( traitDecl->parameters, traitInst->parameters ) ) { 877 TypeExpr * expr = dynamic_cast< TypeExpr * >( std::get<1>(p) ); 878 if ( ! 
expr ) { 879 SemanticError( std::get<1>(p), "Expression parameters for trait instances are currently unsupported: " ); 880 } 881 if ( TypeInstType * inst = dynamic_cast< TypeInstType * >( expr->get_type() ) ) { 882 TypeDecl * formalDecl = std::get<0>(p); 883 TypeDecl * instDecl = inst->baseType; 884 if ( formalDecl->get_sized() ) instDecl->set_sized( true ); 885 } 886 } 887 // normalizeAssertions( traitInst->members ); 888 } 889 890 void LinkReferenceToTypes_old::postvisit( EnumDecl * enumDecl ) { 891 // visit enum members first so that the types of self-referencing members are updated properly 892 // Replace the enum base; right now it works only for StructEnum 893 if ( enumDecl->base && dynamic_cast<TypeInstType*>(enumDecl->base) ) { 894 std::string baseName = static_cast<TypeInstType*>(enumDecl->base)->name; 895 const StructDecl * st = local_indexer->lookupStruct( baseName ); 896 if ( st ) { 897 enumDecl->base = new StructInstType(Type::Qualifiers(),const_cast<StructDecl *>(st)); // Just linking in the node 898 } 899 } 900 if ( enumDecl->body ) { 901 ForwardEnumsType::iterator fwds = forwardEnums.find( enumDecl->name ); 902 if ( fwds != forwardEnums.end() ) { 903 for ( std::list< EnumInstType * >::iterator inst = fwds->second.begin(); inst != fwds->second.end(); ++inst ) { 904 (* inst)->baseEnum = enumDecl; 905 } // for 906 forwardEnums.erase( fwds ); 907 } // if 908 } // if 909 } 910 911 void LinkReferenceToTypes_old::renameGenericParams( std::list< TypeDecl * > & params ) { 912 // rename generic type parameters uniquely so that they do not conflict with user-defined function forall parameters, e.g. 913 // forall(otype T) 914 // struct Box { 915 // T x; 916 // }; 917 // forall(otype T) 918 // void f(Box(T) b) { 919 // ... 920 // } 921 // The T in Box and the T in f are different, so internally the naming must reflect that. 922 GuardValue( inGeneric ); 923 inGeneric = ! 
params.empty(); 924 for ( TypeDecl * td : params ) { 925 td->name = "__" + td->name + "_generic_"; 926 } 927 } 928 929 void LinkReferenceToTypes_old::previsit( StructDecl * structDecl ) { 930 renameGenericParams( structDecl->parameters ); 931 } 932 933 void LinkReferenceToTypes_old::previsit( UnionDecl * unionDecl ) { 934 renameGenericParams( unionDecl->parameters ); 935 } 936 937 void LinkReferenceToTypes_old::postvisit( StructDecl * structDecl ) { 938 // visit struct members first so that the types of self-referencing members are updated properly 939 // xxx - need to ensure that type parameters match up between forward declarations and definition (most importantly, number of type parameters and their defaults) 940 if ( structDecl->body ) { 941 ForwardStructsType::iterator fwds = forwardStructs.find( structDecl->name ); 942 if ( fwds != forwardStructs.end() ) { 943 for ( std::list< StructInstType * >::iterator inst = fwds->second.begin(); inst != fwds->second.end(); ++inst ) { 944 (* inst)->baseStruct = structDecl; 945 } // for 946 forwardStructs.erase( fwds ); 947 } // if 948 } // if 949 } 950 951 void LinkReferenceToTypes_old::postvisit( UnionDecl * unionDecl ) { 952 if ( unionDecl->body ) { 953 ForwardUnionsType::iterator fwds = forwardUnions.find( unionDecl->name ); 954 if ( fwds != forwardUnions.end() ) { 955 for ( std::list< UnionInstType * >::iterator inst = fwds->second.begin(); inst != fwds->second.end(); ++inst ) { 956 (* inst)->baseUnion = unionDecl; 957 } // for 958 forwardUnions.erase( fwds ); 959 } // if 960 } // if 961 } 962 963 void LinkReferenceToTypes_old::postvisit( TypeInstType * typeInst ) { 964 // ensure generic parameter instances are renamed like the base type 965 if ( inGeneric && typeInst->baseType ) typeInst->name = typeInst->baseType->name; 966 if ( const NamedTypeDecl * namedTypeDecl = local_indexer->lookupType( typeInst->name ) ) { 967 if ( const TypeDecl * typeDecl = dynamic_cast< const TypeDecl * >( namedTypeDecl ) ) { 968 
typeInst->set_isFtype( typeDecl->kind == TypeDecl::Ftype ); 969 } // if 970 } // if 625 971 } 626 972 … … 651 997 } 652 998 } 999 653 1000 } 654 1001 } … … 738 1085 void ForallPointerDecay_old::previsit( UnionDecl * aggrDecl ) { 739 1086 forallFixer( aggrDecl->parameters, aggrDecl ); 1087 } 1088 1089 void TraitExpander_old::previsit( FunctionType * ftype ) { 1090 expandTraits( ftype->forall ); 1091 } 1092 1093 void TraitExpander_old::previsit( StructDecl * aggrDecl ) { 1094 expandTraits( aggrDecl->parameters ); 1095 } 1096 1097 void TraitExpander_old::previsit( UnionDecl * aggrDecl ) { 1098 expandTraits( aggrDecl->parameters ); 1099 } 1100 1101 void AssertionFixer_old::previsit( FunctionType * ftype ) { 1102 fixAssertions( ftype->forall, ftype ); 1103 } 1104 1105 void AssertionFixer_old::previsit( StructDecl * aggrDecl ) { 1106 fixAssertions( aggrDecl->parameters, aggrDecl ); 1107 } 1108 1109 void AssertionFixer_old::previsit( UnionDecl * aggrDecl ) { 1110 fixAssertions( aggrDecl->parameters, aggrDecl ); 1111 } 1112 1113 void CheckOperatorTypes_old::previsit( ObjectDecl * object ) { 1114 // ensure that operator names only apply to functions or function pointers 1115 if ( CodeGen::isOperator( object->name ) && ! dynamic_cast< FunctionType * >( object->type->stripDeclarator() ) ) { 1116 SemanticError( object->location, toCString( "operator ", object->name.c_str(), " is not a function or function pointer." ) ); 1117 } 1118 } 1119 1120 void FixUniqueIds_old::previsit( DeclarationWithType * decl ) { 1121 decl->fixUniqueId(); 740 1122 } 741 1123 -
src/SymTab/Validate.h
re5d9274 r015925a 10 10 // Author : Richard C. Bilson 11 11 // Created On : Sun May 17 21:53:34 2015 12 // Last Modified By : Andrew Beach13 // Last Modified On : Tue May 17 14:35:00 202214 // Update Count : 512 // Last Modified By : Peter A. Buhr 13 // Last Modified On : Sat Jul 22 09:46:07 2017 14 // Update Count : 4 15 15 // 16 16 … … 33 33 /// Normalizes struct and function declarations 34 34 void validate( std::list< Declaration * > &translationUnit, bool doDebug = false ); 35 void validateType( Type *type, const Indexer *indexer ); 35 36 36 37 // Sub-passes of validate. … … 41 42 void validate_E( std::list< Declaration * > &translationUnit ); 42 43 void validate_F( std::list< Declaration * > &translationUnit ); 44 void linkReferenceToTypes( std::list< Declaration * > &translationUnit ); 45 46 const ast::Type * validateType( 47 const CodeLocation & loc, const ast::Type * type, const ast::SymbolTable & symtab ); 43 48 } // namespace SymTab 44 49 -
src/SymTab/demangler.cc
re5d9274 r015925a 1 #include " Demangle.h"1 #include "Mangler.h" 2 2 #include <iostream> 3 3 #include <fstream> -
src/SymTab/module.mk
re5d9274 r015925a 11 11 ## Created On : Mon Jun 1 17:49:17 2015 12 12 ## Last Modified By : Andrew Beach 13 ## Last Modified On : T ue May 17 14:46:00 202214 ## Update Count : 513 ## Last Modified On : Thr Aug 10 16:08:00 2017 14 ## Update Count : 4 15 15 ############################################################################### 16 16 17 17 SRC_SYMTAB = \ 18 SymTab/Autogen.cc \19 SymTab/Autogen.h \20 SymTab/FixFunction.cc \21 SymTab/FixFunction.h \22 SymTab/Indexer.cc \23 SymTab/Indexer.h \24 SymTab/Mangler.cc \25 SymTab/ManglerCommon.cc \26 SymTab/Mangler.h \27 SymTab/ValidateType.cc \28 SymTab/ValidateType.h18 SymTab/Autogen.cc \ 19 SymTab/Autogen.h \ 20 SymTab/FixFunction.cc \ 21 SymTab/FixFunction.h \ 22 SymTab/Indexer.cc \ 23 SymTab/Indexer.h \ 24 SymTab/Mangler.cc \ 25 SymTab/ManglerCommon.cc \ 26 SymTab/Mangler.h \ 27 SymTab/Validate.cc \ 28 SymTab/Validate.h 29 29 30 SRC += $(SRC_SYMTAB) \ 31 SymTab/Validate.cc \ 32 SymTab/Validate.h 33 34 SRCDEMANGLE += $(SRC_SYMTAB) \ 35 SymTab/Demangle.cc \ 36 SymTab/Demangle.h 30 SRC += $(SRC_SYMTAB) 31 SRCDEMANGLE += $(SRC_SYMTAB) SymTab/Demangle.cc -
src/SynTree/module.mk
re5d9274 r015925a 24 24 SynTree/AttrType.cc \ 25 25 SynTree/BaseSyntaxNode.h \ 26 SynTree/BaseSyntaxNode.cc \27 26 SynTree/BasicType.cc \ 28 27 SynTree/CommaExpr.cc \ -
src/Tuples/TupleExpansion.cc
re5d9274 r015925a 9 9 // Author : Rodolfo G. Esteves 10 10 // Created On : Mon May 18 07:44:20 2015 11 // Last Modified By : Andrew Beach12 // Last Modified On : Tue May 17 15:02:00 202213 // Update Count : 2 511 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Dec 13 23:45:51 2019 13 // Update Count : 24 14 14 // 15 15 … … 367 367 return nullptr; 368 368 } 369 370 namespace { 371 /// determines if impurity (read: side-effects) may exist in a piece of code. Currently gives a very crude approximation, wherein any function call expression means the code may be impure 372 struct ImpurityDetector : public WithShortCircuiting { 373 ImpurityDetector( bool ignoreUnique ) : ignoreUnique( ignoreUnique ) {} 374 375 void previsit( const ApplicationExpr * appExpr ) { 376 visit_children = false; 377 if ( const DeclarationWithType * function = InitTweak::getFunction( appExpr ) ) { 378 if ( function->linkage == LinkageSpec::Intrinsic ) { 379 if ( function->name == "*?" || function->name == "?[?]" ) { 380 // intrinsic dereference, subscript are pure, but need to recursively look for impurity 381 visit_children = true; 382 return; 383 } 384 } 385 } 386 maybeImpure = true; 387 } 388 void previsit( const UntypedExpr * ) { maybeImpure = true; visit_children = false; } 389 void previsit( const UniqueExpr * ) { 390 if ( ignoreUnique ) { 391 // bottom out at unique expression. 392 // The existence of a unique expression doesn't change the purity of an expression. 393 // That is, even if the wrapped expression is impure, the wrapper protects the rest of the expression. 
394 visit_children = false; 395 return; 396 } 397 } 398 399 bool maybeImpure = false; 400 bool ignoreUnique; 401 }; 402 } // namespace 403 404 bool maybeImpure( const Expression * expr ) { 405 PassVisitor<ImpurityDetector> detector( false ); 406 expr->accept( detector ); 407 return detector.pass.maybeImpure; 408 } 409 410 bool maybeImpureIgnoreUnique( const Expression * expr ) { 411 PassVisitor<ImpurityDetector> detector( true ); 412 expr->accept( detector ); 413 return detector.pass.maybeImpure; 414 } 369 415 } // namespace Tuples 370 416 -
src/Tuples/Tuples.cc
re5d9274 r015925a 10 10 // Created On : Mon Jun 17 14:41:00 2019 11 11 // Last Modified By : Andrew Beach 12 // Last Modified On : Mon May 16 16:15:00 202213 // Update Count : 212 // Last Modified On : Tue Jun 18 9:31:00 2019 13 // Update Count : 1 14 14 // 15 15 … … 18 18 #include "AST/Pass.hpp" 19 19 #include "AST/LinkageSpec.hpp" 20 #include "Common/PassVisitor.h"21 20 #include "InitTweak/InitTweak.h" 22 21 … … 24 23 25 24 namespace { 26 /// Checks if impurity (read: side-effects) may exist in a piece of code.27 /// Currently gives a very crude approximation, wherein any function28 /// call expression means the code may be impure.29 struct ImpurityDetector_old : public WithShortCircuiting {30 bool const ignoreUnique;31 bool maybeImpure;32 33 ImpurityDetector_old( bool ignoreUnique ) :34 ignoreUnique( ignoreUnique ), maybeImpure( false )35 {}36 37 void previsit( const ApplicationExpr * appExpr ) {38 visit_children = false;39 if ( const DeclarationWithType * function =40 InitTweak::getFunction( appExpr ) ) {41 if ( function->linkage == LinkageSpec::Intrinsic ) {42 if ( function->name == "*?" 
|| function->name == "?[?]" ) {43 // intrinsic dereference, subscript are pure,44 // but need to recursively look for impurity45 visit_children = true;46 return;47 }48 }49 }50 maybeImpure = true;51 }52 53 void previsit( const UntypedExpr * ) {54 maybeImpure = true;55 visit_children = false;56 }57 58 void previsit( const UniqueExpr * ) {59 if ( ignoreUnique ) {60 // bottom out at unique expression.61 // The existence of a unique expression doesn't change the purity of an expression.62 // That is, even if the wrapped expression is impure, the wrapper protects the rest of the expression.63 visit_children = false;64 return;65 }66 }67 };68 69 bool detectImpurity( const Expression * expr, bool ignoreUnique ) {70 PassVisitor<ImpurityDetector_old> detector( ignoreUnique );71 expr->accept( detector );72 return detector.pass.maybeImpure;73 }74 75 25 /// Determines if impurity (read: side-effects) may exist in a piece of code. Currently gives 76 26 /// a very crude approximation, wherein any function call expression means the code may be 77 27 /// impure. 
78 28 struct ImpurityDetector : public ast::WithShortCircuiting { 79 bool result= false;29 bool maybeImpure = false; 80 30 81 31 void previsit( ast::ApplicationExpr const * appExpr ) { … … 86 36 } 87 37 } 88 result= true; visit_children = false;38 maybeImpure = true; visit_children = false; 89 39 } 90 40 void previsit( ast::UntypedExpr const * ) { 91 result= true; visit_children = false;41 maybeImpure = true; visit_children = false; 92 42 } 93 43 }; 94 95 44 struct ImpurityDetectorIgnoreUnique : public ImpurityDetector { 96 45 using ImpurityDetector::previsit; … … 99 48 } 100 49 }; 50 51 template<typename Detector> 52 bool detectImpurity( const ast::Expr * expr ) { 53 ast::Pass<Detector> detector; 54 expr->accept( detector ); 55 return detector.core.maybeImpure; 56 } 101 57 } // namespace 102 58 103 59 bool maybeImpure( const ast::Expr * expr ) { 104 return ast::Pass<ImpurityDetector>::read( expr );60 return detectImpurity<ImpurityDetector>( expr ); 105 61 } 106 62 107 63 bool maybeImpureIgnoreUnique( const ast::Expr * expr ) { 108 return ast::Pass<ImpurityDetectorIgnoreUnique>::read( expr ); 109 } 110 111 bool maybeImpure( const Expression * expr ) { 112 return detectImpurity( expr, false ); 113 } 114 115 bool maybeImpureIgnoreUnique( const Expression * expr ) { 116 return detectImpurity( expr, true ); 64 return detectImpurity<ImpurityDetectorIgnoreUnique>( expr ); 117 65 } 118 66 -
src/Tuples/module.mk
re5d9274 r015925a 10 10 ## Author : Richard C. Bilson 11 11 ## Created On : Mon Jun 1 17:49:17 2015 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Mon May 17 15:00:00 202214 ## Update Count : 312 ## Last Modified By : Henry Xue 13 ## Last Modified On : Mon Aug 23 15:36:09 2021 14 ## Update Count : 2 15 15 ############################################################################### 16 16 … … 24 24 Tuples/Tuples.h 25 25 26 26 27 SRC += $(SRC_TUPLES) 27 28 28 SRCDEMANGLE += $(SRC_TUPLES) -
src/Validate/Autogen.cpp
re5d9274 r015925a 402 402 auto retval = srcParam(); 403 403 retval->name = "_ret"; 404 // xxx - Adding this unused attribute can slience unused variable warning405 // However, some code might not be compiled as expected406 // Temporarily disabled407 // retval->attributes.push_back(new ast::Attribute("unused"));408 404 return genProto( "?=?", { dstParam(), srcParam() }, { retval } ); 409 405 } -
src/Validate/module.mk
re5d9274 r015925a 10 10 ## Author : Rob Schluntz 11 11 ## Created On : Fri Jul 27 10:10:10 2018 12 ## Last Modified By : Andrew Beach13 ## Last Modified On : Tue May 17 14:59:00 202214 ## Update Count : 312 ## Last Modified By : Rob Schluntz 13 ## Last Modified On : Fri Jul 27 10:10:26 2018 14 ## Update Count : 2 15 15 ############################################################################### 16 16 17 17 SRC_VALIDATE = \ 18 Validate/FindSpecialDecls.cc \19 Validate/FindSpecialDecls.h20 21 SRC += $(SRC_VALIDATE) \22 18 Validate/Autogen.cpp \ 23 19 Validate/Autogen.hpp \ … … 26 22 Validate/EliminateTypedef.cpp \ 27 23 Validate/EliminateTypedef.hpp \ 28 Validate/FindSpecialDeclsNew.cpp \29 24 Validate/FixQualifiedTypes.cpp \ 30 25 Validate/FixQualifiedTypes.hpp \ … … 43 38 Validate/NoIdSymbolTable.hpp \ 44 39 Validate/ReturnCheck.cpp \ 45 Validate/ReturnCheck.hpp 40 Validate/ReturnCheck.hpp \ 41 Validate/FindSpecialDeclsNew.cpp \ 42 Validate/FindSpecialDecls.cc \ 43 Validate/FindSpecialDecls.h 46 44 45 SRC += $(SRC_VALIDATE) 47 46 SRCDEMANGLE += $(SRC_VALIDATE) -
src/Virtual/module.mk
re5d9274 r015925a 11 11 ## Created On : Tus Jul 25 10:18:00 2017 12 12 ## Last Modified By : Andrew Beach 13 ## Last Modified On : Tus May 17 14:59:00 202214 ## Update Count : 113 ## Last Modified On : Tus Jul 25 10:18:00 2017 14 ## Update Count : 0 15 15 ############################################################################### 16 16 17 SRC += \ 18 Virtual/ExpandCasts.cc \ 19 Virtual/ExpandCasts.h \ 20 Virtual/Tables.cc \ 21 Virtual/Tables.h 17 SRC += Virtual/ExpandCasts.cc Virtual/ExpandCasts.h \ 18 Virtual/Tables.cc Virtual/Tables.h 19 20 SRCDEMANGLE += Virtual/Tables.cc -
src/main.cc
re5d9274 r015925a 70 70 #include "ResolvExpr/Resolver.h" // for resolve 71 71 #include "SymTab/Validate.h" // for validate 72 #include "SymTab/ValidateType.h" // for linkReferenceToTypes73 72 #include "SynTree/LinkageSpec.h" // for Spec, Cforall, Intrinsic 74 73 #include "SynTree/Declaration.h" // for Declaration -
tests/.expect/attributes.nast.x64.txt
re5d9274 r015925a 1339 1339 } 1340 1340 1341 return (*_X4_dstM12__anonymous4_2); 1341 { 1342 ((void)(_X4_retM12__anonymous4_2=(*_X4_dstM12__anonymous4_2)) /* ?{} */); 1343 } 1344 1345 return _X4_retM12__anonymous4_2; 1342 1346 } 1343 1347 { -
tests/.expect/attributes.nast.x86.txt
re5d9274 r015925a 1339 1339 } 1340 1340 1341 return (*_X4_dstM12__anonymous4_2); 1341 { 1342 ((void)(_X4_retM12__anonymous4_2=(*_X4_dstM12__anonymous4_2)) /* ?{} */); 1343 } 1344 1345 return _X4_retM12__anonymous4_2; 1342 1346 } 1343 1347 { -
tests/.expect/attributes.oast.x64.txt
re5d9274 r015925a 1339 1339 } 1340 1340 1341 return (*_X4_dstM12__anonymous4_2); 1341 { 1342 ((void)(_X4_retM12__anonymous4_2=(*_X4_dstM12__anonymous4_2)) /* ?{} */); 1343 } 1344 1345 return _X4_retM12__anonymous4_2; 1342 1346 } 1343 1347 { -
tests/.expect/attributes.oast.x86.txt
re5d9274 r015925a 1339 1339 } 1340 1340 1341 { 1342 ((void)(_X4_retM12__anonymous4_2=(*_X4_dstM12__anonymous4_2)) /* ?{} */); 1343 } 1344 1341 1345 return _X4_retM12__anonymous4_2; 1342 1346 } -
tests/.expect/quasiKeyword.txt
re5d9274 r015925a 1 quasiKeyword.cfa:5 2:25: warning: Compiled1 quasiKeyword.cfa:54:25: warning: Compiled -
tests/exceptions/defaults.cfa
re5d9274 r015925a 2 2 3 3 #include <string.h> 4 #include <exception.hfa> 4 5 5 6 exception log_message { … … 7 8 }; 8 9 9 // Manually define the virtual table and helper functions. 10 void copy(log_message * this, log_message * that) { 11 *this = *that; 12 } 13 10 _EHM_DEFINE_COPY(log_message, ) 14 11 const char * msg(log_message * this) { 15 12 return this->msg; 16 13 } 17 18 const struct log_message_vtable log_vt @= { 19 .__cfavir_typeid : &__cfatid_log_message, 20 .size : sizeof(struct log_message), 21 .copy : copy, 22 .^?{} : ^?{}, 23 .msg : msg, 24 }; 14 _EHM_VIRTUAL_TABLE(log_message, , log_vt); 25 15 26 16 // Logging messages don't have to be handled. -
tests/include/.expect/includes.nast.txt
re5d9274 r015925a 1 include/includes.cfa:1 73:25: warning: Compiled1 include/includes.cfa:169:25: warning: Compiled -
tests/include/includes.cfa
re5d9274 r015925a 10 10 // Created On : Wed May 27 17:56:53 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun May 22 08:27:20202213 // Update Count : 77 912 // Last Modified On : Tue May 10 16:36:44 2022 13 // Update Count : 776 14 14 // 15 15 … … 47 47 #endif 48 48 #include <execinfo.h> 49 #if __has_include( "expat.h" ) 50 #include <expat.h> // may not be installed 49 #include <expat.h> 51 50 #include <expat_external.h> 52 #endif53 51 #include <fcntl.h> 54 52 #include <features.h> … … 81 79 //#include <link.h> // CFA bug #240 nested anonymous enum fails 82 80 #include <locale.h> 83 #if __has_include( "ltdl.h" ) 84 #include <ltdl.h> // may not be installed 85 #endif 81 #include <ltdl.h> 86 82 //#include <malloc.h> // cannot include in extern "C" because of CFA #include_next 87 83 #include <math.h> -
tests/linking/exception-nothreads.cfa
re5d9274 r015925a 15 15 16 16 #include <stdlib.hfa> 17 #include <exception.hfa> 17 18 18 exception ping {};19 vtable(ping) ping_vt;19 EHM_EXCEPTION(ping)(); 20 EHM_VIRTUAL_TABLE(ping, ping_vt); 20 21 21 22 int main(void) { -
tests/linking/exception-withthreads.cfa
re5d9274 r015925a 15 15 16 16 #include <stdlib.hfa> 17 #include <exception.hfa> 17 18 #include "../exceptions/with-threads.hfa" 18 19 19 exception ping {};20 vtable(ping) ping_vt;20 EHM_EXCEPTION(ping)(); 21 EHM_VIRTUAL_TABLE(ping, ping_vt); 21 22 22 23 int main(void) { -
tests/pybin/settings.py
re5d9274 r015925a 201 201 global output_width 202 202 output_width = max(map(lambda t: len(t.target()), tests)) 203 # 35 is the maximum width of the name field before we get line wrapping.204 output_width = min(output_width, 35) -
tests/pybin/test_run.py
re5d9274 r015925a 43 43 return os.path.normpath( os.path.join(settings.BUILDDIR, self.path, self.name) ) 44 44 45 def format_target(self, width):46 target = self.target()47 length = len(target)48 if length < width:49 return '{0:{width}}'.format(target, width=width)50 elif length == width:51 return target52 else:53 return '...' + target[3-width:]54 55 45 @staticmethod 56 46 def valid_name(name): -
tests/quasiKeyword.cfa
re5d9274 r015925a 4 4 // quasiKeyword.cfa -- test that quasi-keywords can be used for variable and functions names, as well as keywords in 5 5 // control structures. 6 // 6 // 7 7 // Author : Peter A. Buhr 8 8 // Created On : Wed Feb 17 10:33:49 2021 … … 10 10 // Last Modified On : Sat Jun 5 10:07:59 2021 11 11 // Update Count : 8 12 // 12 // 13 13 14 exception E {}; 14 #include <exception.hfa> 15 16 EHM_EXCEPTION( E )(); 15 17 16 18 void catch( int i ) {} … … 47 49 } fixup ( E * ) { 48 50 } finally { 49 } 51 } 50 52 else catch = 3; 51 53 -
tests/test.py
re5d9274 r015925a 132 132 parser.add_argument('--install', help='Run all tests based on installed binaries or tree binaries', type=comma_separated(yes_no), default='no') 133 133 parser.add_argument('--continue', help='When multiple specifications are passed (debug/install/arch), sets whether or not to continue if the last specification failed', type=yes_no, default='yes', dest='continue_') 134 parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=1 80)134 parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=120) 135 135 parser.add_argument('--global-timeout', help='Maximum cumulative duration in seconds after the ALL tests are considered to have timed out', type=int, default=7200) 136 136 parser.add_argument('--timeout-with-gdb', help='Instead of killing the command when it times out, orphan it and print process id to allow gdb to attach', type=yes_no, default="no") … … 252 252 try : 253 253 # print formated name 254 name_txt = t.format_target(width=settings.output_width) + ' '254 name_txt = '{0:{width}} '.format(t.target(), width=settings.output_width) 255 255 256 256 retcode, error, duration = run_single_test(t)
Note:
See TracChangeset
for help on using the changeset viewer.