Changes in / [bb7c77d:2e9b59b]
Location:
- doc/theses/mubeen_zulfiqar_MMath
Files:
- 203 added
- 4 edited
- allocator.tex (modified) (5 diffs)
- archive.tex (added)
- background.tex (modified) (1 diff)
- benchmarks.tex (modified) (1 diff)
- dofree.tex (added)
- evaluations/algol-perf-eps/cache-time-0-scratch.eps (added)
- evaluations/algol-perf-eps/cache-time-0-thrash.eps (added)
- evaluations/algol-perf-eps/churn.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-1-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-2-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-1-prod-4-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-1-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-2-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-2-prod-4-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-1-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-2-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-all.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-cfa.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-dl.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-glc.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-hrd.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-je.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-pt3.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-rp.eps (added)
- evaluations/algol-perf-eps/mem-4-prod-4-cons-100-tbb.eps (added)
- evaluations/algol-perf-eps/speed-1-malloc-null.eps (added)
- evaluations/algol-perf-eps/speed-10-malloc-realloc.eps (added)
- evaluations/algol-perf-eps/speed-11-calloc-realloc.eps (added)
- evaluations/algol-perf-eps/speed-12-malloc-realloc-free.eps (added)
- evaluations/algol-perf-eps/speed-13-calloc-realloc-free.eps (added)
- evaluations/algol-perf-eps/speed-14-{m,c,re}alloc-free.eps (added)
- evaluations/algol-perf-eps/speed-2-free-null.eps (added)
- evaluations/algol-perf-eps/speed-3-malloc.eps (added)
- evaluations/algol-perf-eps/speed-4-realloc.eps (added)
- evaluations/algol-perf-eps/speed-5-free.eps (added)
- evaluations/algol-perf-eps/speed-6-calloc.eps (added)
- evaluations/algol-perf-eps/speed-7-malloc-free.eps (added)
- evaluations/algol-perf-eps/speed-8-realloc-free.eps (added)
- evaluations/algol-perf-eps/speed-9-calloc-free.eps (added)
- evaluations/nasus-perf-eps/cache-time-0-scratch.eps (added)
- evaluations/nasus-perf-eps/cache-time-0-thrash.eps (added)
- evaluations/nasus-perf-eps/churn.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-2-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-1-prod-4-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-1-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-2-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-2-prod-4-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-1-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-2-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-all.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-cfa.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-dl.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-glc.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-hrd.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-je.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-pt3.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-rp.eps (added)
- evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-tbb.eps (added)
- evaluations/nasus-perf-eps/speed-1-malloc-null.eps (added)
- evaluations/nasus-perf-eps/speed-10-malloc-realloc.eps (added)
- evaluations/nasus-perf-eps/speed-11-calloc-realloc.eps (added)
- evaluations/nasus-perf-eps/speed-12-malloc-realloc-free.eps (added)
- evaluations/nasus-perf-eps/speed-13-calloc-realloc-free.eps (added)
- evaluations/nasus-perf-eps/speed-14-{m,c,re}alloc-free.eps (added)
- evaluations/nasus-perf-eps/speed-2-free-null.eps (added)
- evaluations/nasus-perf-eps/speed-3-malloc.eps (added)
- evaluations/nasus-perf-eps/speed-4-realloc.eps (added)
- evaluations/nasus-perf-eps/speed-5-free.eps (added)
- evaluations/nasus-perf-eps/speed-6-calloc.eps (added)
- evaluations/nasus-perf-eps/speed-7-malloc-free.eps (added)
- evaluations/nasus-perf-eps/speed-8-realloc-free.eps (added)
- evaluations/nasus-perf-eps/speed-9-calloc-free.eps (added)
- figures/bench-cache-scratch.eps (added)
- figures/bench-cache-thrash.eps (added)
- figures/bench-churn.eps (added)
- figures/bench-memory.eps (added)
- figures/bench-speed.eps (added)
- performance.tex (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
doc/theses/mubeen_zulfiqar_MMath/allocator.tex
rbb7c77d → r2e9b59b

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

<<<<<<< HEAD
\section{Design choices for uHeap}\label{sec:allocatorSec}
uHeap's design was reviewed and changed to fulfill new requirements (FIX ME: cite allocator philosophy). For this purpose, the following two designs of uHeapLmm were proposed:
=======
\section{Design Choices}
>>>>>>> bb7c77dc425e289ed60aa638529b3e5c7c3e4961

llheap's design was reviewed and changed multiple times throughout the thesis.

…

\begin{figure}
\centering
<<<<<<< HEAD
\includegraphics[width=0.65\textwidth]{figures/NewHeapStructure.eps}
\caption{uHeap Structure}
\label{fig:heapStructureFig}
=======
% \includegraphics[width=0.65\textwidth]{figures/NewHeapStructure.eps}
\input{llheap}
\caption{llheap Structure}
\label{f:llheapStructure}
>>>>>>> bb7c77dc425e289ed60aa638529b3e5c7c3e4961
\end{figure}

…

\end{algorithm}

<<<<<<< HEAD
Algorithm~\ref{alg:heapObjectFreeOwn} shows how a free request is fulfilled when object ownership is turned on; Algorithm~\ref{alg:heapObjectFreeNoOwn} shows how the same free request is fulfilled without object ownership.

\begin{algorithm}
\caption{Dynamic object free at address A with object ownership}\label{alg:heapObjectFreeOwn}
\begin{algorithmic}[1]
\If {$\textit{A was mmap-ed}$}
\State $\text{return A's dynamic memory to system using system call munmap}$
=======
\vspace*{-15pt}
\begin{algorithm}[H]
…
\If {$\textit{A mapped allocation}$}
\State $\text{return A's dynamic memory to system using system call \lstinline{munmap}}$
>>>>>>> bb7c77dc425e289ed60aa638529b3e5c7c3e4961
\Else
\State $\text{B} \gets \textit{O's owner}$
…
\EndIf
\end{algorithmic}
\end{algorithm}
<<<<<<< HEAD

\begin{algorithm}
\caption{Dynamic object free at address A without object ownership}\label{alg:heapObjectFreeNoOwn}
\begin{algorithmic}[1]
\If {$\textit{A was mmap-ed}$}
\State $\text{return A's dynamic memory to system using system call munmap}$
\Else
\State $\text{B} \gets \textit{O's owner}$
\If {$\textit{B is thread-local heap's bucket}$}
\State $\text{push A to B's free-list}$
\Else
\State $\text{C} \gets \textit{thread-local heap's bucket with same size as B}$
\State $\text{push A to C's free-list}$
\EndIf
\EndIf
\end{algorithmic}
\end{algorithm}

=======
>>>>>>> bb7c77dc425e289ed60aa638529b3e5c7c3e4961

\vspace*{-15pt}
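The two ownership variants can also be read as a single dispatch in the deallocation path. The following C sketch is only an illustration of the two algorithms above; the types and helpers (Object, Bucket, my_heap, toy_free) are hypothetical, not the allocator's actual data structures:

\begin{lstlisting}
#include <stddef.h>
#include <sys/mman.h>

// Toy model of the free dispatch; all names here are illustrative.
typedef struct Object { struct Object * next; size_t size; int mapped; int owner; } Object;
typedef struct Bucket { Object * freelist; } Bucket;

enum { HEAPS = 2 };
static Bucket bucket[HEAPS];   // one bucket (size class) per heap, for brevity
static int my_heap;            // index of the calling thread's heap

static void push( Bucket * b, Object * o ) { o->next = b->freelist; b->freelist = o; }

void toy_free( Object * o, int ownership ) {
    if ( o->mapped ) {                    // mmap-ed allocation:
        munmap( o, o->size );             //  return the memory directly to the OS
    } else if ( ownership ) {
        push( &bucket[o->owner], o );     // with ownership: back to the owner's bucket
    } else {                              //  (a remote push needs synchronization)
        push( &bucket[my_heap], o );      // without ownership: always into the local
    }                                     //  heap's bucket of the same size class
}
\end{lstlisting}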
doc/theses/mubeen_zulfiqar_MMath/background.tex
rbb7c77d → r2e9b59b

Implementing lock-free operations for more complex data-structures (queue~\cite{Valois94}/deque~\cite{Sundell08}) is correspondingly more complex.
Michael~\cite{Michael04} and Gidenstam \etal \cite{Gidenstam05} have created lock-free variations of the Hoard allocator.

\subsubsection{Speed Workload}
The workload method uses the opposite approach: it calls the allocator's routines for a fixed amount of time and measures how much work was done during that time. Then, similar to the time method, it divides the elapsed time by the work done during that time and calculates the average time taken by the allocator's routine. A minimal sketch of this method appears below.

*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Knobs}
*** FIX ME: Insert Knobs
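A minimal sketch of the workload method, assuming a fixed one-second measurement window and a malloc/free pair as the routine under test; both are illustrative choices:

\begin{lstlisting}
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static double now( void ) {               // monotonic time in seconds
    struct timespec t;
    clock_gettime( CLOCK_MONOTONIC, &t );
    return t.tv_sec + t.tv_nsec * 1e-9;
}

int main( void ) {
    const double window = 1.0;             // fixed amount of time to run
    double start = now(), elapsed;
    size_t ops = 0;
    do {                                   // repeatedly exercise the routine,
        free( malloc( 42 ) );              //  counting the work completed
        ops += 1;
        elapsed = now() - start;           // (the clock read adds a small overhead)
    } while ( elapsed < window );
    printf( "%.1f ns per malloc/free pair\n", elapsed / ops * 1e9 );
}
\end{lstlisting}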
doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex
rbb7c77d → r2e9b59b

\chapter{Benchmarks}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Micro Benchmark Suite
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

The aim of the micro benchmark suite is to create a set of programs that can evaluate a memory allocator based on the performance metrics described in (FIX ME: local cite). These programs can be taken as a standard to benchmark an allocator's basic goals. They report an allocator's memory overhead and speed under a given allocation pattern. The speed of the allocator is benchmarked in several different ways, and false sharing caused by an allocator is likewise measured in multiple ways. The benchmarks evaluate an allocator under an allocation pattern that is configurable through a few knobs, so the allocator's performance can be observed under a desired allocation pattern.

The micro benchmark suite benchmarks an allocator's performance by allocating dynamic objects and then measuring specific metrics. Benchmarks have different knobs that can be used to change the allocation pattern and evaluate an allocator under desired conditions; these are set by passing command-line arguments to a benchmark on execution.

\section{Current Benchmarks}
There are multiple benchmarks that were built individually and evaluate different aspects of a memory allocator, but there is no single set of benchmarks that can be used to evaluate multiple aspects of memory allocators.

\subsection{threadtest}
(FIX ME: cite benchmark and hoard) Each thread repeatedly allocates and then deallocates 100,000 objects. The runtime of the benchmark evaluates its efficiency.

\subsection{shbench}
(FIX ME: cite benchmark and hoard) Each thread allocates and randomly frees a number of random-sized objects. It is a stress test that also uses runtime to determine the efficiency of the allocator.

\subsection{larson}
(FIX ME: cite benchmark and hoard) Larson simulates a server environment. Multiple threads are created, where each thread allocates and frees a number of objects within a size range. Some objects are passed from threads to child threads to be freed there. It calculates memory operations per second as an indicator of the memory allocator's performance.

\section{Memory Benchmark}
The memory benchmark measures the memory overhead of an allocator. It allocates a number of dynamic objects, then reads /proc/self/maps to get the total memory that the allocator has requested from the OS. It calculates the memory overhead as the difference between the memory the allocator has requested from the OS and the memory the program has allocated.
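A minimal sketch of this measurement, approximating the overhead by summing the sizes of all mappings in /proc/self/maps; the producer-consumer structure of the real benchmark is omitted and the object count and size are illustrative:

\begin{lstlisting}
#include <stdio.h>
#include <stdlib.h>

static size_t mapped_bytes( void ) {       // total bytes currently mapped by the process
    FILE * maps = fopen( "/proc/self/maps", "r" );
    if ( maps == NULL ) return 0;
    size_t total = 0;
    unsigned long start, end;
    char line[4096];
    while ( fgets( line, sizeof(line), maps ) != NULL )
        if ( sscanf( line, "%lx-%lx", &start, &end ) == 2 ) total += end - start;
    fclose( maps );
    return total;
}

int main( void ) {
    enum { N = 10000, SIZE = 64 };
    size_t before = mapped_bytes(), requested = 0;
    void * objs[N];
    for ( int i = 0; i < N; i += 1 ) { objs[i] = malloc( SIZE ); requested += SIZE; }
    long long growth = (long long)mapped_bytes() - (long long)before;
    printf( "allocated %zu bytes; mappings grew %lld bytes; overhead %lld bytes\n",
            requested, growth, growth - (long long)requested );
    for ( int i = 0; i < N; i += 1 ) free( objs[i] );
}
\end{lstlisting}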
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{figures/bench-memory.eps}
\caption{Benchmark Memory Overhead}
\label{fig:benchMemoryFig}
\end{figure}

Figure \ref{fig:benchMemoryFig} gives the flow of the memory benchmark. It creates a producer-consumer scenario with K producers, where each producer has M consumers. A producer has a separate buffer for each consumer. The producer allocates N objects of random sizes, following the given distribution, for each consumer, and the consumer frees those objects. Program memory usage is recorded after every memory operation throughout the runtime. This data can then be used to visualize the memory usage and consumption of the program.

Different knobs can be adjusted to set a certain threading model.\\
-threadA : sets number of allocating threads (producers) for the memory benchmark\\
-consumeS: sets production and consumption round size\\
-threadF : sets number of freeing threads (consumers) for the memory benchmark

Object allocation size can be changed using the knobs:\\
-maxS : sets max object size\\
-minS : sets min object size\\
-stepS : sets object size increment\\
-distroS : sets object size distribution\\
-objN : sets number of objects per thread

\section{Speed Benchmark}
The speed benchmark measures the runtime speed of an allocator's routines (FIX ME: cite allocator routines), both individually and in different allocation chains, where multiple allocation routines are combined and measured together. It uses the following chains and measures the allocator's runtime speed against them (a sketch of the chain-timing harness appears after the list):
\begin{itemize}
\item malloc 0
\item free NULL
\item malloc
\item realloc
\item free
\item calloc
\item malloc-free
\item realloc-free
\item calloc-free
\item malloc-realloc
\item calloc-realloc
\item malloc-realloc-free
\item calloc-realloc-free
\item malloc-realloc-free-calloc
\end{itemize}
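A minimal sketch of the chain-timing harness for one chain (malloc-realloc-free); the object count and sizes are illustrative:

\begin{lstlisting}
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

enum { N = 1000000 };                     // objects per chain (illustrative)
static void * objs[N];

int main( void ) {
    struct timespec s, e;
    clock_gettime( CLOCK_MONOTONIC, &s );
    for ( int i = 0; i < N; i += 1 ) objs[i] = malloc( 64 );            // stage 1
    for ( int i = 0; i < N; i += 1 ) objs[i] = realloc( objs[i], 128 ); // stage 2 reuses stage 1's objects
    for ( int i = 0; i < N; i += 1 ) free( objs[i] );                   // stage 3
    clock_gettime( CLOCK_MONOTONIC, &e );
    double ns = ( e.tv_sec - s.tv_sec ) * 1e9 + ( e.tv_nsec - s.tv_nsec );
    printf( "malloc-realloc-free: %.1f ns per object\n", ns / N );
}
\end{lstlisting}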
\section{Performance Metrics of Memory Allocators}

When it comes to memory allocators, there are no set standards of performance. The performance of a memory allocator depends highly on the usage pattern of the application: an allocator that is the best performer for an application X might be the worst for some other application with a completely different memory-usage pattern. It is extremely difficult to make one universally best memory allocator that outperforms every other allocator for every usage pattern, so there is a lack of standard benchmarks for evaluating a memory allocator's performance.

Breaking down the goals of a memory allocator, there are two basic metrics on which its performance is evaluated.
\begin{enumerate}
\item
Memory Overhead
\item
Speed
\end{enumerate}

\subsection{Memory Overhead}
Memory overhead is the extra memory that an allocator takes from the OS beyond what the application requested. Ideally, an allocator should get just enough memory from the OS to fulfill the application's requests and should return this memory to the OS as soon as the application frees it. In practice, allocators retain more memory than the application has asked for, which causes memory overhead. Memory overhead can happen for various reasons.

\subsubsection{Fragmentation}
Fragmentation is one of the major reasons behind memory overhead. Fragmentation happens because of situations that are either necessary for the proper functioning of the allocator, such as internal memory management and book-keeping, or are out of the allocator's control, such as the application's usage pattern.

\paragraph{Internal Fragmentation}
For internal book-keeping, allocators divide the raw memory given by the OS into chunks, blocks, or lists that can fulfill the application's requested sizes, and use part of that memory for headers, footers, etc. to store information about these chunks, blocks, or lists. This increases memory usage beyond what the application requested, as the allocator needs to store its book-keeping information. This extra memory used for the allocator's own book-keeping is called internal fragmentation. Although it causes memory overhead, this overhead is necessary for an allocator's proper functioning. A small sketch of header-based book-keeping follows.

*** FIX ME: Insert a figure of internal fragmentation with explanation
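A minimal sketch of header-based book-keeping and the internal fragmentation it implies; the header layout is an illustrative assumption, not any particular allocator's:

\begin{lstlisting}
#include <stdio.h>
#include <stdlib.h>

// Hypothetical per-allocation header prepended to every object.
typedef struct Header {
    size_t size;              // object size, needed later by free/realloc
    struct Header * next;     // free-list link while the object is free
} Header;

void * toy_malloc( size_t size ) {
    Header * h = malloc( sizeof(Header) + size );  // header bytes are pure book-keeping
    h->size = size;
    return h + 1;             // the application sees only the memory after the header
}

int main( void ) {
    void * p = toy_malloc( 8 );
    printf( "request 8 bytes, book-keeping adds %zu bytes\n", sizeof(Header) );
    free( (Header *)p - 1 );  // the header bytes are the internal fragmentation
}
\end{lstlisting}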
\paragraph{External Fragmentation}
External fragmentation is the free bits of memory between or around chunks of memory that are currently in use by the application. Segmentation of memory due to the application's usage pattern causes external fragmentation. The memory that is part of external fragmentation is completely free, as it is used neither by the allocator's internal book-keeping nor by the application. Ideally, an allocator should return a segment of memory back to the OS as soon as the application frees it, but this is not always possible. Allocators get memory from the OS in one of two ways.

\begin{itemize}
\item
MMap: an allocator can ask the OS for whole pages in the mmap area. The allocator then segments each page internally to fulfill the application's requests.
\item
Heap: an allocator can ask the OS for memory in the heap area using system calls such as sbrk. The heap area grows downwards and shrinks upwards.
\begin{itemize}
\item
If an allocator uses the mmap area, it can only return extra memory back to the OS when a whole page is free, i.e., no chunk on the page is in use by the application. Even if only one chunk on the whole page is currently in use by the application, the allocator has to retain the whole page.
\item
If an allocator uses the heap area, it can only return the contiguous free memory at the end of the heap area that is currently in the allocator's possession, as the heap area shrinks upwards. If there are free bits of memory in between chunks of memory that are currently in use by the application, the allocator cannot return those free bits.

*** FIX ME: Insert a figure of above scenario with explanation
\item
Even if the entire heap area is free except for one small chunk at the end of the heap area that is being used by the application, the allocator cannot return the free heap area back to the OS, as it is not a contiguous region at the end of the heap area.

*** FIX ME: Insert a figure of above scenario with explanation

\item
Such scenarios cause external fragmentation, but they are out of the allocator's control and depend on the application's usage pattern.
\end{itemize}
\end{itemize}

\subsubsection{Internal Memory Management}
Allocators such as jemalloc (FIX ME: insert reference) proactively get some memory from the OS and divide it into chunks of certain sizes that can be used in the future to fulfill the application's requests. This causes memory overhead, as these chunks are created before the application requests them, and the application may never request memory of these sizes during its whole lifetime.

*** FIX ME: Insert a figure of above scenario with explanation

Allocators such as rpmalloc (FIX ME: insert reference) maintain lists or blocks of sized memory segments freed by the application, for future reuse. These lists are maintained without any guarantee that the application will ever request these sizes again. A small sketch of such a sized free list appears below.

Such tactics are usually used to gain speed, as the allocator does not have to obtain raw memory from the OS and manage it at the time of the application's request, but they do cause memory overhead.

Fragmentation and managed sized chunks of free memory can lead to heap blowup, as the allocator may not be able to use the fragments or the sized free chunks to fulfill the application's requests for other sizes.
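A minimal sketch of the sized free lists described above; the size classes and the intrusive-list structure are illustrative assumptions:

\begin{lstlisting}
#include <stddef.h>
#include <stdlib.h>

// Freed objects are threaded through their own storage, one list per size class.
typedef struct Free { struct Free * next; } Free;
enum { CLASSES = 4 };
static const size_t class_size[CLASSES] = { 16, 32, 64, 128 };
static Free * freelist[CLASSES];

static int class_of( size_t size ) {         // smallest class that fits the request
    for ( int c = 0; c < CLASSES; c += 1 ) if ( size <= class_size[c] ) return c;
    return -1;                                // large request: no size class
}

void * class_alloc( size_t size ) {
    int c = class_of( size );
    if ( c >= 0 && freelist[c] != NULL ) {    // reuse retained memory: fast, but that
        Free * f = freelist[c];               //  memory stays held even if never reused
        freelist[c] = f->next;
        return f;
    }
    return malloc( c >= 0 ? class_size[c] : size );
}

void class_free( void * p, size_t size ) {
    int c = class_of( size );
    if ( c < 0 ) { free( p ); return; }       // large objects go straight back
    ((Free *)p)->next = freelist[c];          // retained for future same-size requests
    freelist[c] = p;
}
\end{lstlisting}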
\subsection{Speed}
When it comes to the performance evaluation of any piece of software, its runtime is usually the first thing that is evaluated. The same is true for memory allocators, but for memory allocators speed does not only mean the runtime of the allocator's routines; there are other factors too.

\subsubsection{Runtime Speed}
Low runtime is the main goal of a memory allocator when it comes to proving its speed. Runtime is the time it takes for a routine of the memory allocator to complete its execution. As mentioned in (FIX ME: reference to routines' list), there are four basic routines used in memory allocation. Ideally, each routine of a memory allocator should be fast. Some memory allocator designs use proactive measures (FIX ME: local reference) to gain speed when allocating memory to the application. Some memory allocators perform memory allocation faster than memory freeing (FIX ME: graph reference), while others show similar speed whether memory is allocated or freed.

\subsubsection{Memory Access Speed}
Runtime speed is not the only speed metric for memory allocators. The memory that an allocator has allocated to the application also needs to be accessible as quickly as possible: the application should be able to read/write allocated memory quickly. The allocation method of a memory allocator may introduce delays in memory access speed, which is especially important in concurrent applications. Ideally, a memory allocator should allocate all memory on a cache line to only one thread, and no cache line should be shared among multiple threads. If a memory allocator allocates memory to multiple threads on the same cache line, the cache may get invalidated frequently when two different threads running on two different processors try to read/write the same memory region. On the other hand, if one cache line is used by only one thread, the cache gets invalidated less frequently. This sharing of one cache line among multiple threads is called false sharing (FIX ME: cite wasik).

\paragraph{Active False Sharing}
Active false sharing is the sharing of one cache line among multiple threads that is caused by the memory allocator. It happens when two threads request memory and the allocator allocates memory to both of them on the same cache line. If the threads then run on different processors, each with its own cache, and both start reading/writing the allocated memory simultaneously, their caches keep getting invalidated every time the other thread writes something to the memory. This slows the application down, as each processor has to reload the cache line much more frequently.

*** FIX ME: Insert a figure of above scenario with explanation

\paragraph{Passive False Sharing}
Passive false sharing is the kind of false sharing that is initiated by the application and not the memory allocator, although the allocator may preserve it in the future instead of eradicating it.

\subparagraph{Program Induced Passive False Sharing}
Program-induced false sharing is completely out of the memory allocator's control and is purely caused by the application. When a thread in the application creates multiple objects in the dynamic area, the allocator may allocate memory for these objects on the same cache line, as the objects are created by the same thread. Passive false sharing occurs if this thread passes one of these objects to another thread while retaining the rest, or passes some or all of the remaining objects to some third thread(s). Now one cache line is shared among multiple threads, but it is caused by the application and not the allocator.
It is out of the allocator's control and has a performance impact similar to active false sharing (FIX ME: cite local) if the threads sharing the cache line start reading/writing the given objects simultaneously.

*** FIX ME: Insert a figure of above scenario 1 with explanation

*** FIX ME: Insert a figure of above scenario 2 with explanation

\subparagraph{Program Induced Allocator Preserved Passive False Sharing}
Program-induced allocator-preserved passive false sharing is another interesting case of passive false sharing, for which both the application and the allocator are partially responsible. It starts the same as program-induced false sharing (FIX ME: cite local): an application thread creates multiple dynamic objects on the same cache line and distributes these objects among multiple threads, causing one cache line to be shared among them (program-induced passive false sharing). This kind of false sharing then occurs when one of the threads that received an object on the shared cache line frees the passed object and re-allocates another object, and the allocator returns the same object (on the shared cache line) that this thread just freed. Although the application caused the false sharing in the first place, to prevent further false sharing the allocator should have returned the new object on some other cache line that is only used by the allocating thread. In terms of performance impact, this passive false sharing slows down the application just like any other kind of false sharing if the threads sharing the cache line start reading/writing the objects simultaneously.

*** FIX ME: Insert a figure of above scenario with explanation

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Micro Benchmark Suite
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Micro Benchmark Suite}
Following is the list of available knobs.

*** FIX ME: Add knobs items after finalize

\subsection{Memory Benchmark}
The memory benchmark measures the memory overhead of an allocator. It allocates a number of dynamic objects. Then, by reading /proc/self/maps, it gets the total memory that the allocator has requested from the OS. Finally, it calculates the memory overhead as the difference between the memory the allocator has requested from the OS and the memory that the program has allocated.
*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Relevant Knobs}
*** FIX ME: Insert Relevant Knobs

\subsection{Speed Benchmark}
The speed benchmark calculates the runtime speed of an allocator's functions (FIX ME: cite allocator routines). It does so by measuring the runtime of the allocator routines in two different ways.

\subsubsection{Speed Time}
The time method does a fixed amount of work by calling each routine of the allocator (FIX ME: cite allocator routines) a specific number of times. It measures the total time it took to perform this workload, then divides that time by the workload and calculates the average time taken by the allocator's routine.
*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Relevant Knobs}
*** FIX ME: Insert Relevant Knobs

\subsubsection{Speed Workload}
The workload method uses the opposite approach. It calls the allocator's routines for a fixed amount of time and measures how much work was done during that time. Then, similar to the time method, it divides the elapsed time by the work done during that time and calculates the average time taken by the allocator's routine.
*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Relevant Knobs}
*** FIX ME: Insert Relevant Knobs

\subsection{Cache Scratch}
The cache scratch benchmark measures program-induced allocator-preserved passive false sharing (FIX ME CITE) in an allocator. It does so in two ways.

\subsubsection{Cache Scratch Time}
Cache Scratch Time allocates dynamic objects, then benchmarks program-induced allocator-preserved passive false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects.
*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Relevant Knobs}
*** FIX ME: Insert Relevant Knobs

\subsubsection{Cache Scratch Layout}
Cache Scratch Layout also allocates dynamic objects, then benchmarks program-induced allocator-preserved passive false sharing (FIX ME CITE) by using the heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line (see the layout-check sketch below).
*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Relevant Knobs}
*** FIX ME: Insert Relevant Knobs
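A minimal sketch of the layout check, assuming 64-byte cache lines; it simply compares the cache-line indices of the returned heap addresses (the real benchmark compares addresses handed to different threads):

\begin{lstlisting}
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum { LINE = 64, THREADS = 4 };      // assumed cache-line size and thread count

int main( void ) {
    void * obj[THREADS];
    for ( int t = 0; t < THREADS; t += 1 ) obj[t] = malloc( 16 ); // one object per thread
    int shared = 0;
    for ( int i = 0; i < THREADS; i += 1 )          // count object pairs whose
        for ( int j = i + 1; j < THREADS; j += 1 )  //  addresses fall on one line
            if ( (uintptr_t)obj[i] / LINE == (uintptr_t)obj[j] / LINE ) shared += 1;
    printf( "%d object pairs share a cache line\n", shared );
    for ( int t = 0; t < THREADS; t += 1 ) free( obj[t] );
}
\end{lstlisting}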
\subsection{Cache Thrash}
The cache thrash benchmark measures allocator-induced active false sharing (FIX ME CITE) in an allocator. It also does so in two ways.

\subsubsection{Cache Thrash Time}
Cache Thrash Time allocates dynamic objects, then benchmarks allocator-induced false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects.
*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Relevant Knobs}
*** FIX ME: Insert Relevant Knobs

\subsubsection{Cache Thrash Layout}
Cache Thrash Layout also allocates dynamic objects, then benchmarks allocator-induced false sharing (FIX ME CITE) by using the heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line.
*** FIX ME: Insert a figure of above benchmark with description

\paragraph{Relevant Knobs}
*** FIX ME: Insert Relevant Knobs

\begin{figure}
\centering
\includegraphics[width=1\textwidth]{figures/bench-speed.eps}
\caption{Benchmark Speed}
\label{fig:benchSpeedFig}
\end{figure}

As laid out in figure \ref{fig:benchSpeedFig}, each chain is measured separately. Each routine in the chain is called for N objects, and those allocated objects are then used when calling the next routine in the allocation chain. This way, we can measure the complete latency of a memory allocator when multiple routines are chained together; e.g., malloc-realloc-free-calloc gives the whole picture of the major allocation routines combined in a chain.

For each chain, the time taken is recorded, which can then be used to visualize the performance of a memory allocator against each chain.

The number of worker threads can be adjusted using the command-line argument -threadN.

\section{Churn Benchmark}
The churn benchmark measures the overall runtime speed of an allocator in a multi-threaded scenario where each thread extensively allocates and frees dynamic memory.

\begin{figure}
\centering
\includegraphics[width=1\textwidth]{figures/bench-churn.eps}
\caption{Benchmark Churn}
\label{fig:benchChurnFig}
\end{figure}

Figure \ref{fig:benchChurnFig} illustrates the churn benchmark. The benchmark creates a buffer with M spots and starts K threads. Each thread randomly picks one of the M spots, frees the object currently at that spot, and allocates a new object for that spot. Each thread repeats this cycle N times. The main thread measures the total time taken for the whole benchmark, and that time is used to evaluate the memory allocator's performance. A sketch of the per-thread loop follows the knob list below.

Only malloc and free are used to allocate and free an object, to eliminate any extra cost, such as the memcpy in realloc. Using malloc/free allows us to measure the latency of memory allocation only, without paying any extra cost. Churn simulates a memory-intensive program and can be tuned to create different scenarios.

The following command-line arguments can be used to tune the benchmark.\\
-threadN : sets number of threads, K\\
-cSpots : sets number of spots for churn, M\\
-objN : sets number of objects per thread, N\\
-maxS : sets max object size\\
-minS : sets min object size\\
-stepS : sets object size increment\\
-distroS : sets object size distribution
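A minimal sketch of the per-thread churn loop, using pthreads; giving each thread its own spots and drawing sizes uniformly from 50-500 bytes are illustrative simplifications of the configured distribution:

\begin{lstlisting}
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

enum { M = 16, N = 100000 };              // spots and cycles per thread (illustrative)

static void * churn( void * arg ) {
    unsigned seed = (unsigned)(uintptr_t)arg;
    void * spot[M] = { 0 };                // this sketch gives each thread its own spots
    for ( int i = 0; i < N; i += 1 ) {
        int s = rand_r( &seed ) % M;                     // pick a random spot
        free( spot[s] );                                 // free the object there (free(NULL) is a no-op)
        spot[s] = malloc( 50 + rand_r( &seed ) % 451 );  // allocate its replacement
    }
    for ( int s = 0; s < M; s += 1 ) free( spot[s] );
    return NULL;
}

int main( void ) {
    enum { K = 4 };                        // worker threads
    pthread_t tid[K];
    for ( int t = 0; t < K; t += 1 )
        pthread_create( &tid[t], NULL, churn, (void *)(uintptr_t)( t + 1 ) );
    for ( int t = 0; t < K; t += 1 )       // the main thread times this whole span
        pthread_join( tid[t], NULL );
}
\end{lstlisting}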
\section{Cache Thrash}\label{sec:benchThrashSec}
The cache thrash benchmark measures allocator-induced active false sharing in an allocator, as illustrated in figure \ref{f:AllocatorInducedActiveFalseSharing}. If the memory allocator allocates memory for multiple threads on the same cache line, this can slow down program performance: if two threads that share one cache line frequently read/write their objects on that cache line concurrently, a cache miss occurs every time a thread accesses its object, as the other thread may have written something at its own memory location on the same cache line.

\begin{figure}
\centering
\includegraphics[width=1\textwidth]{figures/bench-cache-thrash.eps}
\caption{Benchmark Allocator Induced Active False Sharing}
\label{fig:benchThrashFig}
\end{figure}

Cache thrash tries to create a scenario that leads to allocator-induced false sharing if the underlying memory allocator allocates dynamic memory to multiple threads on the same cache lines. Ideally, a memory allocator should distance the dynamic memory region of one thread from those of other threads. Having multiple threads allocate small objects simultaneously should cause the memory allocator to allocate objects for multiple threads on the same cache line if it is not distancing the memory among different threads.

Figure \ref{fig:benchThrashFig} lays out the flow of the cache thrash benchmark. It creates K worker threads. Each worker thread allocates an object and intensively reads/writes it M times, to invalidate cache lines frequently and slow down other threads that might be sharing the cache line with it. Each thread repeats this N times. The main thread measures the total time taken for all worker threads to complete; worker threads sharing cache lines with each other take longer. A sketch of the worker loop follows the knob list below.

Different cache-access scenarios can be created using the following command-line arguments.\\
-threadN : sets number of threads, K\\
-cacheIt : iterations for cache benchmark, N\\
-cacheRep: repetitions for cache benchmark, M\\
-cacheObj: object size for cache benchmark
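A minimal sketch of the cache-thrash worker, using pthreads; K, N, M, and the one-byte object mirror the knobs above but are illustrative here:

\begin{lstlisting}
#include <pthread.h>
#include <stdlib.h>

enum { N = 1000, M = 1000000, SIZE = 1 };  // iterations, repetitions, object size

static void * worker( void * arg ) {
    (void)arg;
    for ( int i = 0; i < N; i += 1 ) {
        volatile char * obj = malloc( SIZE ); // small object; may land on a cache line
        for ( int r = 0; r < M; r += 1 )      //  shared with another thread's object
            obj[0] += 1;                      // repeated writes invalidate that line
        free( (void *)obj );
    }
    return NULL;
}

int main( void ) {
    enum { K = 4 };
    pthread_t tid[K];
    for ( int t = 0; t < K; t += 1 ) pthread_create( &tid[t], NULL, worker, NULL );
    for ( int t = 0; t < K; t += 1 ) pthread_join( tid[t], NULL ); // main thread times this
}
\end{lstlisting}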
\section{Cache Scratch}
The cache scratch benchmark measures allocator-induced passive false sharing in an allocator. An allocator can unintentionally induce false sharing depending on its management of freed objects, as described in figure \ref{f:AllocatorInducedPassiveFalseSharing}. If a thread A allocates multiple objects together, they may be allocated on the same cache line by the memory allocator. If thread A then passes one of these objects to another thread B, the two of them share the same cache line, but this scenario is not induced by the allocator; the program induced this situation. Now, if thread B frees this object and then allocates an object of the same size, the allocator may return the same object, which is on a cache line shared with thread A. This false sharing is then caused by the memory allocator, although it was started by the program.

\begin{figure}
\centering
\includegraphics[width=1\textwidth]{figures/bench-cache-scratch.eps}
\caption{Benchmark Program Induced Passive False Sharing}
\label{fig:benchScratchFig}
\end{figure}

The cache scratch main thread induces false sharing, creating a scenario that makes the memory allocator preserve the program-induced false sharing if it does not return a freed object to its owner thread and, instead, reuses it instantly. An allocator using object ownership, as described in section \ref{s:Ownership}, is less susceptible to allocator-induced passive false sharing: if the object is returned to the thread that owns it, or originally allocated it, then thread B gets a new object that is less likely to be on the same cache line as thread A's.

As in figure \ref{fig:benchScratchFig}, cache scratch allocates K dynamic objects together, one for each of the K worker threads, possibly causing the memory allocator to allocate these objects on the same cache line. It then creates K worker threads and passes one of the K allocated objects to each of the threads. Each worker thread frees the object passed by the main thread, then allocates an object and reads/writes it repetitively M times, causing frequent cache invalidations. Each worker repeats this N times. A sketch of the worker loop follows the knob list below.

Each thread allocating an object after freeing the original object passed by the main thread should cause the memory allocator to return the same object that was initially allocated by the main thread, if the allocator did not return the initial object back to its owner (the main thread). Intensive read/write on the shared cache line by multiple threads then slows down the worker threads due to high cache invalidations and misses. The main thread measures the total time taken for all the workers to complete.

Similar to the cache thrash benchmark in section \ref{sec:benchThrashSec}, different cache-access scenarios can be created using the following command-line arguments.\\
-threadN : sets number of threads, K\\
-cacheIt : iterations for cache benchmark, N\\
-cacheRep: repetitions for cache benchmark, M\\
-cacheObj: object size for cache benchmark
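A minimal sketch of the scratch worker, using pthreads; it differs from the thrash sketch only in that the main thread allocates the initial objects together and each worker frees its hand-me-down object before allocating its own:

\begin{lstlisting}
#include <pthread.h>
#include <stdlib.h>

enum { N = 1000, M = 1000000, SIZE = 1 };

static void * worker( void * initial ) {
    free( initial );                          // free the object handed over by main
    for ( int i = 0; i < N; i += 1 ) {
        volatile char * obj = malloc( SIZE ); // without ownership, this may be the
        for ( int r = 0; r < M; r += 1 )      //  just-freed object on the shared line
            obj[0] += 1;
        free( (void *)obj );
    }
    return NULL;
}

int main( void ) {
    enum { K = 4 };
    void * objs[K];
    for ( int t = 0; t < K; t += 1 ) objs[t] = malloc( SIZE ); // allocated together:
    pthread_t tid[K];                                          //  likely one cache line
    for ( int t = 0; t < K; t += 1 ) pthread_create( &tid[t], NULL, worker, objs[t] );
    for ( int t = 0; t < K; t += 1 ) pthread_join( tid[t], NULL );
}
\end{lstlisting}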
doc/theses/mubeen_zulfiqar_MMath/performance.tex
rbb7c77d → r2e9b59b

\chapter{Performance}
\label{c:Performance}

\section{Machine Specification}

…

\begin{itemize}
\item
{\bf Nasus} AMD EPYC 7662, 64-core socket $\times$ 2, 2.0 GHz, GCC version 9.3.0
\item
{\bf Algol} Huawei ARM TaiShan 2280 V2 Kunpeng 920, 24-core socket $\times$ 4, 2.6 GHz, GCC version 9.4.0
\end{itemize}

\section{Existing Memory Allocators}\label{sec:curAllocatorSec}
With dynamic allocation being an important feature of C, there are many stand-alone memory allocators that have been designed for different purposes. For this thesis, we chose 7 of the most popular and widely used memory allocators.

\subsection{dlmalloc}
dlmalloc (FIX ME: cite allocator with download link) is a thread-safe allocator that is single-threaded and single-heap. dlmalloc maintains free lists of different sizes to store freed dynamic memory. (FIX ME: cite wasik)
\\
\\
{\bf Version:} 2.8.6\\
{\bf Configuration:} Compiled with pre-processor USE\_LOCKS.\\
{\bf Compilation command:}\\
cc -g3 -O3 -Wall -Wextra -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -fPIC -shared -DUSE\_LOCKS -o libdlmalloc.so malloc-2.8.6.c

\subsection{hoard}
Hoard (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses a heap-layer framework. It has per-thread heaps with thread-local free lists, and a global shared heap. (FIX ME: cite wasik)
\\
\\
{\bf Version:} 3.13\\
{\bf Configuration:} Compiled with hoard's default configurations and Makefile.\\
{\bf Compilation command:}\\
make all

\subsection{jemalloc}
jemalloc (FIX ME: cite allocator) is a thread-safe allocator that uses multiple arenas. Each thread is assigned an arena. Each arena has chunks that contain contiguous memory regions of the same size; an arena has multiple chunks that contain regions of multiple sizes.
\\
\\
{\bf Version:} 5.2.1\\
{\bf Configuration:} Compiled with jemalloc's default configurations and Makefile.\\
{\bf Compilation command:}\\
./autogen.sh\\
./configure\\
make\\
make install

\subsection{pt3malloc}
pt3malloc (FIX ME: cite allocator) is a modification of dlmalloc. It is a thread-safe, multi-threaded memory allocator that uses multiple heaps. pt3malloc's heap has a design similar to dlmalloc's heap.
\\
\\
{\bf Version:} 1.8\\
{\bf Configuration:} Compiled with pt3malloc's Makefile using option "linux-shared".\\
{\bf Compilation command:}\\
make linux-shared

\subsection{rpmalloc}
rpmalloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses per-thread heaps. Each heap has multiple size classes, and each size class contains memory regions of the relevant size.
\\
\\
{\bf Version:} 1.4.1\\
{\bf Configuration:} Compiled with rpmalloc's default configurations and ninja build system.\\
{\bf Compilation command:}\\
python3 configure.py\\
ninja

\subsection{tbb malloc}
tbb malloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses a private heap for each thread. Each private heap has multiple bins of different sizes, and each bin contains free regions of the same size.
\\
\\
{\bf Version:} intel tbb 2020 update 2, tbb\_interface\_version == 11102\\
{\bf Configuration:} Compiled with tbbmalloc's default configurations and Makefile.\\
{\bf Compilation command:}\\
make

\subsection{tc malloc}
tcmalloc (FIX ME: cite allocator) is a thread-safe allocator. It uses a per-thread cache to store free objects, which prevents contention on shared resources in multi-threaded applications. A central free list is used to refill the per-thread cache when it gets empty.

\section{Experiment Environment}
We used our micro benchmark suite (FIX ME: cite mbench) to evaluate the memory allocators of section \ref{sec:curAllocatorSec} and our own memory allocator uHeap (section \ref{sec:allocatorSec}).

\section{Results}
FIX ME: add experiment, knobs, graphs, description+analysis

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CHURN
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Churn Benchmark}

The churn benchmark tested the memory allocators for speed under intensive dynamic memory usage.

This experiment was run with the following configuration:

-maxS : 500

-minS : 50

-stepS : 50

-distroS : fisher

-objN : 100000

-cSpots : 16

-threadN : \{ 1, 2, 4, 8, 16 \} *

* Each allocator was tested for its performance across different numbers of threads. The experiment was repeated for each allocator for 1, 2, 4, 8, and 16 threads by setting the configuration knob -threadN.

Results are shown in figure \ref{fig:churn} for both algol and nasus. The x-axis shows the number of threads.
Results are shown in Figure~\ref{fig:churn} for both Algol and Nasus.
The x-axis shows the number of threads and the y-axis shows the total time the experiment took to finish.
Each allocator's performance is shown in a different color.

\begin{figure}
\centering
\subfigure[Algol]{ \includegraphics[width=0.9\textwidth]{evaluations/algol-perf-eps/churn} }
\subfigure[Nasus]{ \includegraphics[width=0.9\textwidth]{evaluations/nasus-perf-eps/churn} }
\caption{Churn}
\label{fig:churn}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% THRASH
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Cache Thrash}

The cache-thrash benchmark tested memory allocators for active false sharing, which occurs when the allocator itself places objects allocated by different threads on the same cache line.

This experiment was run with the following configuration:
\begin{itemize}
\item -cacheIt : 1000
\item -cacheRep : 1000000
\item -cacheObj : 1
\item -threadN : \{ 1, 2, 4, 8, 16 \}
\end{itemize}
As for churn, the experiment was repeated for 1, 2, 4, 8, and 16 threads by setting the configuration -threadN.

Results are shown in Figure~\ref{fig:cacheThrash} for both Algol and Nasus.
The x-axis shows the number of threads and the y-axis shows the total time the experiment took to finish.
Each allocator's performance is shown in a different color.

\begin{figure}
\centering
\subfigure[Algol]{ \includegraphics[width=0.9\textwidth]{evaluations/algol-perf-eps/cache-time-0-thrash} }
\subfigure[Nasus]{ \includegraphics[width=0.9\textwidth]{evaluations/nasus-perf-eps/cache-time-0-thrash} }
\caption{Cache Thrash}
\label{fig:cacheThrash}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% SCRATCH
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Cache Scratch}

The cache-scratch benchmark tested memory allocators for program-induced, allocator-preserved passive false sharing: the program first creates the sharing by passing objects between threads, and the benchmark tests whether the allocator preserves it when those objects are freed and their memory is reused.

This experiment was run with the following configuration:
\begin{itemize}
\item -cacheIt : 1000
\item -cacheRep : 1000000
\item -cacheObj : 1
\item -threadN : \{ 1, 2, 4, 8, 16 \}
\end{itemize}
As for churn, the experiment was repeated for 1, 2, 4, 8, and 16 threads by setting the configuration -threadN.

Results are shown in Figure~\ref{fig:cacheScratch} for both Algol and Nasus.
The x-axis shows the number of threads and the y-axis shows the total time the experiment took to finish.
Each allocator's performance is shown in a different color.

\begin{figure}
\centering
\subfigure[Algol]{ \includegraphics[width=0.9\textwidth]{evaluations/algol-perf-eps/cache-time-0-scratch} }
\subfigure[Nasus]{ \includegraphics[width=0.9\textwidth]{evaluations/nasus-perf-eps/cache-time-0-scratch} }
\caption{Cache Scratch}
\label{fig:cacheScratch}
\end{figure}
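
The kernel both cache benchmarks time can be sketched as follows; the loop bounds mirror the knobs above, while the setup is illustrative.
In the thrash variant, each thread allocates, writes to, and frees its own object; in the scratch variant, the first object each thread writes to is allocated and handed over by a different thread, so the sharing is created by the program and the benchmark measures whether the allocator's reuse of the freed memory preserves it.

\begin{verbatim}
#include <stdlib.h>

enum { CACHE_IT = 1000, CACHE_REP = 1000000, CACHE_OBJ = 1 };

/* Cache-benchmark kernel: each thread repeatedly allocates a tiny object
   and writes to it.  If objects used by different threads end up on the
   same cache line, the writes cause continual cache-line invalidations. */
static void * cache_kernel( void * arg ) {
    (void)arg;
    for ( int it = 0; it < CACHE_IT; it += 1 ) {
        volatile char * obj = malloc( CACHE_OBJ );
        for ( int rep = 0; rep < CACHE_REP; rep += 1 )
            obj[0] += 1;                /* hammer the object's cache line */
        free( (void *)obj );
    }
    return NULL;
}
\end{verbatim}

An allocator avoids both forms of false sharing by never packing objects requested by different threads onto one cache line and by returning a freed object to its owner's heap rather than the freeing thread's heap.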
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% SPEED
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Speed Benchmark}

The speed benchmark tested memory allocators for the runtime speed of individual allocation routines and of chains of allocation routines (malloc, calloc, realloc, and free).

This experiment was run with the following configuration knobs:
\begin{itemize}
\item -threadN : sets number of threads, K
\item -cSpots : sets number of spots for churn, M
\item -objN : sets number of objects per thread, N
\item -maxS : sets max object size
\item -minS : sets min object size
\item -stepS : sets object size increment
\item -distroS : sets object size distribution
\end{itemize}
Results below are for Nasus; each figure shows the results for one chain of allocation routines.

%speed-1-malloc-null.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-1-malloc-null}
\caption{speed-1-malloc-null}
\label{fig:speed-1-malloc-null}
\end{figure}

%speed-2-free-null.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-2-free-null}
\caption{speed-2-free-null}
\label{fig:speed-2-free-null}
\end{figure}

%speed-3-malloc.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-3-malloc}
\caption{speed-3-malloc}
\label{fig:speed-3-malloc}
\end{figure}

%speed-4-realloc.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-4-realloc}
\caption{speed-4-realloc}
\label{fig:speed-4-realloc}
\end{figure}

%speed-5-free.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-5-free}
\caption{speed-5-free}
\label{fig:speed-5-free}
\end{figure}

%speed-6-calloc.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-6-calloc}
\caption{speed-6-calloc}
\label{fig:speed-6-calloc}
\end{figure}

%speed-7-malloc-free.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-7-malloc-free}
\caption{speed-7-malloc-free}
\label{fig:speed-7-malloc-free}
\end{figure}

%speed-8-realloc-free.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-8-realloc-free}
\caption{speed-8-realloc-free}
\label{fig:speed-8-realloc-free}
\end{figure}

%speed-9-calloc-free.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-9-calloc-free}
\caption{speed-9-calloc-free}
\label{fig:speed-9-calloc-free}
\end{figure}

%speed-10-malloc-realloc.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-10-malloc-realloc}
\caption{speed-10-malloc-realloc}
\label{fig:speed-10-malloc-realloc}
\end{figure}

%speed-11-calloc-realloc.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-11-calloc-realloc}
\caption{speed-11-calloc-realloc}
\label{fig:speed-11-calloc-realloc}
\end{figure}

%speed-12-malloc-realloc-free.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-12-malloc-realloc-free}
\caption{speed-12-malloc-realloc-free}
\label{fig:speed-12-malloc-realloc-free}
\end{figure}

%speed-13-calloc-realloc-free.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-13-calloc-realloc-free}
\caption{speed-13-calloc-realloc-free}
\label{fig:speed-13-calloc-realloc-free}
\end{figure}
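
Each speed experiment times a tight loop over one such chain.
The sketch below shows the shape of such a measurement for the malloc-free chain (as in Figure~\ref{fig:speed-7-malloc-free}); the repetition count, fixed request size, and timing code are illustrative, not the benchmark's actual harness.

\begin{verbatim}
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

enum { N = 10000000 };                  /* illustrative repetition count */

int main( void ) {
    struct timespec start, end;
    clock_gettime( CLOCK_MONOTONIC, &start );
    for ( int i = 0; i < N; i += 1 ) {
        void * p = malloc( 64 );        /* fixed 64-byte request for illustration */
        free( p );
    }
    clock_gettime( CLOCK_MONOTONIC, &end );
    double secs = ( end.tv_sec - start.tv_sec )
                + ( end.tv_nsec - start.tv_nsec ) * 1e-9;
    printf( "%.1f ns per malloc/free pair\n", secs * 1e9 / N );
    return 0;
}
\end{verbatim}

Timing each chain separately makes it possible to compare allocators per routine as well as on combinations of routines.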
%speed-14-{m,c,re}alloc-free.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/speed-14-{m,c,re}alloc-free}
\caption{speed-14-\{m,c,re\}alloc-free}
\label{fig:speed-14-alloc-free}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% MEMORY
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Memory Benchmark}

The memory benchmark measured the memory usage of each allocator.
Results below are for Nasus, first for one producer with one consumer and then for four producers with four consumers; each figure shows one allocator.

%mem-1-prod-1-cons-100-cfa.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-cfa}
\caption{mem-1-prod-1-cons-100-cfa}
\label{fig:mem-1-prod-1-cons-100-cfa}
\end{figure}

%mem-1-prod-1-cons-100-dl.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-dl}
\caption{mem-1-prod-1-cons-100-dl}
\label{fig:mem-1-prod-1-cons-100-dl}
\end{figure}

%mem-1-prod-1-cons-100-glc.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-glc}
\caption{mem-1-prod-1-cons-100-glc}
\label{fig:mem-1-prod-1-cons-100-glc}
\end{figure}

%mem-1-prod-1-cons-100-hrd.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-hrd}
\caption{mem-1-prod-1-cons-100-hrd}
\label{fig:mem-1-prod-1-cons-100-hrd}
\end{figure}

%mem-1-prod-1-cons-100-je.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-je}
\caption{mem-1-prod-1-cons-100-je}
\label{fig:mem-1-prod-1-cons-100-je}
\end{figure}

%mem-1-prod-1-cons-100-pt3.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-pt3}
\caption{mem-1-prod-1-cons-100-pt3}
\label{fig:mem-1-prod-1-cons-100-pt3}
\end{figure}

%mem-1-prod-1-cons-100-rp.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-rp}
\caption{mem-1-prod-1-cons-100-rp}
\label{fig:mem-1-prod-1-cons-100-rp}
\end{figure}

%mem-1-prod-1-cons-100-tbb.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-1-prod-1-cons-100-tbb}
\caption{mem-1-prod-1-cons-100-tbb}
\label{fig:mem-1-prod-1-cons-100-tbb}
\end{figure}
%mem-4-prod-4-cons-100-cfa.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-cfa}
\caption{mem-4-prod-4-cons-100-cfa}
\label{fig:mem-4-prod-4-cons-100-cfa}
\end{figure}

%mem-4-prod-4-cons-100-dl.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-dl}
\caption{mem-4-prod-4-cons-100-dl}
\label{fig:mem-4-prod-4-cons-100-dl}
\end{figure}

%mem-4-prod-4-cons-100-glc.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-glc}
\caption{mem-4-prod-4-cons-100-glc}
\label{fig:mem-4-prod-4-cons-100-glc}
\end{figure}

%mem-4-prod-4-cons-100-hrd.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-hrd}
\caption{mem-4-prod-4-cons-100-hrd}
\label{fig:mem-4-prod-4-cons-100-hrd}
\end{figure}

%mem-4-prod-4-cons-100-je.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-je}
\caption{mem-4-prod-4-cons-100-je}
\label{fig:mem-4-prod-4-cons-100-je}
\end{figure}

%mem-4-prod-4-cons-100-pt3.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-pt3}
\caption{mem-4-prod-4-cons-100-pt3}
\label{fig:mem-4-prod-4-cons-100-pt3}
\end{figure}

%mem-4-prod-4-cons-100-rp.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-rp}
\caption{mem-4-prod-4-cons-100-rp}
\label{fig:mem-4-prod-4-cons-100-rp}
\end{figure}

%mem-4-prod-4-cons-100-tbb.eps
\begin{figure}
\centering
\includegraphics[width=1\textwidth]{evaluations/nasus-perf-eps/mem-4-prod-4-cons-100-tbb}
\caption{mem-4-prod-4-cons-100-tbb}
\label{fig:mem-4-prod-4-cons-100-tbb}
\end{figure}
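
The figure names indicate a producer--consumer workload: objects allocated by producer threads are freed by consumer threads, so memory continually crosses between the allocator's per-thread heaps.
A minimal sketch of that pattern for one producer and one consumer is shown below; the single-slot handoff, object count, and fixed size are illustrative only, not the benchmark's actual mechanism.

\begin{verbatim}
#include <pthread.h>
#include <stdlib.h>

static void * slot;                     /* single-slot handoff between the threads */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t filled = PTHREAD_COND_INITIALIZER;
static pthread_cond_t emptied = PTHREAD_COND_INITIALIZER;

static void * producer( void * arg ) {
    (void)arg;
    for ( int i = 0; i < 100000; i += 1 ) {
        void * obj = malloc( 64 );      /* allocated on the producer's heap */
        pthread_mutex_lock( &lock );
        while ( slot != NULL ) pthread_cond_wait( &emptied, &lock );
        slot = obj;
        pthread_cond_signal( &filled );
        pthread_mutex_unlock( &lock );
    }
    return NULL;
}

static void * consumer( void * arg ) {
    (void)arg;
    for ( int i = 0; i < 100000; i += 1 ) {
        pthread_mutex_lock( &lock );
        while ( slot == NULL ) pthread_cond_wait( &filled, &lock );
        void * obj = slot; slot = NULL;
        pthread_cond_signal( &emptied );
        pthread_mutex_unlock( &lock );
        free( obj );                    /* freed by the consumer thread */
    }
    return NULL;
}

int main( void ) {
    pthread_t p, c;
    pthread_create( &p, NULL, producer, NULL );
    pthread_create( &c, NULL, consumer, NULL );
    pthread_join( p, NULL );
    pthread_join( c, NULL );
    return 0;
}
\end{verbatim}

Whether a freed object is returned to the owning heap or retained by the freeing thread affects how much memory such a workload accumulates, making producer--consumer patterns a useful stress test for allocator memory usage.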