Ignore:
Timestamp:
Jun 27, 2018, 10:46:07 PM (6 years ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, no_list, persistent-indexer, pthread-emulation, qualifiedEnum
Children:
287da46
Parents:
6d6cf5a
Message:

more updates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • doc/papers/concurrency/Paper.tex

    r6d6cf5a r64188cc  
    2222\captionsetup{justification=raggedright,singlelinecheck=false}
    2323\usepackage{dcolumn}                                            % align decimal points in tables
     24\usepackage{capt-of}
    2425
    2526\hypersetup{breaklinks=true}
     
    21582159\begin{cfa}
    21592160unsigned int N = 10_000_000;
    2160 #define BENCH( run, result ) Time before = getTimeNsec(); run; result = (getTimeNsec() - before) / N;
     2161#define BENCH( run ) Time before = getTimeNsec(); run; Duration result = (getTimeNsec() - before) / N;
    21612162\end{cfa}
    21622163The method used to get time is @clock_gettime( CLOCK_REALTIME )@.
     
    21842185void main( C & ) { for ( ;; ) { @suspend();@ } }
    21852186int main() {
    2186         Duration result;
    21872187        BENCH(
    2188                 for ( size_t i = 0; i < N; i += 1 ) { @resume( c );@ },
    2189                 result
    2190         )
     2188                for ( size_t i = 0; i < N; i += 1 ) { @resume( c );@ } )
    21912189        sout | result`ns | endl;
    21922190}
     
    22002198
    22012199int main() {
    2202         Duration result;
    22032200        BENCH(
    2204                 for ( size_t i = 0; i < N; i += 1 ) { @yield();@ },
    2205                 result
    2206         )
     2201                for ( size_t i = 0; i < N; i += 1 ) { @yield();@ } )
    22072202        sout | result`ns | endl;
    22082203}
     
    22132208\quad
    22142209\subfloat[Thread]{\label{f:ExternalState}\usebox\myboxB}
    2215 \caption{\CFA Context-switch benchmark}
     2210\captionof{figure}{\CFA context-switch benchmark}
    22162211\label{f:ctx-switch}
    2217 \end{figure}
    2218 
    2219 \begin{table}
     2212
    22202213\centering
    2221 \caption{Context Switch comparison (nanoseconds)}
     2214
     2215\captionof{table}{Context switch comparison (nanoseconds)}
    22222216\label{tab:ctx-switch}
    2223 
     2217\bigskip
    22242218\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
    22252219\cline{2-4}
     
    22352229\hline
    22362230\end{tabular}
    2237 \end{table}
    2238 
    2239 
    2240 \paragraph{Mutual-Exclusion}
    2241 
    2242 Mutual exclusion is measured by entering/leaving a critical section.
    2243 For monitors, entering and leaving a monitor routine is measured.
    2244 Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
    2245 To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured.
    2246 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    2247 
    2248 \begin{samepage}
    2249 \begin{figure}[!p]
     2231
     2232\bigskip
     2233\bigskip
     2234
    22502235\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    22512236\begin{cfa}
     
    22532238void __attribute__((noinline)) do_call( M & mutex m/*, m2, m3, m4*/ ) {}
    22542239int main() {
    2255         Duration result;
    2256         BENCH( for( size_t i = 0; i < N; i += 1 ) { @do_call( m1/*, m2, m3, m4*/ );@ }, result )
     2240        BENCH( for( size_t i = 0; i < N; i += 1 ) { @do_call( m1/*, m2, m3, m4*/ );@ } )
    22572241        sout | result`ns | endl;
    22582242}
    22592243\end{cfa}
    2260 \caption{\CFA benchmark code used to measure mutex routines.}
     2244\captionof{figure}{\CFA acquire/release mutex benchmark}
    22612245\label{f:mutex}
    2262 \end{figure}
    2263 
    2264 \begin{table}[!p]
     2246
    22652247\centering
    2266 \caption{Mutex routine comparison (nanoseconds)}
     2248
     2249\captionof{table}{Mutex comparison (nanoseconds)}
    22672250\label{tab:mutex}
     2251\bigskip
    22682252
    22692253\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
     
    22812265\hline
    22822266\end{tabular}
    2283 \end{table}
    2284 \end{samepage}
     2267\end{figure}
     2268
     2269
     2270\paragraph{Mutual-Exclusion}
     2271
     2272Mutual exclusion is measured by entering/leaving a critical section.
     2273For monitors, entering and leaving a monitor routine is measured.
     2274Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
     2275To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock are also measured.
     2276Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    22852277
    22862278
     
    22912283Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    22922284
    2293 \begin{samepage}
    2294 \begin{figure}[!p]
     2285\begin{figure}
    22952286\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    22962287\begin{cfa}
     
    23052296}
    23062297int  __attribute__((noinline)) do_wait( M & mutex m ) {
    2307         Duration result;
    23082298        go = 1; // continue other thread
    2309         BENCH( for ( size_t i = 0; i < N; i += 1 ) { @wait( c );@ }, result );
     2299        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @wait( c );@ } );
    23102300        go = 0; // stop other thread
    23112301        sout | result`ns | endl;
     
    23162306}
    23172307\end{cfa}
    2318 \caption{Internal scheduling benchmark}
     2308\captionof{figure}{\CFA internal scheduling benchmark}
    23192309\label{f:int-sched}
    2320 \end{figure}
    2321 
    2322 \begin{table}[!p]
     2310
    23232311\centering
    2324 \caption{Internal scheduling comparison (nanoseconds)}
     2312\captionof{table}{Internal scheduling comparison (nanoseconds)}
    23252313\label{tab:int-sched}
     2314\bigskip
     2315
    23262316\begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
    23272317\cline{2-4}
     
    23362326\hline
    23372327\end{tabular}
    2338 \end{table}
    2339 \end{samepage}
     2328\end{figure}
    23402329
    23412330
     
    23462335Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    23472336
    2348 \begin{samepage}
    23492337\begin{figure}
    23502338\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     
    23592347}
    23602348int __attribute__((noinline)) do_wait( M & mutex m ) {
    2361         Duration result;
    2362         go = 1; BENCH( for ( size_t i = 0; i < N; i += 1 ) { @waitfor( do_call, m );@ }, result ) go = 0;
     2349        go = 1; // continue other thread
     2350        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @waitfor( do_call, m );@ } )
     2351        go = 0; // stop other thread
    23632352        sout | result`ns | endl;
    23642353}
     
    23682357}
    23692358\end{cfa}
    2370 \caption{Benchmark code for external scheduling}
     2359\captionof{figure}{\CFA external scheduling benchmark}
    23712360\label{f:ext-sched}
    2372 \end{figure}
    2373 
    2374 \begin{table}
     2361
    23752362\centering
    2376 \caption{External scheduling comparison (nanoseconds)}
     2363
     2364\captionof{table}{External scheduling comparison (nanoseconds)}
    23772365\label{tab:ext-sched}
     2366\bigskip
    23782367\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
    23792368\cline{2-4}
    23802369\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
    23812370\hline
    2382 \uC @Accept@                            & 350           & 350.61        & 3.11  \\
     2371\uC @_Accept@                           & 350           & 350.61        & 3.11  \\
    23832372\CFA @waitfor@, 1 @monitor@     & 358.5         & 358.36        & 3.82  \\
    23842373\CFA @waitfor@, 2 @monitor@     & 422           & 426.79        & 7.95  \\
     
    23862375\hline
    23872376\end{tabular}
    2388 \end{table}
    2389 \end{samepage}
    2390 
    2391 
    2392 \paragraph{Object Creation}
    2393 
    2394 Object creation is measured by creating/deleting the specific kind of concurrent object.
    2395 Figure~\ref{f:creation} shows the code for \CFA, with results in Table~\ref{tab:creation}.
    2396 The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine to force stack creation, the creation cost is artificially low.
    2397 
    2398 \begin{figure}
    2399 \centering
     2377
     2378\bigskip
     2379\medskip
     2380
    24002381\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    24012382\begin{cfa}
     
    24032384void main( MyThread & ) {}
    24042385int main() {
    2405         Duration result;
    2406         BENCH( for ( size_t i = 0; i < N; i += 1 ) { @MyThread m;@ }, result )
     2386        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @MyThread m;@ } )
    24072387        sout | result`ns | endl;
    24082388}
    24092389\end{cfa}
    2410 \caption{Benchmark code for \CFA object creation}
     2390\captionof{figure}{\CFA object creation benchmark}
    24112391\label{f:creation}
    2412 \end{figure}
    2413 
    2414 \begin{table}
     2392
    24152393\centering
    2416 \caption{Creation comparison (nanoseconds)}
     2394
     2395\captionof{table}{Creation comparison (nanoseconds)}
    24172396\label{tab:creation}
     2397\bigskip
     2398
    24182399\begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
    24192400\cline{2-4}
     
    24302411\hline
    24312412\end{tabular}
    2432 \end{table}
     2413\end{figure}
     2414
     2415
     2416\paragraph{Object Creation}
     2417
     2418Object creation is measured by creating/deleting the specific kind of concurrent object.
     2419Figure~\ref{f:creation} shows the code for \CFA, with results in Table~\ref{tab:creation}.
     2420The only note here is that the call stacks of \CFA coroutines are lazily created; therefore, without priming the coroutine to force stack creation, the creation cost is artificially low.
    24332421
    24342422
Note: See TracChangeset for help on using the changeset viewer.