Context Navigation

-              r6d6cf5a
+              r64188cc
 \captionsetup{justification=raggedright,singlelinecheck=false}
 \usepackage{dcolumn}                                            % align decimal points in tables
+\usepackage{capt-of}
 \hypersetup{breaklinks=true}
 …
 \begin{cfa}
 unsigned int N = 10_000_000;
 #define BENCH( run, result ) Time before = getTimeNsec(); run; result = (getTimeNsec() - before) / N;
+#define BENCH( run ) Time before = getTimeNsec(); run; Duration result = (getTimeNsec() - before) / N;
 \end{cfa}
 The method used to get time is @clock_gettime( CLOCK_REALTIME )@.
 …
 void main( C & ) { for ( ;; ) { @suspend();@ } }
 int main() {
-        Duration result;
         BENCH(
+                for ( size_t i = 0; i < N; i += 1 ) { @resume( c );@ },
+                result
+        )
+                for ( size_t i = 0; i < N; i += 1 ) { @resume( c );@ } )
         sout | result`ns | endl;
+}
 …
 int main() {
-        Duration result;
         BENCH(
+                for ( size_t i = 0; i < N; i += 1 ) { @yield();@ },
+                result
+        )
+                for ( size_t i = 0; i < N; i += 1 ) { @yield();@ } )
         sout | result`ns | endl;
+}
 …
 \quad
 \subfloat[Thread]{\label{f:ExternalState}\usebox\myboxB}
 \caption{\CFA Context-switch benchmark}
+\captionof{figure}{\CFA context-switch benchmark}
 \label{f:ctx-switch}
+\end{figure}
+\begin{table}
 \centering
+\caption{Context Switch comparison (nanoseconds)}
+\captionof{table}{Context switch comparison (nanoseconds)}
 \label{tab:ctx-switch}
+\bigskip
 \begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
 \cline{2-4}
 …
 \hline
 \end{tabular}
+\end{table}
+\paragraph{Mutual-Exclusion}
+Mutual exclusion is measured by entering/leaving a critical section.
+For monitors, entering and leaving a monitor routine is measured.
+Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
+To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock are also measured.
+Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
+\begin{samepage}
+\begin{figure}[!p]
+\bigskip
+\bigskip
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
 \begin{cfa}
 …
 void __attribute__((noinline)) do_call( M & mutex m/*, m2, m3, m4*/ ) {}
 int main() {
+        Duration result;
+        BENCH( for( size_t i = 0; i < N; i += 1 ) { @do_call( m1/*, m2, m3, m4*/ );@ }, result )
+        BENCH( for( size_t i = 0; i < N; i += 1 ) { @do_call( m1/*, m2, m3, m4*/ );@ } )
         sout | result`ns | endl;
+}
 \end{cfa}
 \caption{\CFA benchmark code used to measure mutex routines.}
+\captionof{figure}{\CFA acquire/release mutex benchmark}
 \label{f:mutex}
+\end{figure}
+\begin{table}[!p]
 \centering
+\caption{Mutex routine comparison (nanoseconds)}
+\captionof{table}{Mutex comparison (nanoseconds)}
 \label{tab:mutex}
+\bigskip
 \begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
 …
 \hline
 \end{tabular}
+\end{table}
+\end{samepage}
+\end{figure}
+\paragraph{Mutual-Exclusion}
+Mutual exclusion is measured by entering/leaving a critical section.
+For monitors, entering and leaving a monitor routine is measured.
+Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
+To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock are also measured.
+Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
 …
 Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
+\begin{samepage}
+\begin{figure}[!p]
+\begin{figure}
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
 \begin{cfa}
 …
+}
 int  __attribute__((noinline)) do_wait( M & mutex m ) {
-        Duration result;
         go = 1; // continue other thread
         BENCH( for ( size_t i = 0; i < N; i += 1 ) { @wait( c );@ }, result );
+        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @wait( c );@ } );
         go = 0; // stop other thread
         sout | result`ns | endl;
 …
+}
 \end{cfa}
 \caption{Internal scheduling benchmark}
+\captionof{figure}{\CFA Internal scheduling benchmark}
 \label{f:int-sched}
+\end{figure}
+\begin{table}[!p]
 \centering
 \caption{Internal scheduling comparison (nanoseconds)}
+\captionof{table}{Internal scheduling comparison (nanoseconds)}
 \label{tab:int-sched}
+\bigskip
 \begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
 \cline{2-4}
 …
 \hline
 \end{tabular}
+\end{table}
+\end{samepage}
+\end{figure}
 …
 Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
-\begin{samepage}
 \begin{figure}
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
 …
+}
 int __attribute__((noinline)) do_wait( M & mutex m ) {
+        Duration result;
+        go = 1; BENCH( for ( size_t i = 0; i < N; i += 1 ) { @waitfor( do_call, m );@ }, result ) go = 0;
+        go = 1; // continue other thread
+        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @waitfor( do_call, m );@ } )
+        go = 0; // stop other thread
         sout | result`ns | endl;
+}
 …
+}
 \end{cfa}
 \caption{Benchmark code for external scheduling}
+\captionof{figure}{\CFA external scheduling benchmark}
 \label{f:ext-sched}
+\end{figure}
+\begin{table}
 \centering
+\caption{External scheduling comparison (nanoseconds)}
+\captionof{table}{External scheduling comparison (nanoseconds)}
 \label{tab:ext-sched}
+\bigskip
 \begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
 \cline{2-4}
 \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
 \hline
 \uC @Accept@                            & 350           & 350.61        & 3.11  \\
+\uC @_Accept@                           & 350           & 350.61        & 3.11  \\
 \CFA @waitfor@, 1 @monitor@     & 358.5         & 358.36        & 3.82  \\
 \CFA @waitfor@, 2 @monitor@     & 422           & 426.79        & 7.95  \\
 …
 \hline
 \end{tabular}
+\end{table}
+\end{samepage}
+\paragraph{Object Creation}
+Object creation is measured by creating/deleting the specific kind of concurrent object.
+Figure~\ref{f:creation} shows the code for \CFA, with results in Table~\ref{tab:creation}.
+The only note here is that the call stacks of \CFA coroutines are lazily created; therefore, without priming the coroutine to force stack creation, the creation cost is artificially low.
+\begin{figure}
+\centering
+\bigskip
+\medskip
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
 \begin{cfa}
 …
 void main( MyThread & ) {}
 int main() {
+        Duration result;
+        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @MyThread m;@ }, result )
+        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @MyThread m;@ } )
         sout | result`ns | endl;
+}
 \end{cfa}
 \caption{Benchmark code for \CFA object creation}
+\captionof{figure}{\CFA object creation benchmark}
 \label{f:creation}
+\end{figure}
+\begin{table}
 \centering
+\caption{Creation comparison (nanoseconds)}
+\captionof{table}{Creation comparison (nanoseconds)}
 \label{tab:creation}
+\bigskip
 \begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
 \cline{2-4}
 …
 \hline
 \end{tabular}
+\end{table}
+\end{figure}
+\paragraph{Object Creation}
+Object creation is measured by creating/deleting the specific kind of concurrent object.
+Figure~\ref{f:creation} shows the code for \CFA, with results in Table~\ref{tab:creation}.
+The only note here is that the call stacks of \CFA coroutines are lazily created; therefore, without priming the coroutine to force stack creation, the creation cost is artificially low.

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 64188cc

Legend:

doc/papers/concurrency/Paper.tex

Download in other formats: