Changeset beabdf3


Ignore:
Timestamp:
Apr 8, 2023, 3:50:20 PM (13 months ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
ADT, ast-experimental, master
Children:
8472c6c
Parents:
3d5fba21
Message:

formatting, replace latex package subcaption with subfig

File:
1 edited

Legend:

Unmodified
Added
Removed
  • doc/theses/colby_parsons_MMAth/text/actors.tex

    r3d5fba21 rbeabdf3  
    9090\begin{cfa}
    9191struct derived_actor {
    92     inline actor;       // Plan-9 C inheritance
     92        inline actor;      // Plan-9 C inheritance
    9393};
    9494void ?{}( derived_actor & this ) { // Default ctor
    95     ((actor &)this){};  // Call to actor ctor
     95        ((actor &)this){};  // Call to actor ctor
    9696}
    9797
    9898struct derived_msg {
    99     inline message;    // Plan-9 C nominal inheritance
    100     char word[12];
     99        inline message; // Plan-9 C nominal inheritance
     100        char word[12];
    101101};
    102102void ?{}( derived_msg & this, char * new_word ) { // Overloaded ctor
    103     ((message &) this){ Nodelete }; // Passing allocation to ctor
    104     strcpy(this.word, new_word);
     103        ((message &) this){ Nodelete }; // Passing allocation to ctor
     104        strcpy(this.word, new_word);
    105105}
    106106
    107107Allocation receive( derived_actor & receiver, derived_msg & msg ) {
    108     printf("The message contained the string: %s\n", msg.word);
    109     return Finished; // Return finished since actor is done
     108        printf("The message contained the string: %s\n", msg.word);
     109        return Finished; // Return finished since actor is done
    110110}
    111111
    112112int main() {
    113     start_actor_system(); // Sets up executor
    114     derived_actor my_actor;       
    115     derived_msg my_msg{ "Hello World" }; // Constructor call
    116     my_actor << my_msg;   // Send message via left shift operator
    117     stop_actor_system(); // Waits until actors are finished
    118     return 0;
     113        start_actor_system(); // Sets up executor
     114        derived_actor my_actor;         
     115        derived_msg my_msg{ "Hello World" }; // Constructor call
     116        my_actor << my_msg;   // Send message via left shift operator
     117        stop_actor_system(); // Waits until actors are finished
     118        return 0;
    119119}
    120120\end{cfa}
     
    229229\section{Envelopes}\label{s:envelope}
    230230In actor systems messages are sent and received by actors.
    231 When a actor receives a message it  executes its behaviour that is associated with that message type.
     231When an actor receives a message it executes its behaviour that is associated with that message type.
    232232However, the unit of work that stores the message, the receiving actor's address, and other pertinent information needs to persist between the send and the receive.
    233233Furthermore the unit of work needs to be able to be stored in some fashion, usually in a queue, until it is executed by an actor.
     
    301301While other systems are concerned with stealing actors, the \CFA actor system steals queues.
    302302This is a result of \CFA's use of the inverted actor system.
    303  The goal of the \CFA actor work stealing mechanism is to have a zero-victim-cost stealing mechanism.
     303The goal of the \CFA actor work stealing mechanism is to have a zero-victim-cost stealing mechanism.
    304304This does not mean that stealing has no cost.
    305305Rather, the goal is to ensure that stealing work does not impact the performance of victim workers.
     
    369369
    370370\begin{cfa}
    371 void swap( uint victim_idx, uint my_idx  ) {
    372     // Step 0:
    373     work_queue * my_queue = request_queues[my_idx];
    374     work_queue * vic_queue = request_queues[victim_idx];
    375     // Step 2:
    376     request_queues[my_idx] = 0p;
    377     // Step 3:
    378     request_queues[victim_idx] = my_queue;
    379     // Step 4:
    380     request_queues[my_idx] = vic_queue;
     371void swap( uint victim_idx, uint my_idx ) {
     372        // Step 0:
     373        work_queue * my_queue = request_queues[my_idx];
     374        work_queue * vic_queue = request_queues[victim_idx];
     375        // Step 2:
     376        request_queues[my_idx] = 0p;
     377        // Step 3:
     378        request_queues[victim_idx] = my_queue;
     379        // Step 4:
     380        request_queues[my_idx] = vic_queue;
    381381}
    382382\end{cfa}
     
    389389// This routine is atomic
    390390bool CAS( work_queue ** ptr, work_queue ** old, work_queue * new ) {
    391     if ( *ptr != *old )
    392         return false;
    393     *ptr = new;
    394     return true;
     391        if ( *ptr != *old )
     392                return false;
     393        *ptr = new;
     394        return true;
    395395}
    396396
    397397bool try_swap_queues( worker & this, uint victim_idx, uint my_idx ) with(this) {
    398     // Step 0:
    399     // request_queues is the shared array of all sharded queues
    400     work_queue * my_queue = request_queues[my_idx];
    401     work_queue * vic_queue = request_queues[victim_idx];
    402 
    403     // Step 1:
    404     // If either queue is 0p then they are in the process of being stolen
    405     // 0p is CForAll's equivalent of C++'s nullptr
    406     if ( vic_queue == 0p ) return false;
    407 
    408     // Step 2:
    409     // Try to set thief's queue ptr to be 0p.
    410     // If this CAS fails someone stole thief's queue so return false
    411     if ( !CAS( &request_queues[my_idx], &my_queue, 0p ) )
    412         return false;
    413    
    414     // Step 3:
    415     // Try to set victim queue ptr to be thief's queue ptr.
    416     // If it fails someone stole the other queue, so fix up then return false
    417     if ( !CAS( &request_queues[victim_idx], &vic_queue, my_queue ) ) {
    418         request_queues[my_idx] = my_queue; // reset queue ptr back to prev val
    419         return false;
    420     }
    421 
    422     // Step 4:
    423     // Successfully swapped.
    424     // Thief's ptr is 0p so no one will touch it
    425     // Write back without CAS is safe
    426     request_queues[my_idx] = vic_queue;
    427     return true;
     398        // Step 0:
     399        // request_queues is the shared array of all sharded queues
     400        work_queue * my_queue = request_queues[my_idx];
     401        work_queue * vic_queue = request_queues[victim_idx];
     402
     403        // Step 1:
     404        // If either queue is 0p then they are in the process of being stolen
     405        // 0p is CForAll's equivalent of C++'s nullptr
     406        if ( vic_queue == 0p ) return false;
     407
     408        // Step 2:
     409        // Try to set thief's queue ptr to be 0p.
     410        // If this CAS fails someone stole thief's queue so return false
     411        if ( !CAS( &request_queues[my_idx], &my_queue, 0p ) )
     412                return false;
     413       
     414        // Step 3:
     415        // Try to set victim queue ptr to be thief's queue ptr.
     416        // If it fails someone stole the other queue, so fix up then return false
     417        if ( !CAS( &request_queues[victim_idx], &vic_queue, my_queue ) ) {
     418                request_queues[my_idx] = my_queue; // reset queue ptr back to prev val
     419                return false;
     420        }
     421
     422        // Step 4:
     423        // Successfully swapped.
     424        // Thief's ptr is 0p so no one will touch it
     425        // Write back without CAS is safe
     426        request_queues[my_idx] = vic_queue;
     427        return true;
    428428}
    429429\end{cfa}\label{c:swap}
     
    706706\label{t:StaticActorMessagePerformance}
    707707\begin{tabular}{*{5}{r|}r}
    708     & \multicolumn{1}{c|}{\CFA (100M)} & \multicolumn{1}{c|}{CAF (10M)} & \multicolumn{1}{c|}{Akka (100M)} & \multicolumn{1}{c|}{\uC (100M)} & \multicolumn{1}{c@{}}{ProtoActor (100M)} \\
    709     \hline                                                                                                                                         
    710     AMD         & \input{data/pykeSendStatic} \\
    711     \hline                                                                                                                                         
    712     Intel       & \input{data/nasusSendStatic}
     708        & \multicolumn{1}{c|}{\CFA (100M)} & \multicolumn{1}{c|}{CAF (10M)} & \multicolumn{1}{c|}{Akka (100M)} & \multicolumn{1}{c|}{\uC (100M)} & \multicolumn{1}{c@{}}{ProtoActor (100M)} \\
     709        \hline                                                                                                                                                 
     710        AMD             & \input{data/pykeSendStatic} \\
     711        \hline                                                                                                                                                 
     712        Intel   & \input{data/nasusSendStatic}
    713713\end{tabular}
    714714
     
    719719
    720720\begin{tabular}{*{5}{r|}r}
    721     & \multicolumn{1}{c|}{\CFA (20M)} & \multicolumn{1}{c|}{CAF (2M)} & \multicolumn{1}{c|}{Akka (2M)} & \multicolumn{1}{c|}{\uC (20M)} & \multicolumn{1}{c@{}}{ProtoActor (2M)} \\
    722     \hline                                                                                                                                         
    723     AMD         & \input{data/pykeSendDynamic} \\
    724     \hline                                                                                                                                         
    725     Intel       & \input{data/nasusSendDynamic}
     721        & \multicolumn{1}{c|}{\CFA (20M)} & \multicolumn{1}{c|}{CAF (2M)} & \multicolumn{1}{c|}{Akka (2M)} & \multicolumn{1}{c|}{\uC (20M)} & \multicolumn{1}{c@{}}{ProtoActor (2M)} \\
     722        \hline                                                                                                                                                 
     723        AMD             & \input{data/pykeSendDynamic} \\
     724        \hline                                                                                                                                                 
     725        Intel   & \input{data/nasusSendDynamic}
    726726\end{tabular}
    727727\end{table}
     
    745745In the static send benchmark all systems except CAF have static send costs that are in the same ballpark, only varying by $\approx$70ns.
    746746In the dynamic send benchmark all systems experience slower message sends, as expected due to the extra allocations.
    747 However,  Akka and ProtoActor, slow down by a more significant margin than the \uC and \CFA.
     747However, Akka and ProtoActor slow down by a more significant margin than \uC and \CFA.
    748748This is likely a result of Akka and ProtoActor's garbage collection, which can suffer from hits in performance for allocation heavy workloads, whereas \uC and \CFA have explicit allocation/deallocation.
    749749
     
    753753
    754754\begin{figure}
    755     \centering
    756     \begin{subfigure}{0.5\textwidth}
    757         \centering
    758         \scalebox{0.5}{\input{figures/nasusCFABalance-One.pgf}}
    759         \subcaption{AMD \CFA Balance-One Benchmark}
    760         \label{f:BalanceOneAMD}
    761     \end{subfigure}\hfill
    762     \begin{subfigure}{0.5\textwidth}
    763         \centering
    764         \scalebox{0.5}{\input{figures/pykeCFABalance-One.pgf}}
    765         \subcaption{Intel \CFA Balance-One Benchmark}
    766         \label{f:BalanceOneIntel}
    767     \end{subfigure}
    768     \caption{The balance-one benchmark comparing stealing heuristics (lower is better).}
    769 \end{figure}
    770 
    771 \begin{figure}
    772     \centering
    773     \begin{subfigure}{0.5\textwidth}
    774         \centering
    775         \scalebox{0.5}{\input{figures/nasusCFABalance-Multi.pgf}}
    776         \subcaption{AMD \CFA Balance-Multi Benchmark}
    777         \label{f:BalanceMultiAMD}
    778     \end{subfigure}\hfill
    779     \begin{subfigure}{0.5\textwidth}
    780         \centering
    781         \scalebox{0.5}{\input{figures/pykeCFABalance-Multi.pgf}}
    782         \subcaption{Intel \CFA Balance-Multi Benchmark}
    783         \label{f:BalanceMultiIntel}
    784     \end{subfigure}
    785     \caption{The balance-multi benchmark comparing stealing heuristics (lower is better).}
     755        \centering
     756        \subfloat[AMD \CFA Balance-One Benchmark]{
     757                \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFABalance-One.pgf}}
     758                \label{f:BalanceOneAMD}
     759        }
     760        \subfloat[Intel \CFA Balance-One Benchmark]{
     761                \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFABalance-One.pgf}}
     762                \label{f:BalanceOneIntel}
     763        }
     764        \caption{The balance-one benchmark comparing stealing heuristics (lower is better).}
     765\end{figure}
     766
     767\begin{figure}
     768        \centering
     769        \subfloat[AMD \CFA Balance-Multi Benchmark]{
     770                \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFABalance-Multi.pgf}}
     771                \label{f:BalanceMultiAMD}
     772        }
     773        \subfloat[Intel \CFA Balance-Multi Benchmark]{
     774                \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFABalance-Multi.pgf}}
     775                \label{f:BalanceMultiIntel}
     776        }
     777        \caption{The balance-multi benchmark comparing stealing heuristics (lower is better).}
    786778\end{figure}
    787779
     
    817809
    818810\begin{figure}
    819     \centering
    820     \begin{subfigure}{0.5\textwidth}
    821         \centering
    822         \scalebox{0.5}{\input{figures/nasusExecutor.pgf}}
    823         \subcaption{AMD Executor Benchmark}
    824         \label{f:ExecutorAMD}
    825     \end{subfigure}\hfill
    826     \begin{subfigure}{0.5\textwidth}
    827         \centering
    828         \scalebox{0.5}{\input{figures/pykeExecutor.pgf}}
    829         \subcaption{Intel Executor Benchmark}
    830         \label{f:ExecutorIntel}
    831     \end{subfigure}
    832     \caption{The executor benchmark comparing actor systems (lower is better).}
     811        \centering
     812        \subfloat[AMD Executor Benchmark]{
     813                \resizebox{0.5\textwidth}{!}{\input{figures/nasusExecutor.pgf}}
     814                \label{f:ExecutorAMD}
     815        }
     816        \subfloat[Intel Executor Benchmark]{
     817                \resizebox{0.5\textwidth}{!}{\input{figures/pykeExecutor.pgf}}
     818                \label{f:ExecutorIntel}
     819        }
     820        \caption{The executor benchmark comparing actor systems (lower is better).}
    833821\end{figure}
    834822
     
    840828
    841829\begin{figure}
    842     \centering
    843     \begin{subfigure}{0.5\textwidth}
    844         \centering
    845         \scalebox{0.5}{\input{figures/nasusCFAExecutor.pgf}}
    846         \subcaption{AMD \CFA Executor Benchmark}\label{f:cfaExecutorAMD}
    847     \end{subfigure}\hfill
    848     \begin{subfigure}{0.5\textwidth}
    849         \centering
    850         \scalebox{0.5}{\input{figures/pykeCFAExecutor.pgf}}
    851         \subcaption{Intel \CFA Executor Benchmark}\label{f:cfaExecutorIntel}
    852     \end{subfigure}
    853     \caption{Executor benchmark comparing \CFA stealing heuristics (lower is better).}
     830        \centering
     831        \subfloat[AMD \CFA Executor Benchmark]{
     832                \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFAExecutor.pgf}}
     833                \label{f:cfaExecutorAMD}
     834        }
     835        \subfloat[Intel \CFA Executor Benchmark]{
     836                \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFAExecutor.pgf}}
     837                \label{f:cfaExecutorIntel}
     838        }
     839        \caption{Executor benchmark comparing \CFA stealing heuristics (lower is better).}
    854840\end{figure}
    855841
     
    857843
    858844\begin{figure}
    859     \centering
    860     \begin{subfigure}{0.5\textwidth}
    861         \centering
    862         \scalebox{0.5}{\input{figures/nasusRepeat.pgf}}
    863         \subcaption{AMD Repeat Benchmark}\label{f:RepeatAMD}
    864     \end{subfigure}\hfill
    865     \begin{subfigure}{0.5\textwidth}
    866         \centering
    867         \scalebox{0.5}{\input{figures/pykeRepeat.pgf}}
    868         \subcaption{Intel Repeat Benchmark}\label{f:RepeatIntel}
    869     \end{subfigure}
    870     \caption{The repeat benchmark comparing actor systems (lower is better).}
     845        \centering
     846        \subfloat[AMD Repeat Benchmark]{
     847                \resizebox{0.5\textwidth}{!}{\input{figures/nasusRepeat.pgf}}
     848                \label{f:RepeatAMD}
     849        }
     850        \subfloat[Intel Repeat Benchmark]{
     851                \resizebox{0.5\textwidth}{!}{\input{figures/pykeRepeat.pgf}}
     852                \label{f:RepeatIntel}
     853        }
     854        \caption{The repeat benchmark comparing actor systems (lower is better).}
    871855\end{figure}
    872856
     
    881865
    882866\begin{figure}
    883     \centering
    884     \begin{subfigure}{0.5\textwidth}
    885         \centering
    886         \scalebox{0.5}{\input{figures/nasusCFARepeat.pgf}}
    887         \subcaption{AMD \CFA Repeat Benchmark}\label{f:cfaRepeatAMD}
    888     \end{subfigure}\hfill
    889     \begin{subfigure}{0.5\textwidth}
    890         \centering
    891         \scalebox{0.5}{\input{figures/pykeCFARepeat.pgf}}
    892         \subcaption{Intel \CFA Repeat Benchmark}\label{f:cfaRepeatIntel}
    893     \end{subfigure}
    894     \caption{The repeat benchmark comparing \CFA stealing heuristics (lower is better).}
     867        \centering
     868        \subfloat[AMD \CFA Repeat Benchmark]{
     869                \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFARepeat.pgf}}
     870                \label{f:cfaRepeatAMD}
     871        }
     872        \subfloat[Intel \CFA Repeat Benchmark]{
     873                \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFARepeat.pgf}}
     874                \label{f:cfaRepeatIntel}
     875        }
     876        \caption{The repeat benchmark comparing \CFA stealing heuristics (lower is better).}
    895877\end{figure}
    896878
     
    913895
    914896\begin{table}[t]
    915     \centering
    916     \setlength{\extrarowheight}{2pt}
    917     \setlength{\tabcolsep}{5pt}
    918    
    919     \caption{Executor Program Memory High Watermark}
    920     \label{t:ExecutorMemory}
    921     \begin{tabular}{*{5}{r|}r}
    922         & \multicolumn{1}{c|}{\CFA} & \multicolumn{1}{c|}{CAF} & \multicolumn{1}{c|}{Akka} & \multicolumn{1}{c|}{\uC} & \multicolumn{1}{c@{}}{ProtoActor} \\
    923         \hline                                                                                                                                     
    924         AMD             & \input{data/pykeExecutorMem} \\
    925         \hline                                                                                                                                     
    926         Intel   & \input{data/nasusExecutorMem}
    927     \end{tabular}
     897        \centering
     898        \setlength{\extrarowheight}{2pt}
     899        \setlength{\tabcolsep}{5pt}
     900       
     901        \caption{Executor Program Memory High Watermark}
     902        \label{t:ExecutorMemory}
     903        \begin{tabular}{*{5}{r|}r}
     904                & \multicolumn{1}{c|}{\CFA} & \multicolumn{1}{c|}{CAF} & \multicolumn{1}{c|}{Akka} & \multicolumn{1}{c|}{\uC} & \multicolumn{1}{c@{}}{ProtoActor} \\
     905                \hline                                                                                                                                                 
     906                AMD             & \input{data/pykeExecutorMem} \\
     907                \hline                                                                                                                                                 
     908                Intel   & \input{data/nasusExecutorMem}
     909        \end{tabular}
    928910\end{table}
    929911
     
    951933
    952934\begin{figure}
    953     \centering
    954     \begin{subfigure}{0.5\textwidth}
    955         \centering
    956         \scalebox{0.5}{\input{figures/nasusMatrix.pgf}}
    957         \subcaption{AMD Matrix Benchmark}\label{f:MatrixAMD}
    958     \end{subfigure}\hfill
    959     \begin{subfigure}{0.5\textwidth}
    960         \centering
    961         \scalebox{0.5}{\input{figures/pykeMatrix.pgf}}
    962         \subcaption{Intel Matrix Benchmark}\label{f:MatrixIntel}
    963     \end{subfigure}
    964     \caption{The matrix benchmark comparing actor systems (lower is better).}
    965 \end{figure}
    966 
    967 \begin{figure}
    968     \centering
    969     \begin{subfigure}{0.5\textwidth}
    970         \centering
    971         \scalebox{0.5}{\input{figures/nasusCFAMatrix.pgf}}
    972         \subcaption{AMD \CFA Matrix Benchmark}\label{f:cfaMatrixAMD}
    973     \end{subfigure}\hfill
    974     \begin{subfigure}{0.5\textwidth}
    975         \centering
    976         \scalebox{0.5}{\input{figures/pykeCFAMatrix.pgf}}
    977         \subcaption{Intel \CFA Matrix Benchmark}\label{f:cfaMatrixIntel}
    978     \end{subfigure}
    979     \caption{The matrix benchmark comparing \CFA stealing heuristics (lower is better).}
    980 \end{figure}
     935        \centering
     936        \subfloat[AMD Matrix Benchmark]{
     937                \resizebox{0.5\textwidth}{!}{\input{figures/nasusMatrix.pgf}}
     938                \label{f:MatrixAMD}
     939        }
     940        \subfloat[Intel Matrix Benchmark]{
     941                \resizebox{0.5\textwidth}{!}{\input{figures/pykeMatrix.pgf}}
     942                \label{f:MatrixIntel}
     943        }
     944        \caption{The matrix benchmark comparing actor systems (lower is better).}
     945\end{figure}
     946
     947\begin{figure}
     948        \centering
     949        \subfloat[AMD \CFA Matrix Benchmark]{
     950                \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFAMatrix.pgf}}
     951                \label{f:cfaMatrixAMD}
     952        }
     953        \subfloat[Intel \CFA Matrix Benchmark]{
     954                \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFAMatrix.pgf}}
     955                \label{f:cfaMatrixIntel}
     956        }
     957        \caption{The matrix benchmark comparing \CFA stealing heuristics (lower is better).}
     958\end{figure}
     959
     960% Local Variables: %
     961% tab-width: 4 %
     962% End: %
Note: See TracChangeset for help on using the changeset viewer.