Changeset beabdf3
- Timestamp:
- Apr 8, 2023, 3:50:20 PM (3 years ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 8472c6c
- Parents:
- 3d5fba21
- File:
-
- 1 edited
-
doc/theses/colby_parsons_MMAth/text/actors.tex (modified) (15 diffs)
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/colby_parsons_MMAth/text/actors.tex
r3d5fba21 rbeabdf3 90 90 \begin{cfa} 91 91 struct derived_actor { 92 inline actor;// Plan-9 C inheritance92 inline actor; // Plan-9 C inheritance 93 93 }; 94 94 void ?{}( derived_actor & this ) { // Default ctor 95 ((actor &)this){}; // Call to actor ctor95 ((actor &)this){}; // Call to actor ctor 96 96 } 97 97 98 98 struct derived_msg { 99 inline message;// Plan-9 C nominal inheritance100 char word[12];99 inline message; // Plan-9 C nominal inheritance 100 char word[12]; 101 101 }; 102 102 void ?{}( derived_msg & this, char * new_word ) { // Overloaded ctor 103 ((message &) this){ Nodelete }; // Passing allocation to ctor104 strcpy(this.word, new_word);103 ((message &) this){ Nodelete }; // Passing allocation to ctor 104 strcpy(this.word, new_word); 105 105 } 106 106 107 107 Allocation receive( derived_actor & receiver, derived_msg & msg ) { 108 printf("The message contained the string: %s\n", msg.word);109 return Finished; // Return finished since actor is done108 printf("The message contained the string: %s\n", msg.word); 109 return Finished; // Return finished since actor is done 110 110 } 111 111 112 112 int main() { 113 start_actor_system(); // Sets up executor114 derived_actor my_actor;115 derived_msg my_msg{ "Hello World" }; // Constructor call116 my_actor << my_msg; // Send message via left shift operator117 stop_actor_system(); // Waits until actors are finished118 return 0;113 start_actor_system(); // Sets up executor 114 derived_actor my_actor; 115 derived_msg my_msg{ "Hello World" }; // Constructor call 116 my_actor << my_msg; // Send message via left shift operator 117 stop_actor_system(); // Waits until actors are finished 118 return 0; 119 119 } 120 120 \end{cfa} … … 229 229 \section{Envelopes}\label{s:envelope} 230 230 In actor systems messages are sent and received by actors. 231 When a actor receives a message it executes its behaviour that is associated with that message type.231 When a actor receives a message it executes its behaviour that is associated with that message type. 232 232 However the unit of work that stores the message, the receiving actor's address, and other pertinent information needs to persist between send and the receive. 233 233 Furthermore the unit of work needs to be able to be stored in some fashion, usually in a queue, until it is executed by an actor. … … 301 301 While other systems are concerned with stealing actors, the \CFA actor system steals queues. 302 302 This is a result of \CFA's use of the inverted actor system. 303 The goal of the \CFA actor work stealing mechanism is to have a zero-victim-cost stealing mechanism.303 The goal of the \CFA actor work stealing mechanism is to have a zero-victim-cost stealing mechanism. 304 304 This does not means that stealing has no cost. 305 305 This goal is to ensure that stealing work does not impact the performance of victim workers. … … 369 369 370 370 \begin{cfa} 371 void swap( uint victim_idx, uint my_idx ) {372 // Step 0:373 work_queue * my_queue = request_queues[my_idx];374 work_queue * vic_queue = request_queues[victim_idx];375 // Step 2:376 request_queues[my_idx] = 0p;377 // Step 3:378 request_queues[victim_idx] = my_queue;379 // Step 4:380 request_queues[my_idx] = vic_queue;371 void swap( uint victim_idx, uint my_idx ) { 372 // Step 0: 373 work_queue * my_queue = request_queues[my_idx]; 374 work_queue * vic_queue = request_queues[victim_idx]; 375 // Step 2: 376 request_queues[my_idx] = 0p; 377 // Step 3: 378 request_queues[victim_idx] = my_queue; 379 // Step 4: 380 request_queues[my_idx] = vic_queue; 381 381 } 382 382 \end{cfa} … … 389 389 // This routine is atomic 390 390 bool CAS( work_queue ** ptr, work_queue ** old, work_queue * new ) { 391 if ( *ptr != *old )392 return false;393 *ptr = new;394 return true;391 if ( *ptr != *old ) 392 return false; 393 *ptr = new; 394 return true; 395 395 } 396 396 397 397 bool try_swap_queues( worker & this, uint victim_idx, uint my_idx ) with(this) { 398 // Step 0:399 // request_queues is the shared array of all sharded queues400 work_queue * my_queue = request_queues[my_idx];401 work_queue * vic_queue = request_queues[victim_idx];402 403 // Step 1:404 // If either queue is 0p then they are in the process of being stolen405 // 0p is CForAll's equivalent of C++'s nullptr406 if ( vic_queue == 0p ) return false;407 408 // Step 2:409 // Try to set thief's queue ptr to be 0p.410 // If this CAS fails someone stole thief's queue so return false411 if ( !CAS( &request_queues[my_idx], &my_queue, 0p ) )412 return false;413 414 // Step 3:415 // Try to set victim queue ptr to be thief's queue ptr.416 // If it fails someone stole the other queue, so fix up then return false417 if ( !CAS( &request_queues[victim_idx], &vic_queue, my_queue ) ) {418 request_queues[my_idx] = my_queue; // reset queue ptr back to prev val419 return false;420 }421 422 // Step 4:423 // Successfully swapped.424 // Thief's ptr is 0p so no one will touch it425 // Write back without CAS is safe426 request_queues[my_idx] = vic_queue;427 return true;398 // Step 0: 399 // request_queues is the shared array of all sharded queues 400 work_queue * my_queue = request_queues[my_idx]; 401 work_queue * vic_queue = request_queues[victim_idx]; 402 403 // Step 1: 404 // If either queue is 0p then they are in the process of being stolen 405 // 0p is CForAll's equivalent of C++'s nullptr 406 if ( vic_queue == 0p ) return false; 407 408 // Step 2: 409 // Try to set thief's queue ptr to be 0p. 410 // If this CAS fails someone stole thief's queue so return false 411 if ( !CAS( &request_queues[my_idx], &my_queue, 0p ) ) 412 return false; 413 414 // Step 3: 415 // Try to set victim queue ptr to be thief's queue ptr. 416 // If it fails someone stole the other queue, so fix up then return false 417 if ( !CAS( &request_queues[victim_idx], &vic_queue, my_queue ) ) { 418 request_queues[my_idx] = my_queue; // reset queue ptr back to prev val 419 return false; 420 } 421 422 // Step 4: 423 // Successfully swapped. 424 // Thief's ptr is 0p so no one will touch it 425 // Write back without CAS is safe 426 request_queues[my_idx] = vic_queue; 427 return true; 428 428 } 429 429 \end{cfa}\label{c:swap} … … 706 706 \label{t:StaticActorMessagePerformance} 707 707 \begin{tabular}{*{5}{r|}r} 708 & \multicolumn{1}{c|}{\CFA (100M)} & \multicolumn{1}{c|}{CAF (10M)} & \multicolumn{1}{c|}{Akka (100M)} & \multicolumn{1}{c|}{\uC (100M)} & \multicolumn{1}{c@{}}{ProtoActor (100M)} \\709 \hline 710 AMD & \input{data/pykeSendStatic} \\711 \hline 712 Intel & \input{data/nasusSendStatic}708 & \multicolumn{1}{c|}{\CFA (100M)} & \multicolumn{1}{c|}{CAF (10M)} & \multicolumn{1}{c|}{Akka (100M)} & \multicolumn{1}{c|}{\uC (100M)} & \multicolumn{1}{c@{}}{ProtoActor (100M)} \\ 709 \hline 710 AMD & \input{data/pykeSendStatic} \\ 711 \hline 712 Intel & \input{data/nasusSendStatic} 713 713 \end{tabular} 714 714 … … 719 719 720 720 \begin{tabular}{*{5}{r|}r} 721 & \multicolumn{1}{c|}{\CFA (20M)} & \multicolumn{1}{c|}{CAF (2M)} & \multicolumn{1}{c|}{Akka (2M)} & \multicolumn{1}{c|}{\uC (20M)} & \multicolumn{1}{c@{}}{ProtoActor (2M)} \\722 \hline 723 AMD & \input{data/pykeSendDynamic} \\724 \hline 725 Intel & \input{data/nasusSendDynamic}721 & \multicolumn{1}{c|}{\CFA (20M)} & \multicolumn{1}{c|}{CAF (2M)} & \multicolumn{1}{c|}{Akka (2M)} & \multicolumn{1}{c|}{\uC (20M)} & \multicolumn{1}{c@{}}{ProtoActor (2M)} \\ 722 \hline 723 AMD & \input{data/pykeSendDynamic} \\ 724 \hline 725 Intel & \input{data/nasusSendDynamic} 726 726 \end{tabular} 727 727 \end{table} … … 745 745 In the static send benchmark all systems except CAF have static send costs that are in the same ballpark, only varying by ~70ns. 746 746 In the dynamic send benchmark all systems experience slower message sends, as expected due to the extra allocations. 747 However, Akka and ProtoActor, slow down by a more significant margin than the \uC and \CFA.747 However, Akka and ProtoActor, slow down by a more significant margin than the \uC and \CFA. 748 748 This is likely a result of Akka and ProtoActor's garbage collection, which can suffer from hits in performance for allocation heavy workloads, whereas \uC and \CFA have explicit allocation/deallocation. 749 749 … … 753 753 754 754 \begin{figure} 755 \centering 756 \begin{subfigure}{0.5\textwidth} 757 \centering 758 \scalebox{0.5}{\input{figures/nasusCFABalance-One.pgf}} 759 \subcaption{AMD \CFA Balance-One Benchmark} 760 \label{f:BalanceOneAMD} 761 \end{subfigure}\hfill 762 \begin{subfigure}{0.5\textwidth} 763 \centering 764 \scalebox{0.5}{\input{figures/pykeCFABalance-One.pgf}} 765 \subcaption{Intel \CFA Balance-One Benchmark} 766 \label{f:BalanceOneIntel} 767 \end{subfigure} 768 \caption{The balance-one benchmark comparing stealing heuristics (lower is better).} 769 \end{figure} 770 771 \begin{figure} 772 \centering 773 \begin{subfigure}{0.5\textwidth} 774 \centering 775 \scalebox{0.5}{\input{figures/nasusCFABalance-Multi.pgf}} 776 \subcaption{AMD \CFA Balance-Multi Benchmark} 777 \label{f:BalanceMultiAMD} 778 \end{subfigure}\hfill 779 \begin{subfigure}{0.5\textwidth} 780 \centering 781 \scalebox{0.5}{\input{figures/pykeCFABalance-Multi.pgf}} 782 \subcaption{Intel \CFA Balance-Multi Benchmark} 783 \label{f:BalanceMultiIntel} 784 \end{subfigure} 785 \caption{The balance-multi benchmark comparing stealing heuristics (lower is better).} 755 \centering 756 \subfloat[AMD \CFA Balance-One Benchmark]{ 757 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFABalance-One.pgf}} 758 \label{f:BalanceOneAMD} 759 } 760 \subfloat[Intel \CFA Balance-One Benchmark]{ 761 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFABalance-One.pgf}} 762 \label{f:BalanceOneIntel} 763 } 764 \caption{The balance-one benchmark comparing stealing heuristics (lower is better).} 765 \end{figure} 766 767 \begin{figure} 768 \centering 769 \subfloat[AMD \CFA Balance-Multi Benchmark]{ 770 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFABalance-Multi.pgf}} 771 \label{f:BalanceMultiAMD} 772 } 773 \subfloat[Intel \CFA Balance-Multi Benchmark]{ 774 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFABalance-Multi.pgf}} 775 \label{f:BalanceMultiIntel} 776 } 777 \caption{The balance-multi benchmark comparing stealing heuristics (lower is better).} 786 778 \end{figure} 787 779 … … 817 809 818 810 \begin{figure} 819 \centering 820 \begin{subfigure}{0.5\textwidth} 821 \centering 822 \scalebox{0.5}{\input{figures/nasusExecutor.pgf}} 823 \subcaption{AMD Executor Benchmark} 824 \label{f:ExecutorAMD} 825 \end{subfigure}\hfill 826 \begin{subfigure}{0.5\textwidth} 827 \centering 828 \scalebox{0.5}{\input{figures/pykeExecutor.pgf}} 829 \subcaption{Intel Executor Benchmark} 830 \label{f:ExecutorIntel} 831 \end{subfigure} 832 \caption{The executor benchmark comparing actor systems (lower is better).} 811 \centering 812 \subfloat[AMD Executor Benchmark]{ 813 \resizebox{0.5\textwidth}{!}{\input{figures/nasusExecutor.pgf}} 814 \label{f:ExecutorAMD} 815 } 816 \subfloat[Intel Executor Benchmark]{ 817 \resizebox{0.5\textwidth}{!}{\input{figures/pykeExecutor.pgf}} 818 \label{f:ExecutorIntel} 819 } 820 \caption{The executor benchmark comparing actor systems (lower is better).} 833 821 \end{figure} 834 822 … … 840 828 841 829 \begin{figure} 842 \centering 843 \begin{subfigure}{0.5\textwidth} 844 \centering 845 \scalebox{0.5}{\input{figures/nasusCFAExecutor.pgf}} 846 \subcaption{AMD \CFA Executor Benchmark}\label{f:cfaExecutorAMD} 847 \end{subfigure}\hfill 848 \begin{subfigure}{0.5\textwidth} 849 \centering 850 \scalebox{0.5}{\input{figures/pykeCFAExecutor.pgf}} 851 \subcaption{Intel \CFA Executor Benchmark}\label{f:cfaExecutorIntel} 852 \end{subfigure} 853 \caption{Executor benchmark comparing \CFA stealing heuristics (lower is better).} 830 \centering 831 \subfloat[AMD \CFA Executor Benchmark]{ 832 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFAExecutor.pgf}} 833 \label{f:cfaExecutorAMD} 834 } 835 \subfloat[Intel \CFA Executor Benchmark]{ 836 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFAExecutor.pgf}} 837 \label{f:cfaExecutorIntel} 838 } 839 \caption{Executor benchmark comparing \CFA stealing heuristics (lower is better).} 854 840 \end{figure} 855 841 … … 857 843 858 844 \begin{figure} 859 \centering 860 \begin{subfigure}{0.5\textwidth} 861 \centering 862 \scalebox{0.5}{\input{figures/nasusRepeat.pgf}} 863 \subcaption{AMD Repeat Benchmark}\label{f:RepeatAMD} 864 \end{subfigure}\hfill 865 \begin{subfigure}{0.5\textwidth} 866 \centering 867 \scalebox{0.5}{\input{figures/pykeRepeat.pgf}} 868 \subcaption{Intel Repeat Benchmark}\label{f:RepeatIntel} 869 \end{subfigure} 870 \caption{The repeat benchmark comparing actor systems (lower is better).} 845 \centering 846 \subfloat[AMD Repeat Benchmark]{ 847 \resizebox{0.5\textwidth}{!}{\input{figures/nasusRepeat.pgf}} 848 \label{f:RepeatAMD} 849 } 850 \subfloat[Intel Repeat Benchmark]{ 851 \resizebox{0.5\textwidth}{!}{\input{figures/pykeRepeat.pgf}} 852 \label{f:RepeatIntel} 853 } 854 \caption{The repeat benchmark comparing actor systems (lower is better).} 871 855 \end{figure} 872 856 … … 881 865 882 866 \begin{figure} 883 \centering 884 \begin{subfigure}{0.5\textwidth} 885 \centering 886 \scalebox{0.5}{\input{figures/nasusCFARepeat.pgf}} 887 \subcaption{AMD \CFA Repeat Benchmark}\label{f:cfaRepeatAMD} 888 \end{subfigure}\hfill 889 \begin{subfigure}{0.5\textwidth} 890 \centering 891 \scalebox{0.5}{\input{figures/pykeCFARepeat.pgf}} 892 \subcaption{Intel \CFA Repeat Benchmark}\label{f:cfaRepeatIntel} 893 \end{subfigure} 894 \caption{The repeat benchmark comparing \CFA stealing heuristics (lower is better).} 867 \centering 868 \subfloat[AMD \CFA Repeat Benchmark]{ 869 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFARepeat.pgf}} 870 \label{f:cfaRepeatAMD} 871 } 872 \subfloat[Intel \CFA Repeat Benchmark]{ 873 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFARepeat.pgf}} 874 \label{f:cfaRepeatIntel} 875 } 876 \caption{The repeat benchmark comparing \CFA stealing heuristics (lower is better).} 895 877 \end{figure} 896 878 … … 913 895 914 896 \begin{table}[t] 915 \centering916 \setlength{\extrarowheight}{2pt}917 \setlength{\tabcolsep}{5pt}918 919 \caption{Executor Program Memory High Watermark}920 \label{t:ExecutorMemory}921 \begin{tabular}{*{5}{r|}r}922 & \multicolumn{1}{c|}{\CFA} & \multicolumn{1}{c|}{CAF} & \multicolumn{1}{c|}{Akka} & \multicolumn{1}{c|}{\uC} & \multicolumn{1}{c@{}}{ProtoActor} \\923 \hline 924 AMD & \input{data/pykeExecutorMem} \\925 \hline 926 Intel & \input{data/nasusExecutorMem}927 \end{tabular}897 \centering 898 \setlength{\extrarowheight}{2pt} 899 \setlength{\tabcolsep}{5pt} 900 901 \caption{Executor Program Memory High Watermark} 902 \label{t:ExecutorMemory} 903 \begin{tabular}{*{5}{r|}r} 904 & \multicolumn{1}{c|}{\CFA} & \multicolumn{1}{c|}{CAF} & \multicolumn{1}{c|}{Akka} & \multicolumn{1}{c|}{\uC} & \multicolumn{1}{c@{}}{ProtoActor} \\ 905 \hline 906 AMD & \input{data/pykeExecutorMem} \\ 907 \hline 908 Intel & \input{data/nasusExecutorMem} 909 \end{tabular} 928 910 \end{table} 929 911 … … 951 933 952 934 \begin{figure} 953 \centering954 \begin{subfigure}{0.5\textwidth} 955 \centering 956 \scalebox{0.5}{\input{figures/nasusMatrix.pgf}}957 \subcaption{AMD Matrix Benchmark}\label{f:MatrixAMD}958 \end{subfigure}\hfill 959 \begin{subfigure}{0.5\textwidth}960 \centering 961 \scalebox{0.5}{\input{figures/pykeMatrix.pgf}}962 \subcaption{Intel Matrix Benchmark}\label{f:MatrixIntel}963 \end{subfigure}964 \caption{The matrix benchmark comparing actor systems (lower is better).} 965 \ end{figure}966 967 \begin{figure} 968 \centering 969 \begin{subfigure}{0.5\textwidth}970 \centering 971 \scalebox{0.5}{\input{figures/nasusCFAMatrix.pgf}} 972 \subcaption{AMD \CFA Matrix Benchmark}\label{f:cfaMatrixAMD}973 \end{subfigure}\hfill 974 \begin{subfigure}{0.5\textwidth}975 \centering 976 \scalebox{0.5}{\input{figures/pykeCFAMatrix.pgf}}977 \subcaption{Intel \CFA Matrix Benchmark}\label{f:cfaMatrixIntel} 978 \end{subfigure} 979 \caption{The matrix benchmark comparing \CFA stealing heuristics (lower is better).} 980 \end{figure} 935 \centering 936 \subfloat[AMD Matrix Benchmark]{ 937 \resizebox{0.5\textwidth}{!}{\input{figures/nasusMatrix.pgf}} 938 \label{f:MatrixAMD} 939 } 940 \subfloat[Intel Matrix Benchmark]{ 941 \resizebox{0.5\textwidth}{!}{\input{figures/pykeMatrix.pgf}} 942 \label{f:MatrixIntel} 943 } 944 \caption{The matrix benchmark comparing actor systems (lower is better).} 945 \end{figure} 946 947 \begin{figure} 948 \centering 949 \subfloat[AMD \CFA Matrix Benchmark]{ 950 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFAMatrix.pgf}} 951 \label{f:cfaMatrixAMD} 952 } 953 \subfloat[Intel \CFA Matrix Benchmark]{ 954 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFAMatrix.pgf}} 955 \label{f:cfaMatrixIntel} 956 } 957 \caption{The matrix benchmark comparing \CFA stealing heuristics (lower is better).} 958 \end{figure} 959 960 % Local Variables: % 961 % tab-width: 4 % 962 % End: %
Note:
See TracChangeset
for help on using the changeset viewer.