Changeset beabdf3
- Timestamp:
- Apr 8, 2023, 3:50:20 PM (20 months ago)
- Branches:
- ADT, ast-experimental, master
- Children:
- 8472c6c
- Parents:
- 3d5fba21
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/theses/colby_parsons_MMAth/text/actors.tex
r3d5fba21 rbeabdf3 90 90 \begin{cfa} 91 91 struct derived_actor { 92 inline actor;// Plan-9 C inheritance92 inline actor; // Plan-9 C inheritance 93 93 }; 94 94 void ?{}( derived_actor & this ) { // Default ctor 95 95 ((actor &)this){}; // Call to actor ctor 96 96 } 97 97 98 98 struct derived_msg { 99 inline message;// Plan-9 C nominal inheritance100 99 inline message; // Plan-9 C nominal inheritance 100 char word[12]; 101 101 }; 102 102 void ?{}( derived_msg & this, char * new_word ) { // Overloaded ctor 103 104 103 ((message &) this){ Nodelete }; // Passing allocation to ctor 104 strcpy(this.word, new_word); 105 105 } 106 106 107 107 Allocation receive( derived_actor & receiver, derived_msg & msg ) { 108 109 108 printf("The message contained the string: %s\n", msg.word); 109 return Finished; // Return finished since actor is done 110 110 } 111 111 112 112 int main() { 113 114 derived_actor my_actor;115 116 117 118 113 start_actor_system(); // Sets up executor 114 derived_actor my_actor; 115 derived_msg my_msg{ "Hello World" }; // Constructor call 116 my_actor << my_msg; // Send message via left shift operator 117 stop_actor_system(); // Waits until actors are finished 118 return 0; 119 119 } 120 120 \end{cfa} … … 229 229 \section{Envelopes}\label{s:envelope} 230 230 In actor systems messages are sent and received by actors. 231 When an actor receives a message it 231 When an actor receives a message it executes its behaviour that is associated with that message type. 232 232 However the unit of work that stores the message, the receiving actor's address, and other pertinent information needs to persist between the send and the receive. 233 233 Furthermore the unit of work needs to be able to be stored in some fashion, usually in a queue, until it is executed by an actor. … … 301 301 While other systems are concerned with stealing actors, the \CFA actor system steals queues. 302 302 This is a result of \CFA's use of the inverted actor system. 
303 303 The goal of the \CFA actor work stealing mechanism is to have a zero-victim-cost stealing mechanism. 304 304 This does not mean that stealing has no cost. 305 305 Rather, the goal is to ensure that stealing work does not impact the performance of victim workers. … … 369 369 370 370 \begin{cfa} 371 void swap( uint victim_idx, uint my_idx 372 373 374 375 376 377 378 379 380 371 void swap( uint victim_idx, uint my_idx ) { 372 // Step 0: 373 work_queue * my_queue = request_queues[my_idx]; 374 work_queue * vic_queue = request_queues[victim_idx]; 375 // Step 2: 376 request_queues[my_idx] = 0p; 377 // Step 3: 378 request_queues[victim_idx] = my_queue; 379 // Step 4: 380 request_queues[my_idx] = vic_queue; 381 381 } 382 382 \end{cfa} … … 389 389 // This routine is atomic 390 390 bool CAS( work_queue ** ptr, work_queue ** old, work_queue * new ) { 391 392 393 394 391 if ( *ptr != *old ) 392 return false; 393 *ptr = new; 394 return true; 395 395 } 396 396 397 397 bool try_swap_queues( worker & this, uint victim_idx, uint my_idx ) with(this) { 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 398 // Step 0: 399 // request_queues is the shared array of all sharded queues 400 work_queue * my_queue = request_queues[my_idx]; 401 work_queue * vic_queue = request_queues[victim_idx]; 402 403 // Step 1: 404 // If either queue is 0p then they are in the process of being stolen 405 // 0p is CForAll's equivalent of C++'s nullptr 406 if ( vic_queue == 0p ) return false; 407 408 // Step 2: 409 // Try to set thief's queue ptr to be 0p. 410 // If this CAS fails someone stole thief's queue so return false 411 if ( !CAS( &request_queues[my_idx], &my_queue, 0p ) ) 412 return false; 413 414 // Step 3: 415 // Try to set victim queue ptr to be thief's queue ptr. 
416 // If it fails someone stole the other queue, so fix up then return false 417 if ( !CAS( &request_queues[victim_idx], &vic_queue, my_queue ) ) { 418 request_queues[my_idx] = my_queue; // reset queue ptr back to prev val 419 return false; 420 } 421 422 // Step 4: 423 // Successfully swapped. 424 // Thief's ptr is 0p so no one will touch it 425 // Write back without CAS is safe 426 request_queues[my_idx] = vic_queue; 427 return true; 428 428 } 429 429 \end{cfa}\label{c:swap} … … 706 706 \label{t:StaticActorMessagePerformance} 707 707 \begin{tabular}{*{5}{r|}r} 708 709 \hline 710 711 \hline 712 708 & \multicolumn{1}{c|}{\CFA (100M)} & \multicolumn{1}{c|}{CAF (10M)} & \multicolumn{1}{c|}{Akka (100M)} & \multicolumn{1}{c|}{\uC (100M)} & \multicolumn{1}{c@{}}{ProtoActor (100M)} \\ 709 \hline 710 AMD & \input{data/pykeSendStatic} \\ 711 \hline 712 Intel & \input{data/nasusSendStatic} 713 713 \end{tabular} 714 714 … … 719 719 720 720 \begin{tabular}{*{5}{r|}r} 721 722 \hline 723 724 \hline 725 721 & \multicolumn{1}{c|}{\CFA (20M)} & \multicolumn{1}{c|}{CAF (2M)} & \multicolumn{1}{c|}{Akka (2M)} & \multicolumn{1}{c|}{\uC (20M)} & \multicolumn{1}{c@{}}{ProtoActor (2M)} \\ 722 \hline 723 AMD & \input{data/pykeSendDynamic} \\ 724 \hline 725 Intel & \input{data/nasusSendDynamic} 726 726 \end{tabular} 727 727 \end{table} … … 745 745 In the static send benchmark all systems except CAF have static send costs that are in the same ballpark, only varying by $\approx$70ns. 746 746 In the dynamic send benchmark all systems experience slower message sends, as expected due to the extra allocations. 747 However, 747 However, Akka and ProtoActor slow down by a more significant margin than \uC and \CFA. 748 748 This is likely a result of Akka and ProtoActor's garbage collection, which can suffer from hits in performance for allocation-heavy workloads, whereas \uC and \CFA have explicit allocation/deallocation. 
749 749 … … 753 753 754 754 \begin{figure} 755 \centering 756 \begin{subfigure}{0.5\textwidth} 757 \centering 758 \scalebox{0.5}{\input{figures/nasusCFABalance-One.pgf}} 759 \subcaption{AMD \CFA Balance-One Benchmark} 760 \label{f:BalanceOneAMD} 761 \end{subfigure}\hfill 762 \begin{subfigure}{0.5\textwidth} 763 \centering 764 \scalebox{0.5}{\input{figures/pykeCFABalance-One.pgf}} 765 \subcaption{Intel \CFA Balance-One Benchmark} 766 \label{f:BalanceOneIntel} 767 \end{subfigure} 768 \caption{The balance-one benchmark comparing stealing heuristics (lower is better).} 769 \end{figure} 770 771 \begin{figure} 772 \centering 773 \begin{subfigure}{0.5\textwidth} 774 \centering 775 \scalebox{0.5}{\input{figures/nasusCFABalance-Multi.pgf}} 776 \subcaption{AMD \CFA Balance-Multi Benchmark} 777 \label{f:BalanceMultiAMD} 778 \end{subfigure}\hfill 779 \begin{subfigure}{0.5\textwidth} 780 \centering 781 \scalebox{0.5}{\input{figures/pykeCFABalance-Multi.pgf}} 782 \subcaption{Intel \CFA Balance-Multi Benchmark} 783 \label{f:BalanceMultiIntel} 784 \end{subfigure} 785 \caption{The balance-multi benchmark comparing stealing heuristics (lower is better).} 755 \centering 756 \subfloat[AMD \CFA Balance-One Benchmark]{ 757 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFABalance-One.pgf}} 758 \label{f:BalanceOneAMD} 759 } 760 \subfloat[Intel \CFA Balance-One Benchmark]{ 761 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFABalance-One.pgf}} 762 \label{f:BalanceOneIntel} 763 } 764 \caption{The balance-one benchmark comparing stealing heuristics (lower is better).} 765 \end{figure} 766 767 \begin{figure} 768 \centering 769 \subfloat[AMD \CFA Balance-Multi Benchmark]{ 770 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFABalance-Multi.pgf}} 771 \label{f:BalanceMultiAMD} 772 } 773 \subfloat[Intel \CFA Balance-Multi Benchmark]{ 774 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFABalance-Multi.pgf}} 775 \label{f:BalanceMultiIntel} 776 } 777 \caption{The balance-multi benchmark 
comparing stealing heuristics (lower is better).} 786 778 \end{figure} 787 779 … … 817 809 818 810 \begin{figure} 819 \centering 820 \begin{subfigure}{0.5\textwidth} 821 \centering 822 \scalebox{0.5}{\input{figures/nasusExecutor.pgf}} 823 \subcaption{AMD Executor Benchmark} 824 \label{f:ExecutorAMD} 825 \end{subfigure}\hfill 826 \begin{subfigure}{0.5\textwidth} 827 \centering 828 \scalebox{0.5}{\input{figures/pykeExecutor.pgf}} 829 \subcaption{Intel Executor Benchmark} 830 \label{f:ExecutorIntel} 831 \end{subfigure} 832 \caption{The executor benchmark comparing actor systems (lower is better).} 811 \centering 812 \subfloat[AMD Executor Benchmark]{ 813 \resizebox{0.5\textwidth}{!}{\input{figures/nasusExecutor.pgf}} 814 \label{f:ExecutorAMD} 815 } 816 \subfloat[Intel Executor Benchmark]{ 817 \resizebox{0.5\textwidth}{!}{\input{figures/pykeExecutor.pgf}} 818 \label{f:ExecutorIntel} 819 } 820 \caption{The executor benchmark comparing actor systems (lower is better).} 833 821 \end{figure} 834 822 … … 840 828 841 829 \begin{figure} 842 \centering 843 \begin{subfigure}{0.5\textwidth} 844 \centering 845 \scalebox{0.5}{\input{figures/nasusCFAExecutor.pgf}} 846 \subcaption{AMD \CFA Executor Benchmark}\label{f:cfaExecutorAMD} 847 \end{subfigure}\hfill 848 \begin{subfigure}{0.5\textwidth} 849 \centering 850 \scalebox{0.5}{\input{figures/pykeCFAExecutor.pgf}} 851 \subcaption{Intel \CFA Executor Benchmark}\label{f:cfaExecutorIntel} 852 \end{subfigure} 853 \caption{Executor benchmark comparing \CFA stealing heuristics (lower is better).} 830 \centering 831 \subfloat[AMD \CFA Executor Benchmark]{ 832 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFAExecutor.pgf}} 833 \label{f:cfaExecutorAMD} 834 } 835 \subfloat[Intel \CFA Executor Benchmark]{ 836 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFAExecutor.pgf}} 837 \label{f:cfaExecutorIntel} 838 } 839 \caption{Executor benchmark comparing \CFA stealing heuristics (lower is better).} 854 840 \end{figure} 855 841 … … 857 843 858 
844 \begin{figure} 859 \centering 860 \begin{subfigure}{0.5\textwidth} 861 \centering 862 \scalebox{0.5}{\input{figures/nasusRepeat.pgf}} 863 \subcaption{AMD Repeat Benchmark}\label{f:RepeatAMD} 864 \end{subfigure}\hfill 865 \begin{subfigure}{0.5\textwidth} 866 \centering 867 \scalebox{0.5}{\input{figures/pykeRepeat.pgf}} 868 \subcaption{Intel Repeat Benchmark}\label{f:RepeatIntel} 869 \end{subfigure} 870 \caption{The repeat benchmark comparing actor systems (lower is better).} 845 \centering 846 \subfloat[AMD Repeat Benchmark]{ 847 \resizebox{0.5\textwidth}{!}{\input{figures/nasusRepeat.pgf}} 848 \label{f:RepeatAMD} 849 } 850 \subfloat[Intel Repeat Benchmark]{ 851 \resizebox{0.5\textwidth}{!}{\input{figures/pykeRepeat.pgf}} 852 \label{f:RepeatIntel} 853 } 854 \caption{The repeat benchmark comparing actor systems (lower is better).} 871 855 \end{figure} 872 856 … … 881 865 882 866 \begin{figure} 883 \centering 884 \begin{subfigure}{0.5\textwidth} 885 \centering 886 \scalebox{0.5}{\input{figures/nasusCFARepeat.pgf}} 887 \subcaption{AMD \CFA Repeat Benchmark}\label{f:cfaRepeatAMD} 888 \end{subfigure}\hfill 889 \begin{subfigure}{0.5\textwidth} 890 \centering 891 \scalebox{0.5}{\input{figures/pykeCFARepeat.pgf}} 892 \subcaption{Intel \CFA Repeat Benchmark}\label{f:cfaRepeatIntel} 893 \end{subfigure} 894 \caption{The repeat benchmark comparing \CFA stealing heuristics (lower is better).} 867 \centering 868 \subfloat[AMD \CFA Repeat Benchmark]{ 869 \resizebox{0.5\textwidth}{!}{\input{figures/nasusCFARepeat.pgf}} 870 \label{f:cfaRepeatAMD} 871 } 872 \subfloat[Intel \CFA Repeat Benchmark]{ 873 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFARepeat.pgf}} 874 \label{f:cfaRepeatIntel} 875 } 876 \caption{The repeat benchmark comparing \CFA stealing heuristics (lower is better).} 895 877 \end{figure} 896 878 … … 913 895 914 896 \begin{table}[t] 915 916 917 918 919 920 921 922 923 \hline 924 925 \hline 926 927 897 \centering 898 \setlength{\extrarowheight}{2pt} 899 
\setlength{\tabcolsep}{5pt} 900 901 \caption{Executor Program Memory High Watermark} 902 \label{t:ExecutorMemory} 903 \begin{tabular}{*{5}{r|}r} 904 & \multicolumn{1}{c|}{\CFA} & \multicolumn{1}{c|}{CAF} & \multicolumn{1}{c|}{Akka} & \multicolumn{1}{c|}{\uC} & \multicolumn{1}{c@{}}{ProtoActor} \\ 905 \hline 906 AMD & \input{data/pykeExecutorMem} \\ 907 \hline 908 Intel & \input{data/nasusExecutorMem} 909 \end{tabular} 928 910 \end{table} 929 911 … … 951 933 952 934 \begin{figure} 953 954 \begin{subfigure}{0.5\textwidth} 955 \centering 956 \scalebox{0.5}{\input{figures/nasusMatrix.pgf}}957 \subcaption{AMD Matrix Benchmark}\label{f:MatrixAMD}958 \end{subfigure}\hfill 959 \begin{subfigure}{0.5\textwidth}960 \centering 961 \scalebox{0.5}{\input{figures/pykeMatrix.pgf}}962 \subcaption{Intel Matrix Benchmark}\label{f:MatrixIntel}963 \end{subfigure}964 \caption{The matrix benchmark comparing actor systems (lower is better).} 965 \ end{figure}966 967 \begin{figure} 968 \centering 969 \begin{subfigure}{0.5\textwidth}970 \centering 971 \scalebox{0.5}{\input{figures/nasusCFAMatrix.pgf}} 972 \subcaption{AMD \CFA Matrix Benchmark}\label{f:cfaMatrixAMD}973 \end{subfigure}\hfill 974 \begin{subfigure}{0.5\textwidth}975 \centering 976 \scalebox{0.5}{\input{figures/pykeCFAMatrix.pgf}}977 \subcaption{Intel \CFA Matrix Benchmark}\label{f:cfaMatrixIntel} 978 \end{subfigure} 979 \caption{The matrix benchmark comparing \CFA stealing heuristics (lower is better).} 980 \end{figure} 935 \centering 936 \subfloat[AMD Matrix Benchmark]{ 937 \resizebox{0.5\textwidth}{!}{\input{figures/nasusMatrix.pgf}} 938 \label{f:MatrixAMD} 939 } 940 \subfloat[Intel Matrix Benchmark]{ 941 \resizebox{0.5\textwidth}{!}{\input{figures/pykeMatrix.pgf}} 942 \label{f:MatrixIntel} 943 } 944 \caption{The matrix benchmark comparing actor systems (lower is better).} 945 \end{figure} 946 947 \begin{figure} 948 \centering 949 \subfloat[AMD \CFA Matrix Benchmark]{ 950 
\resizebox{0.5\textwidth}{!}{\input{figures/nasusCFAMatrix.pgf}} 951 \label{f:cfaMatrixAMD} 952 } 953 \subfloat[Intel \CFA Matrix Benchmark]{ 954 \resizebox{0.5\textwidth}{!}{\input{figures/pykeCFAMatrix.pgf}} 955 \label{f:cfaMatrixIntel} 956 } 957 \caption{The matrix benchmark comparing \CFA stealing heuristics (lower is better).} 958 \end{figure} 959 960 % Local Variables: % 961 % tab-width: 4 % 962 % End: %
Note: See TracChangeset
for help on using the changeset viewer.