Ignore:
Timestamp:
Aug 1, 2023, 10:02:10 PM (20 months ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
master
Children:
210c737
Parents:
2e7a299
Message:

final proofread of waituntil chapter

File:
1 edited

Legend:

Unmodified
Added
Removed
  • TabularUnified doc/theses/colby_parsons_MMAth/text/waituntil.tex

    r2e7a299 rafb3d68  
    123123
    124124\begin{figure}
    125 \begin{lstlisting}[language=ada,literate=]
    126 task type buffer is -- thread
     125\begin{lstlisting}[language=ada,literate=,{moredelim={**[is][\color{red}]{@}{@}}}]
     126task type buffer is -- thread type
    127127        ... -- buffer declarations
    128128        count : integer := 0;
     
    130130        loop
    131131                select
    132                         when count < Size => -- guard
    133                         accept insert( elem : in ElemType ) do  -- method
     132                        @when count < Size@ => -- guard
     133                        @accept insert( elem : in ElemType )@ do  -- method
    134134                                ... -- add to buffer
    135135                                count := count + 1;
     
    137137                        -- executed if this accept called
    138138                or
    139                         when count > 0 => -- guard
    140                         accept remove( elem : out ElemType ) do  -- method
     139                        @when count > 0@ => -- guard
     140                        @accept remove( elem : out ElemType )@ do  -- method
    141141                                ... --remove and return from buffer via parameter
    142142                                count := count - 1;
    143143                        end;
    144144                        -- executed if this accept called
    145                 or delay 10.0;  -- unblock after 10 seconds without call
    146                 or else -- do not block, cannot appear with delay
     145                or @delay 10.0@;  -- unblock after 10 seconds without call
     146                or @else@ -- do not block, cannot appear with delay
    147147                end select;
    148148        end loop;
     
    174174
    175175\begin{lrbox}{\myboxA}
    176 \begin{lstlisting}[language=go,literate=]
     176\begin{lstlisting}[language=go,literate=,{moredelim={**[is][\color{red}]{@}{@}}}]
    177177func main() {
    178         insert := make( chan int, Size )
    179         remove := make( chan int, Size )
     178        @insert := make( chan int, Size )@
     179        @remove := make( chan int, Size )@
    180180        term := make( chan string )
    181181        finish := make( chan string )
     
    184184                L: for {
    185185                        select { // wait for message
    186                           case i = <- buffer:
     186                          @case i = <- buffer:@
    187187                          case <- term: break L
    188188                        }
    189                         remove <- i;
     189                        @remove <- i;@
    190190                }
    191191                finish <- "STOP" // completion
     
    201201
    202202\begin{lrbox}{\myboxB}
    203 \begin{lstlisting}[language=uC++=]
     203\begin{lstlisting}[language=uC++,{moredelim={**[is][\color{red}]{@}{@}}}]
    204204_Task BoundedBuffer {
    205205        ... // buffer declarations
    206206        int count = 0;
    207207  public:
    208         void insert( int elem ) {
     208        @void insert( int elem )@ {
    209209                ... // add to buffer
    210210                count += 1;
    211211        }
    212         int remove() {
     212        @int remove()@ {
    213213                ... // remove and return from buffer
    214214                count -= 1;
     
    218218                for ( ;; ) {
    219219                        _Accept( ~buffer ) break;
    220                         or _When ( count < Size ) _Accept( insert );
    221                         or _When ( count > 0 ) _Accept( remove );
     220                        @or _When ( count < Size ) _Accept( insert )@;
     221                        @or _When ( count > 0 ) _Accept( remove )@;
    222222                }
    223223        }
     
    241241Both @_Accept@ and @_Select@ statements provide guards for multiplexing clauses, as well as, timeout, and @else@ clauses.
    242242
    243 There are other languages that provide \gls{synch_multiplex}, including Rust's @select!@ over futures~\cite{rust:select}, OCaml's @select@ over channels~\cite{ocaml:channel}, and C++14's @when_any@ over futures~\cite{cpp:whenany}.
     243There are other languages that provide \gls{synch_multiplex}, including Rust's @select!@ over futures~\cite{rust:select}, Java's @allof@/@anyof@ over futures~\cite{java:allof:anyof}, OCaml's @select@ over channels~\cite{ocaml:channel}, and C++14's @when_any@ over futures~\cite{cpp:whenany}.
    244244Note that while C++14 and Rust provide \gls{synch_multiplex}, the implementations leave much to be desired as both rely on polling to wait on multiple resources.
    245245
     
    250250Similar, actor systems circumvent the \gls{synch_multiplex} problem as actors only block when waiting for the next message never in a behaviour.
    251251While these approaches solve the \gls{synch_multiplex} problem, they introduce other issues.
    252 Consider the case where a thread has a single source of communication and it wants a set of @N@ resources.
    253 It must sequentially request the @N@ resources and wait for each response.
    254 During the receives for the @N@ resources, it can receive other communication, and has to save and postpone these communications, or discard them.
     252Consider the case where a thread has a single source of communication and it wants a set of $N$ resources.
     253It must sequentially request the $N$ resources and wait for each response.
     254During the receives for the $N$ resources, it can receive other communication, and has to save and postpone these communications, or discard them.
    255255% If the requests for the other resources need to be retracted, the burden falls on the programmer to determine how to synchronize appropriately to ensure that only one resource is delivered.
    256256
     
    294294int i = 0;
    295295
    296 waituntil( Lock ) { ... }
    297 or when( i == 0 ) waituntil( i << Channel ) { ... }
    298 and waituntil( Future ) { ... }
    299 or waituntil( timeout( 1`s ) ) { ... }
     296waituntil( @Lock@ ) { ... }
     297or when( i == 0 ) waituntil( i << @Channel@ ) { ... }
     298and waituntil( @Future@ ) { ... }
     299or waituntil( @timeout( 1`s )@ ) { ... }
    300300// else { ... }
    301301\end{cfa}
     
    316316\begin{cfa}
    317317future(int) bar, foo;
    318 waituntil( foo ) { ... } or waituntil( bar ) { ... }
    319 \end{cfa}
    320 The reason for this semantics is that prioritizing resources can be useful in certain problems.
    321 In the rare case where there is a starvation problem with the ordering, it possible to follow a @waituntil@ with its reverse form:
     318waituntil( foo ) { ... } or waituntil( bar ) { ... } // prioritize foo
     319\end{cfa}
     320The reason for this semantics is that prioritizing resources can be useful in certain problems, such as shutdown.
     321In the rare case where there is a starvation problem with the ordering, it possible to follow a @waituntil@ with its reverse form, alternating which resource has the highest priority:
    322322\begin{cfa}
    323323waituntil( foo ) { ... } or waituntil( bar ) { ... } // prioritize foo
    324324waituntil( bar ) { ... } or waituntil( foo ) { ... } // prioritize bar
    325325\end{cfa}
     326While this approach is not general for many resources, it handles many basic cases.
    326327
    327328The \CFA @and@ semantics match the @and@ semantics of \uC \lstinline[language=uC++]{_Select}.
    328329When multiple clauses are joined by @and@, the @waituntil@ makes a thread wait for all to be available, but still runs the corresponding code blocks \emph{as they become available}.
    329 When an @and@ clause becomes available, the waiting thread unblocks and runs that clause's code-block, and then the thread waits again for the next available clause or the @waituntil@ statement is now true.
     330When an @and@ clause becomes available, the waiting thread unblocks and runs that clause's code-block, and then the thread waits again for the next available clause or the @waituntil@ statement is now satisfied.
    330331This semantics allows work to be done in parallel while synchronizing over a set of resources, and furthermore, gives a good reason to use the @and@ operator.
    331332If the @and@ operator waited for all clauses to be available before running, it is the same as just acquiring those resources consecutively by a sequence of @waituntil@ statements.
     
    389390\section{\lstinline{waituntil} Implementation}
    390391
    391 The @waituntil@ statement is not inherently complex, and Figure~\ref{f:WU_Impl} only shows the basic outline of the @waituntil@ algorithm.
    392 The complexity comes from the consideration of race conditions and synchronization needed when supporting various primitives.
    393 The following sections then use examples to fill in details missing in Figure~\ref{f:WU_Impl}.
    394 The full pseudocode for the @waituntil@ algorithm is presented in Figure~\ref{f:WU_Full_Impl}.
     392The @waituntil@ statement is not inherently complex, and Figure~\ref{f:WU_Impl} shows the basic outline of the @waituntil@ algorithm.
     393Complexity comes from the consideration of race conditions and synchronization needed when supporting various primitives.
     394The following sections use examples to fill in complexity details missing in Figure~\ref{f:WU_Impl}.
     395After which, the full pseudocode for the @waituntil@ algorithm is presented in Figure~\ref{f:WU_Full_Impl}.
    395396
    396397\begin{figure}
     
    451452When a @select_node@ reaches the front of the lock's queue and gains ownership, the thread blocked on the @waituntil@ is unblocked.
    452453Now, the lock is held by the @waituntil@ thread until the code block is executed, and then the node is unregistered, during which the lock is released.
    453 Immediately releasing the lock prevents the waiting thread from holding multiple locks and potentially introducing a deadlock.
     454Immediately releasing the lock after the code block prevents the waiting thread from holding multiple locks and potentially introducing a deadlock.
    454455As such, the only unregistered nodes associated with locks are the ones that have not run.
    455456
    456457\subsection{Timeouts}
    457458
    458 A timeout for the @waituntil@ statement is a duration passed to \lstinline[deletekeywords={timeout}]{timeout}, \eg:
     459A timeout for the @waituntil@ statement is a duration passed to routine \lstinline[deletekeywords={timeout}]{timeout}\footnote{\lstinline{timeout} is a quasi-keyword in \CFA, allowing it to be used an identifier.}, \eg:
    459460\begin{cquote}
    460461\begin{tabular}{@{}l|l@{}}
     
    482483Here, the multiple timeouts are useful because the durations can change during execution and the separate clauses provide different code blocks if a timeout triggers.
    483484Multiple timeouts can also be used with @and@ to provide a minimal delay before proceeding.
    484 In following example, either channel @C1@ or @C2@ must be satisfied but nothing can be done for at least 1 or 3 seconds after the channel read, respctively.
     485In following example, either channel @C1@ or @C2@ must be satisfied but nothing can be done for at least 1 or 3 seconds after the channel read, respectively.
    485486\begin{cfa}[deletekeywords={timeout}]
    486487waituntil( i << C1 ); and waituntil( timeout( 1`s ) );
    487488or waituntil( i << C2 ); and waituntil( timeout( 3`s ) );
    488489\end{cfa}
    489 If only @C2@ is satisfied, \emph{both} timeout code-blocks trigger because 1 second ocurs before 3 seconds.
     490If only @C2@ is satisfied, \emph{both} timeout code-blocks trigger because 1 second occurs before 3 seconds.
    490491Note, the \CFA @waitfor@ statement only provides a single @timeout@ clause because it only supports @or@ semantics.
    491492
     
    497498
    498499Channels require more complexity to allow synchronous multiplexing.
    499 For locks, when an outside thread releases a lock and unblocks the waituntil thread (WUT), the lock's MX property is passed to the WUT (no spinning locks).
     500For locks, when an outside thread releases a lock and unblocks the @waituntil@ thread (WUT), the lock's MX property is passed to the WUT (no spinning locks).
    500501For futures, the outside thread deliveries a value to the future and unblocks any waiting threads, including WUTs.
    501 In either case, after the WUT unblocks it is safe to execute its the corresponding code block knowing access to the resource is protected by the lock or the read-only state of the future.
     502In either case, after the WUT unblocks, it is safe to execute its corresponding code block knowing access to the resource is protected by the lock or the read-only state of the future.
    502503Similarly, for channels, when an outside thread inserts a value into a channel, it must unblock the WUT.
    503504However, for channels, there is a race condition that poses an issue.
     
    510511Furthermore, if both @and@ and @or@ operators are used, the @or@ operations stop behaving like exclusive-or due to the race among channel operations, \eg:
    511512\begin{cfa}
     513int i;
    512514waituntil( i << A ) {} and waituntil( i << B ) {}
    513515or waituntil( i << C ) {} and waituntil( i << D ) {}
    514516\end{cfa}
    515517If exclusive-or semantics are followed, only the code blocks for @A@ and @B@ are run, or the code blocks for @C@ and @D@.
    516 However, four outside threads can simultaneously put values into @i@ and attempt to unblock the WUT to run the four code-blocks.
     518However, four outside threads inserting into each channel can simultaneously put values into @i@ and attempt to unblock the WUT to run the four code-blocks.
    517519This case introduces a race with complexity that increases with the size of the @waituntil@ statement.
    518520However, due to TOCTOU issues, it is impossible to know if all resources are available without acquiring all the internal locks of channels in the subtree of the @waituntil@ clauses.
    519521This approach is a poor solution for two reasons.
    520 It is possible that once all the locks are acquired the subtree is not satisfied and the locks must be released.
     522It is possible that once all the locks are acquired, the subtree is not satisfied and the locks must be released.
    521523This work incurs a high cost for signalling threads and heavily increase contention on internal channel locks.
    522524Furthermore, the @waituntil@ statement is polymorphic and can support resources that do not have internal locks, which also makes this approach infeasible.
    523525As such, the exclusive-or semantics are lost when using both @and@ and @or@ operators since it cannot be supported without significant complexity and significantly affects @waituntil@ performance.
     526Therefore, the example of reusing variable @i@ by multiple output channels is considered a user error without exclusive-or semantics.
     527Given aliasing in C, it is impossible to even warn of the potential race.
     528In the future, it would be interesting to support Go-like syntax, \lstinline[language=Go]{case i := <- ...}, defining a new scoped @i@ variable for each clause.
    524529
    525530It was deemed important that exclusive-or semantics are maintained when only @or@ operators are used, so this situation has been special-cased, and is handled by having all clauses race to set a value \emph{before} operating on the channel.
     
    527532\begin{cquote}
    528533\begin{tabular}{@{}l|l|l@{}}
    529 \multicolumn{3}{@{}l@{}}{\lstinline{channel A, B; // zero size channels}} \\
     534\multicolumn{3}{@{}l@{}}{\lstinline{channel(int) A, B; // zero size channels}} \\
    530535thread 1 & thread 2 & thread 3 \\
    531536\begin{cfa}
     
    556561Channels introduce another interesting implementation issue.
    557562Supporting both reading and writing to a channel in a @waituntil@ means that one @waituntil@ clause may be the notifier of another @waituntil@ clause.
    558 This poses a problem when dealing with the special-cased @or@ where the clauses need to win a race to operate on a channel.
     563This conjunction poses a problem when dealing with the special-cased @or@ where the clauses need to win a race to operate on a channel.
    559564Consider the following example, alongside a described ordering of events to highlight the race.
    560565\begin{cquote}
    561566\begin{tabular}{@{}l|l@{}}
    562 \multicolumn{2}{@{}l@{}}{\lstinline{channel A, B; // zero size channels}} \\
     567\multicolumn{2}{@{}l@{}}{\lstinline{channel(int) A, B; // zero size channels}} \\
    563568thread 1 & thread 2 \\
    564569\begin{cfa}[moredelim={**[is][\color{blue}]{\#}{\#}}]
     
    573578\end{tabular}
    574579\end{cquote}
    575 Assume thread 1 executes first, registers with channel @A@ and proceeds, since it is empty, and then is interrupted before registering with @B@.
    576 Thread 2 similarly registers with channel @B@, and proceeds, since it does not have space to insert, and then is interrupted before registering with @A@.
     580Assume thread 1 executes first, registers with channel @A@ and proceeds in the @waituntil@.
     581Since @A@ is empty, thread 1 cannot remove, and then thread 1 is interrupted before registering with @B@.
     582Thread 2 similarly registers with channel @B@, and proceeds in the @waituntil@.
     583Since @B@ is zero size there is no space to insert, and then thread 2 is interrupted before registering with @A@.
    577584At this point, thread 1 and 2 resume execution.
    578 There is now a race that must be dealt with on two fronts.
    579 If thread 1 and 2 only race to the \gls{cas}, \ie a clause in their own @waituntil@, thread 1 can think that it successfully removed from @B@, and thread 2 may think it successfully inserted into @A@, when only one of these operations occurs.
     585Remember from above, each exclusive-or @waituntil@ holds a race to set the winning clause of the statement.
     586The issue that arises is that these two @waituntil@ statements must have matching winning clauses (both @A@ clauses or both @B@ clauses) to preserve the exclusive-or semantics, since a zero-sized channel needs an insert/remove pair for an operation to occur.
     587If threads 1 and 2 race to set a winner only in their own @waituntil@, thread 1 can think it successfully removed from @B@, and thread 2 can think it successfully inserted into @A@, which is an error.
     588Hence, there is a race on two fronts.
     589If thread 1 wins the race and sees that @B@ has a waiting insertion, then thread 2 must execute the first clause of its @waituntil@ and thread 1 execute its second.
     590Correspondingly, if thread 2 wins the race and sees that @A@ has a waiting removal, then thread 1 must execute the first clause of its @waituntil@ and thread 2 execute its second.
     591Any other execution scenario is incorrect for exclusive-or semantics.
     592Note, that priority execution of multiple satisfied @waituntil@ causes (\ie top to bottom) is not violated because, in this scenario, there is only one satisfied clause for either thread.
    580593
    581594The Go @select@ solves this problem by acquiring all the internal locks of the channels before registering the @select@ on the channels.
    582595This approach eliminates the race shown above since thread 1 and 2 cannot both be registering at the same time.
    583596However, this approach cannot be used in \CFA, since the @waituntil@ is polymorphic.
    584 Not all types in a @waituntil@ have an internal lock, and when using non-channel types acquiring all the locks incurs extra unneeded overhead.
     597Not all types in a @waituntil@ have an internal lock, and when using non-channel types, acquiring all the locks incurs extra unneeded overhead.
    585598Instead, this race is consolidated in \CFA in two phases by having an intermediate pending status value for the race.
    586599This race case is detectable, and if detected, each thread first races to set its own @waituntil@ race pointer to be pending.
     
    600613
    601614    // Try to set other status, if we succeed break and return true
    602     while( !CAS( other.clause_status, &cmp_status, SAT ) ) {
     615    while( ! CAS( other.clause_status, &cmp_status, SAT ) ) {
    603616        if ( cmp_status == SAT )
    604617            return false; // If other status is SAT we lost so return false
     
    611624
    612625        // Attempt to set own status flag back to PENDING to retry
    613         if ( !CAS( mine.clause_status, &cmp_status, PENDING ) )
     626        if ( ! CAS( mine.clause_status, &cmp_status, PENDING ) )
    614627            return false; // If we fail then we lost so return false
    615628       
     
    627640These exception mechanisms are supported through the @on_selected@ routine.
    628641This routine is needed by channels to detect if they are closed after unblocking in a @waituntil@ statement, to ensure the appropriate behaviour is taken and an exception is thrown.
     642Hence, the channel close-down mechanism is handled correctly.
    629643
    630644\subsection{Guards and Statement Predicate}\label{s:wu_guards}
     
    632646It is trivial to check when a synchronous multiplexing utility is done for the or/xor relationship, since any resource becoming available means that the blocked thread can proceed and the @waituntil@ statement is finished.
    633647In \uC and \CFA, the \gls{synch_multiplex} mechanism have both an and/or relationship, which along with guards, make the problem of checking for completion of the statement difficult.
    634 Consider the @waituntil@ in Figure~\ref{f:WU_ComplexPredicate}.
    635 When the @waituntil@ thread wakes up, checking if the statement is complete is non-trivial.
    636 The predicate that will return if the statement in Figure~\ref{f:WU_ComplexPredicate} is satisfied is the following.
    637 \begin{cfa}
    638 A && B || C || !GA && B || !GB && A || !GA && !GB && !GC
    639 \end{cfa}
    640 Which simplifies to:
    641 \begin{cfa}
    642 ( A || !GA ) && ( B || !GB ) || C || !GA && !GB && !GC
    643 \end{cfa}
    644 Checking a predicate this large with each iteration is expensive so \uC and \CFA both take steps to simplify checking statement completion.
    645 
    646 \begin{figure}
     648Consider the following @waituntil@.
    647649\begin{cfa}
    648650when( GA ) waituntil( A ) {}
     
    650652or when( GC ) waituntil( C ) {}
    651653\end{cfa}
    652 \caption{\lstinline{waituntil} with a non-trivial predicate}
    653 \label{f:WU_ComplexPredicate}
    654 \end{figure}
     654When the @waituntil@ thread wakes up, the following predicate represents satisfaction:
     655\begin{cfa}
     656A && B || C || ! GA && B || ! GB && A || ! GA && ! GB && ! GC
     657\end{cfa}
     658which can be simplified to:
     659\begin{cfa}
     660( A || ! GA ) && ( B || ! GB ) || C || ! GA && ! GB && ! GC
     661\end{cfa}
     662Checking the complete predicate on each iteration of the pending @waituntil@ is expensive so \uC and \CFA both take steps to simplify checking for statement completion.
    655663
    656664In the \uC @_Select@ statement, this problem is solved by constructing a tree of the resources, where the internal nodes are operators and the leaves are booleans storing the state of each resource.
    657 The internal nodes also store the statuses of the two subtrees beneath them.
     665A diagram of the tree for the complete predicate above is shown in Figure~\ref{f:uC_select_tree}, alongside the modification of the tree that occurs when @GA@ is @false@.
     666Each internal node stores the statuses of the two subtrees beneath it.
    658667When resources become available, their corresponding leaf node status is modified, which percolates up the tree to update the state of the statement.
    659668Once the root of the tree has both subtrees marked as @true@ then the statement is complete.
    660669As an optimization, when the internal nodes are updated, the subtrees marked as @true@ are pruned and not examined again.
    661670To support statement guards in \uC, the tree is modified to remove an internal node if a guard is false to maintain the appropriate predicate representation.
    662 An diagram of the tree for the statement in Figure~\ref{f:WU_ComplexPredicate} is shown in Figure~\ref{f:uC_select_tree}, alongside the modification of the tree that occurs when @GA@ is @false@.
    663671
    664672\begin{figure}
     
    666674\input{diagrams/uCpp_select_tree.tikz}
    667675\end{center}
    668 \caption{\uC select tree modification}
     676\caption{\uC \lstinline[language=uC++]{select} tree modification}
    669677\label{f:uC_select_tree}
    670678\end{figure}
    671679
    672 The \CFA @waituntil@ statement blocks a thread until a set of resources have become available that satisfy the underlying predicate.
    673 The waiting condition of the @waituntil@ statement can be represented as a predicate over the resources, joined by the @waituntil@ operators, where a resource is @true@ if it is available, and @false@ otherwise.
    674 In \CFA, this representation is used as the mechanism to check if a thread is done waiting on the @waituntil@.
     680The \CFA @waituntil@ statement blocks a thread until a set of resources have become available that satisfy the complete predicate of a @waituntil@.
     681The waiting condition of the @waituntil@ statement is implemented as the complete predicate over the resources, joined by the @waituntil@ operators, where a resource is @true@ if it is available, and @false@ otherwise.
     682This complete predicate is used as the mechanism to check if a thread is done waiting on a @waituntil@.
    675683Leveraging the compiler, a predicate routine is generated per @waituntil@ that when passes the statuses of the resources, returns @true@ when the @waituntil@ is done, and false otherwise.
    676684To support guards on the \CFA @waituntil@ statement, the status of a resource disabled by a guard is set to a boolean value that ensures that the predicate function behaves as if that resource is no longer part of the predicate.
    677685The generated code allows the predicate that is checked with each iteration to be simplified to not check guard values.
    678 For example, the following would be generated for the @waituntil@ shown in Figure~\ref{f:WU_ComplexPredicate}.
     686For example, the following is generated for the complete predicate above:
    679687\begin{cfa}
    680688// statement completion predicate
     
    682690    return nodes[0].status && nodes[1].status || nodes[2].status;
    683691}
    684 
    685 // skip statement if all guards false
    686 if ( GA || GB || GC ) {
     692if ( GA || GB || GC ) {                         $\C{// skip statement if all guards false}$
    687693    select_node nodes[3];
    688     nodes[0].status = !GA && GB; // A's status
    689     nodes[1].status = !GB && GA; // B's status
    690     nodes[2].status = !GC;       // C's status
    691 
     694    nodes[0].status = ! GA && GB;       $\C{// A's status}$
     695    nodes[1].status = ! GB && GA;       $\C{// B's status}$
     696    nodes[2].status = ! GC;                     $\C{// C's status}$
    692697    // ... rest of waituntil codegen ...
    693 
    694698}
    695699\end{cfa}
     
    697701\uC's @_Select@, supports operators both inside and outside of the \lstinline[language=uC++]{_Select} clauses.
    698702In the following example, the code blocks run once their corresponding predicate inside the round braces is satisfied.
    699 
    700703\begin{lstlisting}[language=uC++,{moredelim=**[is][\color{red}]{@}{@}}]
    701704Future_ISM<int> A, B, C, D;
     
    703706and _Select( @D && E@ ) { ... }
    704707\end{lstlisting}
    705 This is more expressive that the @waituntil@ statement in \CFA, allowing the code block for @&&@ to only run after both resources are available.
     708This feature is more expressive than the @waituntil@ statement in \CFA, allowing the code block for @&&@ to only run after \emph{both} resources are available.
    706709
    707710In \CFA, since the @waituntil@ statement supports more resources than just futures, implementing operators inside clauses is avoided for a few reasons.
     
    712715or waituntil( C && D ) { ... }
    713716\end{cfa}
    714 If the @waituntil@ acquires each lock as it becomes available, there is a possible deadlock since it is in a hold and wait situation.
     717If the @waituntil@ acquires each lock as it becomes available, there is a possible deadlock since it is in a hold-and-wait situation.
    715718Other semantics are needed to ensure this operation is safe.
    716719One possibility is to use \CC's @scoped_lock@ approach described in Section~\ref{s:DeadlockAvoidance};
     
    718721Another possibility is to use resource ordering similar to \CFA's @mutex@ statement, but that alone is insufficient, if the resource ordering is not used universally.
    719722One other way this could be implemented is to wait until all resources for a given clause are available before proceeding to acquire them, but this also quickly becomes a poor approach.
    720 This approach does not work due to \gls{toctou} issues;
    721 it is impossible to ensure that the full set of resources are available without holding them all first.
    722 Operators inside clauses in \CFA could potentially be implemented with careful circumvention of the problems involved, but it was not deemed an important feature when taking into account the runtime cost need paid to handle this situation.
    723 The problem of operators inside clauses also becomes a difficult issue to handle when supporting channels.
    724 It would require some way to ensure channels used with internal operators are modified, if and only if, the corresponding code block is run, but that is not feasible due to reasons described in the exclusive-or portion of Section~\ref{s:wu_chans}.
     723This approach does not work due to \gls{toctou} issues, \ie it is impossible to ensure that the full set of resources are available without holding them all first.
     724Operators inside clauses in \CFA could potentially be implemented with careful circumvention of the problems.
     725Finally, the problem of operators inside clauses is also a difficult to handle when supporting channels.
     726It would require some way to ensure channels used with internal operators are modified, if and only if, the corresponding code block is run.
     727However, that is not feasible due to reasons described in the exclusive-or portion of Section~\ref{s:wu_chans}.
     728Ultimately, I did not deemed this feature to be crucial enough when taking into account the complexity and runtime cost.
    725729
    726730\subsection{The full \lstinline{waituntil} picture}
    727731
    728 Now the details have been discussed, the full pseudocode of the @waituntil@ is presented in Figure~\ref{f:WU_Full_Impl}.
     732Given all the complex details handled by the @waituntil@, its full pseudocode is presented in Figure \ref{f:WU_Full_Impl}.
    729733Some things to note are as follows.
    730734The @finally@ blocks provide exception-safe \gls{raii} unregistering of nodes, and in particular, the @finally@ inside the innermost loop performs the immediate unregistering required for deadlock-freedom mentioned in Section~\ref{s:wu_locks}.
     
    751755            register_select( resource, node );
    752756
    753     while ( !check_completion( nodes ) ) {      $\C{// check predicate}$
     757    while ( ! check_completion( nodes ) ) {     $\C{// check predicate}$
    754758        // block
    755759        for ( resource in waituntil statement ) {       $\C{// run true code blocks}$
     
    776780\end{figure}
    777781
    778 \section{Waituntil Performance}
     782\section{\lstinline{waituntil} Performance}
    779783
    780784Similar facilities to @waituntil@ are discussed in Section~\ref{s:History} covering C, Ada, Rust, \CC, and OCaml.
    781785However, these facilities are either not meaningful or feasible to benchmark against.
    782786The UNIX @select@ and related utilities are not comparable since they are system calls that go into the kernel and operate on file descriptors, whereas the @waituntil@ exists solely in user space.
    783 Ada's \lstinline[language=Ada]{select} and \uC's \lstinline[language=uC++]{_Accept} only operates on method calls, which is done in \CFA via the @waitfor@ statement, so it is not meaningful to benchmark against the @waituntil@, which cannot wait on this resource.
     787Ada's \lstinline[language=Ada]{select} and \uC's \lstinline[language=uC++]{_Accept} only operate on method calls, which is done in \CFA via the @waitfor@ statement.
    784788Rust and \CC only offer a busy-wait approach, which is not comparable to a blocking approach.
    785789OCaml's @select@ waits on channels that are not comparable with \CFA and Go channels, so OCaml @select@ is not benchmarked against Go's @select@ and \CFA's @waituntil@.
     
    793797The channel multiplexing benchmarks compare \CFA's @waituntil@ and Go's \lstinline[language=Go]{select}, where the resource being waited on is a set of channels.
    794798The basic structure of the benchmark has the number of cores split evenly between producer and consumer threads, \ie, with 8 cores there are 4 producer and 4 consumer threads.
    795 The number of resource clauses $C$ is also varied across 2, 4, and 8 clauses, where each clause has a different channel that is waits on.
     799The number of resource clauses $C$ is also varied across 2, 4, and 8 clauses, where each clause has a different channel that it waits on.
    796800Each producer and consumer repeatedly waits to either produce or consume from one of the $C$ clauses and respective channels.
    797801For example, in \CFA syntax, the work loop in the consumer main with $C = 4$ clauses is:
     
    872876Go then is holding a lock for every channel, resulting in worse performance as the number of channels increase.
    873877In \CFA, since races are consolidated without holding all locks, it scales much better both with cores and clauses since more work can occur in parallel.
    874 This scalability difference is more significant on the Intel machine than the AMD machine since the Intel machine has lower cache contention costs.
     878This scalability difference is more significant on the Intel machine than the AMD machine since the Intel has lower cache-contention costs.
    875879
    876880The Go approach of holding all internal channel-locks in the \lstinline[language=Go]{select} has additional drawbacks.
     
    904908In Go, this setup results in significantly worse performance since @P2@ and @C2@ cannot operate in parallel with @P1@ and @C1@ due to all locks being acquired.
    905909Interesting, this case may not be as pathological as it seems.
    906 If the set of channels belonging to a select have channels that overlap with the set of another select, these statements lose the ability to operate in parallel.
     910If the set of channels belonging to a \lstinline[language=Go]{select} have channels that overlap with the set of another \lstinline[language=Go]{select}, these statements lose the ability to operate in parallel.
    907911The implementation in \CFA only holds a single lock at a time, resulting in better locking granularity, and hence, more parallelism.
    908912Comparison of this pathological case is shown in Table~\ref{t:pathGo}.
    909 The AMD results highlight the worst case scenario for Go since contention is more costly on this machine than the Intel machine.
     913The AMD results highlight the worst-case scenario for Go since contention is more costly on this machine than the Intel machine.
    910914
    911915\begin{table}[t]
     
    938942The @waituntil@ statement checks for statement completion using a predicate function, whereas the \lstinline[language=uC++]{_Select} statement maintains a tree that represents the state of the internal predicate.
    939943
    940 \begin{figure}
    941         \centering
    942         \subfloat[AMD Future Synchronization Benchmark]{
    943                 \resizebox{0.5\textwidth}{!}{\input{figures/nasus_Future.pgf}}
    944                 \label{f:futureAMD}
    945         }
    946         \subfloat[Intel Future Synchronization Benchmark]{
    947                 \resizebox{0.5\textwidth}{!}{\input{figures/pyke_Future.pgf}}
    948                 \label{f:futureIntel}
    949         }
    950         \caption{\CFA \lstinline{waituntil} and \uC \lstinline{_Select} statement throughput synchronizing on a set of futures with varying wait predicates (higher is better).}
    951         \caption{}
    952         \label{f:futurePerf}
    953 \end{figure}
    954 
    955944This benchmark aims to indirectly measure the impact of various predicates on the performance of the @waituntil@ and \lstinline[language=uC++]{_Select} statements.
    956945The benchmark is indirect since the performance of futures in \CFA and \uC differ by a significant margin.
     
    991980
    992981Results of this benchmark are shown in Figure~\ref{f:futurePerf}.
    993 Each pair of bars is marked with the predicate name for that experiment and the value at the top of each bar is the standard deviation..
     982Each pair of bars is marked with the predicate name for that experiment and the value at the top of each bar is the standard deviation.
    994983In detail, \uC results are lower in all cases due to the performance difference between futures and the more complex \gls{synch_multiplex} implementation.
    995984However, the bars for both systems have similar height patterns across the experiments.
     
    998987Interestingly, \CFA has lower variation across predicates on the AMD (excluding the special OR case), whereas \uC has lower variation on the Intel.
    999988Given the differences in semantics and implementation between \uC and \CFA, this test only illustrates the overall costs among the different kinds of predicates.
     989
     990\begin{figure}
     991        \centering
     992        \subfloat[AMD Future Synchronization Benchmark]{
     993                \resizebox{0.5\textwidth}{!}{\input{figures/nasus_Future.pgf}}
     994                \label{f:futureAMD}
     995        }
     996        \subfloat[Intel Future Synchronization Benchmark]{
     997                \resizebox{0.5\textwidth}{!}{\input{figures/pyke_Future.pgf}}
     998                \label{f:futureIntel}
     999        }
     1000        \caption{\CFA \lstinline{waituntil} and \uC \lstinline{_Select} statement throughput synchronizing on a set of futures with varying wait predicates (higher is better).}
     1001        \label{f:futurePerf}
     1002%%%%%%%%%%%%%%%
     1003\vspace*{5.5in}
     1004\end{figure}
Note: See TracChangeset for help on using the changeset viewer.