Changeset d67cdb7 for doc/proposals


Ignore:
Timestamp:
Sep 26, 2017, 11:27:38 PM (4 years ago)
Author:
Peter A. Buhr <pabuhr@…>
Branches:
aaron-thesis, arm-eh, cleanup-dtors, deferred_resn, demangler, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, resolv-new, with_gc
Children:
5dc26f5
Parents:
201aeb9
Message:

merge

Location:
doc/proposals/concurrency
Files:
2 added
8 edited

Legend:

Unmodified
Added
Removed
  • doc/proposals/concurrency/Makefile

    r201aeb9 rd67cdb7  
    1717text/concurrency \
    1818text/parallelism \
     19text/together \
     20text/future \
    1921}
    2022
  • doc/proposals/concurrency/annex/glossary.tex

    r201aeb9 rd67cdb7  
    101101\newacronym{api}{API}{Application Program Interface}
    102102\newacronym{raii}{RAII}{Ressource Acquisition Is Initialization}
     103\newacronym{numa}{NUMA}{Non-Uniform Memory Access}
  • doc/proposals/concurrency/text/cforall.tex

    r201aeb9 rd67cdb7  
    1515int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
    1616&r1 = x,    &&r2 = r1,   &&&r3 = r2;
    17 ***p3 = 3;                                      // change x
    18 r3 = 3;                                         // change x, ***r3
    19 **p3 = ...;                                     // change p1
    20 &r3 = ...;                                      // change r1, (&*)**r3
    21 *p3 = ...;                                      // change p2
    22 &&r3 = ...;                                     // change r2, (&(&*)*)*r3
    23 &&&r3 = p3;                                     // change r3 to p3, (&(&(&*)*)*)r3
    24 int y, z, & ar[3] = { x, y, z };                // initialize array of references
    25 &ar[1] = &z;                                    // change reference array element
    26 typeof( ar[1] ) p;                              // is int, i.e., the type of referenced object
    27 typeof( &ar[1] ) q;                             // is int &, i.e., the type of reference
    28 sizeof( ar[1] ) == sizeof( int );               // is true, i.e., the size of referenced object
    29 sizeof( &ar[1] ) == sizeof( int *);             // is true, i.e., the size of a reference
     17***p3 = 3;                              // change x
     18r3 = 3;                                 // change x, ***r3
     19**p3 = ...;                             // change p1
     20&r3 = ...;                              // change r1, (&*)**r3
     21*p3 = ...;                              // change p2
     22&&r3 = ...;                             // change r2, (&(&*)*)*r3
     23&&&r3 = p3;                             // change r3 to p3, (&(&(&*)*)*)r3
     24int y, z, & ar[3] = { x, y, z };        // initialize array of references
     25&ar[1] = &z;                            // change reference array element
     26typeof( ar[1] ) p;                      // is int, i.e., the type of referenced object
     27typeof( &ar[1] ) q;                     // is int &, i.e., the type of reference
     28sizeof( ar[1] ) == sizeof( int );       // is true, i.e., the size of referenced object
     29sizeof( &ar[1] ) == sizeof( int *);     // is true, i.e., the size of a reference
    3030\end{cfacode}
    3131The important thing to take away from this code snippet is that references offer a handle to an object much like pointers but which is automatically derefferenced when convinient.
     
    3636\begin{cfacode}
    3737// selection based on type and number of parameters
    38 void f( void );                                 // (1)
    39 void f( char );                                 // (2)
    40 void f( int, double );                          // (3)
    41 f();                                            // select (1)
    42 f( 'a' );                                       // select (2)
    43 f( 3, 5.2 );                                    // select (3)
     38void f( void );                         // (1)
     39void f( char );                         // (2)
     40void f( int, double );                  // (3)
     41f();                                    // select (1)
     42f( 'a' );                               // select (2)
     43f( 3, 5.2 );                            // select (3)
    4444
    4545// selection based on  type and number of returns
    46 char f( int );                                  // (1)
    47 double f( int );                                // (2)
    48 [ int, double ] f( int );                       // (3)
    49 char c = f( 3 );                                // select (1)
    50 double d = f( 4 );                              // select (2)
    51 [ int, double ] t = f( 5 );                     // select (3)
     46char f( int );                          // (1)
     47double f( int );                        // (2)
     48[ int, double ] f( int );               // (3)
     49char c = f( 3 );                        // select (1)
     50double d = f( 4 );                      // select (2)
     51[ int, double ] t = f( 5 );             // select (3)
    5252\end{cfacode}
    5353This feature is particularly important for concurrency since the runtime system relies on creating different types do represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent clashes. As seen in chapter \ref{basics}, the main is an example of routine that benefits from overloading when concurrency in introduced.
     
    5656Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation would be, like so :
    5757\begin{cfacode}
    58 int ++?( int op );                              // unary prefix increment
    59 int ?++( int op );                              // unary postfix increment
    60 int ?+?( int op1, int op2 );                    // binary plus
    61 int ?<=?( int op1, int op2 );                   // binary less than
    62 int ?=?( int & op1, int op2 );                  // binary assignment
    63 int ?+=?( int & op1, int op2 );                 // binary plus-assignment
     58int ++?( int op );                      // unary prefix increment
     59int ?++( int op );                      // unary postfix increment
     60int ?+?( int op1, int op2 );            // binary plus
     61int ?<=?( int op1, int op2 );           // binary less than
     62int ?=?( int & op1, int op2 );          // binary assignment
     63int ?+=?( int & op1, int op2 );         // binary plus-assignment
    6464
    6565struct S { int i, j; };
    66 S ?+?( S op1, S op2 ) {                         // add two structures
     66S ?+?( S op1, S op2 ) {                 // add two structures
    6767        return (S){ op1.i + op2.i, op1.j + op2.j };
    6868}
    6969S s1 = { 1, 2 }, s2 = { 2, 3 }, s3;
    70 s3 = s1 + s2;                                   // compute sum: s3 == { 2, 5 }
     70s3 = s1 + s2;                           // compute sum: s3 == { 2, 5 }
    7171\end{cfacode}
    7272
     
    7474
    7575\section{Constructors/Destructors}
    76 \CFA uses the following syntax for constructors and destructors :
     76Object life time is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object life time as a mean of synchronization and/or mutual exclusion. Since \CFA relies heavily on the life time of objects, Constructors \& Destructors are a the core of the features required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors :
    7777\begin{cfacode}
    7878struct S {
     
    8080        int * ia;
    8181};
    82 void ?{}( S & s, int asize ) with s {           // constructor operator
    83         size = asize;                           // initialize fields
     82void ?{}( S & s, int asize ) with s {   // constructor operator
     83        size = asize;                   // initialize fields
    8484        ia = calloc( size, sizeof( S ) );
    8585}
    86 void ^?{}( S & s ) with s {                     // destructor operator
    87         free( ia );                             // de-initialization fields
     86void ^?{}( S & s ) with s {             // destructor operator
     87        free( ia );                     // de-initialization fields
    8888}
    8989int main() {
    90         S x = { 10 }, y = { 100 };              // implict calls: ?{}( x, 10 ), ?{}( y, 100 )
    91         ...                                     // use x and y
    92         ^x{};  ^y{};                            // explicit calls to de-initialize
    93         x{ 20 };  y{ 200 };                     // explicit calls to reinitialize
    94         ...                                     // reuse x and y
    95 }                                               // implict calls: ^?{}( y ), ^?{}( x )
     90        S x = { 10 }, y = { 100 };      // implict calls: ?{}( x, 10 ), ?{}( y, 100 )
     91        ...                             // use x and y
     92        ^x{};  ^y{};                    // explicit calls to de-initialize
     93        x{ 20 };  y{ 200 };             // explicit calls to reinitialize
     94        ...                             // reuse x and y
     95}                                       // implict calls: ^?{}( y ), ^?{}( x )
    9696\end{cfacode}
    9797The language guarantees that every object and all their fields are constructed. Like \CC construction is automatically done on declaration and destruction done when the declared variables reach the end of its scope.
  • doc/proposals/concurrency/text/concurrency.tex

    r201aeb9 rd67cdb7  
    6969int f5(graph(monitor*) & mutex m);
    7070\end{cfacode}
    71 The problem is to indentify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to indentify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then making sure objects are only acquired once becomes none-trivial. This can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To keep everyone as sane as possible~\cite{Chicken}, this projects imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is be acquired, passing an array to this routine would be type safe and yet result in undefined behavior because only the first element of the array is acquired. This is specially true for non-copyable objects like monitors, where an array of pointers is simplest way to express a group of monitors. However, this ambiguity is part of the C type-system with respects to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
     71The problem is to indentify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to indentify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then making sure objects are only acquired once becomes none-trivial. This can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To make the issue tractable, this projects imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is be acquired, passing an array to this routine would be type safe and yet result in undefined behavior because only the first element of the array is acquired. This is specially true for non-copyable objects like monitors, where an array of pointers is simplest way to express a group of monitors. However, this ambiguity is part of the C type-system with respects to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
    7272
    7373\begin{cfacode}
     
    608608% ======================================================================
    609609% ======================================================================
    610 There are several challenges specific to \CFA when implementing internal scheduling. These challenges are direct results of \gls{group-acquire} and loose object definitions. These two constraints are to root cause of most design decisions in the implementation of internal scheduling. Furthermore, to avoid the head-aches of dynamically allocating memory in a concurrent environment, the internal-scheduling design is entirely free of mallocs and other dynamic memory allocation scheme. This is to avoid the chicken and egg problem of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal, means that memory management is a constant concern in the design of the system.
     610There are several challenges specific to \CFA when implementing internal scheduling. These challenges are direct results of \gls{group-acquire} and loose object definitions. These two constraints are to root cause of most design decisions in the implementation of internal scheduling. Furthermore, to avoid the head-aches of dynamically allocating memory in a concurrent environment, the internal-scheduling design is entirely free of mallocs and other dynamic memory allocation scheme. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal, means that memory management is a constant concern in the design of the system.
    611611
    612612The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. These queues need to be intrinsic\cit to avoid the need memory allocation. This entails that all the fields needed to keep track of all needed information. Since internal scheduling can use an unbound amount of memory (depending on \gls{group-acquire}) statically defining information information in the intrusive fields of threads is insufficient. The only variable sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. Particularly the GCC extension variable length arrays which is used extensively.
     
    832832To support multi-monitor external scheduling means that some kind of entry-queues must be used that is aware of both monitors. However, acceptable routines must be aware of the entry queues which means they must be stored inside at least one of the monitors that will be acquired. This in turn adds the requirement a systematic algorithm of disambiguating which queue is relavant regardless of user ordering. The proposed algorithm is to fall back on monitors lock ordering and specify that the monitor that is acquired first is the lock with the relevant entry queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint. This algorithm choice has two consequences, the entry queue of the highest priority monitor is no longer a true FIFO queue and the queue of the lowest priority monitor is both required and probably unused. The queue can no longer be a FIFO queue because instead of simply containing the waiting threads in order arrival, they also contain the second mutex. Therefore, another thread with the same highest priority monitor but a different lowest priority monitor may arrive first but enter the critical section after a thread with the correct pairing. Secondly, since it may not be known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is probable that half the multi-monitor queues will go unused for the entire duration of the program.
    833833
    834 % ======================================================================
    835 % ======================================================================
    836 \section{Other concurrency tools}
    837 % ======================================================================
    838 % ======================================================================
    839 % \TODO
     834
     835\subsection{Internals}
     836The complete mask can be pushed to any one, we are in a context where we already have full ownership of (at least) every concerned monitor and therefore monitors will refuse all calls no matter what.
  • doc/proposals/concurrency/text/intro.tex

    r201aeb9 rd67cdb7  
    33% ======================================================================
    44
    5 This proposal provides a minimal concurrency API that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of the concurrency, in \CFA. Indeed, for highly productive parallel programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. Therefore a high-level approach is adapted in \CFA
     5This proposal provides a minimal concurrency API that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of the concurrency, in \CFA. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. Therefore a high-level approach is adopted in \CFA
    66
    7 There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the users. While these two concepts are often combined, they are in fact distinct concepts that require different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
     7There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmers. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
  • doc/proposals/concurrency/text/parallelism.tex

    r201aeb9 rd67cdb7  
    1111\section{Paradigm}
    1212\subsection{User-level threads}
    13 A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provide but can be used on a much larger scale. This approach is the most powerfull solution as it allows all the features of multi-threading, while removing several of the more expensives costs of using kernel threads. The down side is that almost none of the low-level threading problems are hidden, users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong garantees but the parallelism toolkit offers very little to reduce complexity in itself.
     13A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provide but can be used on a much larger scale. This approach is the most powerfull solution as it allows all the features of multi-threading, while removing several of the more expensive costs of kernel threads. The down side is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong garantees but the parallelism toolkit offers very little to reduce complexity in itself.
    1414
    1515Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
    1616
    1717\subsection{Fibers : user-level threads without preemption}
    18 A popular varient of \glspl{uthread} is what is often reffered to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantical differences with \glspl{uthread}. Advocates of \glspl{fiber} list their high performance and ease of implementation as majors strenghts of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignore fibers.
     18A popular varient of \glspl{uthread} is what is often refered to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantical differences with \glspl{uthread}. Advocates of \glspl{fiber} list their high performance and ease of implementation as majors strenghts of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignore fibers.
    1919
    2020An example of a language that uses fibers is Go~\cite{Go}
    2121
    2222\subsection{Jobs and thread pools}
    23 The approach on the opposite end of the spectrum is to base parallelism on \glspl{pool}. Indeed, \glspl{pool} offer limited flexibility but at the benefit of a simpler user interface. In \gls{pool} based systems, users express parallelism as units of work and a dependency graph (either explicit or implicit) that tie them together. This approach means users need not worry about concurrency but significantly limits the interaction that can occur among jobs. Indeed, any \gls{job} that blocks also blocks the underlying worker, which effectively means the CPU utilization, and therefore throughput, suffers noticeably. It can be argued that a solution to this problem is to use more workers than available cores. However, unless the number of jobs and the number of workers are comparable, having a significant amount of blocked jobs always results in idles cores.
     23The approach on the opposite end of the spectrum is to base parallelism on \glspl{pool}. Indeed, \glspl{pool} offer limited flexibility but at the benefit of a simpler user interface. In \gls{pool} based systems, users express parallelism as units of work, called jobs, and a dependency graph (either explicit or implicit) that tie them together. This approach means users need not worry about concurrency but significantly limits the interaction that can occur among jobs. Indeed, any \gls{job} that blocks also blocks the underlying worker, which effectively means the CPU utilization, and therefore throughput, suffers noticeably. It can be argued that a solution to this problem is to use more workers than available cores. However, unless the number of jobs and the number of workers are comparable, having a significant amount of blocked jobs always results in idles cores.
    2424
    2525The gold standard of this implementation is Intel's TBB library~\cite{TBB}.
    2626
    2727\subsection{Paradigm performance}
    28 While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pindown the performance implications of chosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantess that the \gls{pool} based system has the best performance thanks to the lower memory overhead. However, interactions between jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but context switches will be more expansive and the extra control means users need to tweak more variables to get the desired performance. Furthermore, if the units of uninterrupted work are large enough the paradigm choice is largely amorticised by the actual work done.
     28While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pindown the performance implications of chosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantess that the \gls{pool} based system has the best performance thanks to the lower memory overhead (i.e., not thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but context switches is more expansive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortised by the actual work done.
    2929
    30 \newpage
    3130\TODO
    32 \subsection{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
     31
     32\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
    3333
    3434
     35\subsubsection{Future Work: Machine setup}\label{machine}
     36While this was not done in the context of this proposal, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heteregenous setups. For example, system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certains CPU cores. OS support for CPU affinity is now common \cit, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
     37
    3538\subsection{Paradigms}\label{cfaparadigms}
    36 Given these building blocks we can then reproduce the all three of the popular paradigms. Indeed, we get \glspl{uthread} as the default paradigm in \CFA. However, disabling \glspl{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application.
    37 
    38 % \subsection{High-level options}\label{tasks}
    39 %
    40 % \subsubsection{Thread interface}
    41 % constructors destructors
    42 %       initializer lists
    43 % monitors
    44 %
    45 % \subsubsection{Futures}
    46 %
    47 % \subsubsection{Implicit threading}
    48 % Finally, simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the system level.
    49 %
    50 % \begin{center}
    51 % \begin{tabular}[t]{|c|c|c|}
    52 % Sequential & System Parallel & Language Parallel \\
    53 % \begin{lstlisting}
    54 % void big_sum(int* a, int* b,
    55 %                int* out,
    56 %                size_t length)
    57 % {
    58 %       for(int i = 0; i < length; ++i ) {
    59 %               out[i] = a[i] + b[i];
    60 %       }
    61 % }
    62 %
    63 %
    64 %
    65 %
    66 %
    67 % int* a[10000];
    68 % int* b[10000];
    69 % int* c[10000];
    70 % //... fill in a and b ...
    71 % big_sum(a, b, c, 10000);
    72 % \end{lstlisting} &\begin{lstlisting}
    73 % void big_sum(int* a, int* b,
    74 %                int* out,
    75 %                size_t length)
    76 % {
    77 %       range ar(a, a + length);
    78 %       range br(b, b + length);
    79 %       range or(out, out + length);
    80 %       parfor( ai, bi, oi,
    81 %       [](int* ai, int* bi, int* oi) {
    82 %               oi = ai + bi;
    83 %       });
    84 % }
    85 %
    86 % int* a[10000];
    87 % int* b[10000];
    88 % int* c[10000];
    89 % //... fill in a and b ...
    90 % big_sum(a, b, c, 10000);
    91 % \end{lstlisting}&\begin{lstlisting}
    92 % void big_sum(int* a, int* b,
    93 %                int* out,
    94 %                size_t length)
    95 % {
    96 %       for (ai, bi, oi) in (a, b, out) {
    97 %               oi = ai + bi;
    98 %       }
    99 % }
    100 %
    101 %
    102 %
    103 %
    104 %
    105 % int* a[10000];
    106 % int* b[10000];
    107 % int* c[10000];
    108 % //... fill in a and b ...
    109 % big_sum(a, b, c, 10000);
    110 % \end{lstlisting}
    111 % \end{tabular}
    112 % \end{center}
    113 %
    114 % \subsection{Machine setup}\label{machine}
    115 % Threads are all good and well but wee still some OS support to fully utilize available hardware.
    116 %
    117 % \textbf{\large{Work in progress...}} Do wee need something beyond specifying the number of kernel threads?
     39Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \glspl{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.
  • doc/proposals/concurrency/thesis.tex

    r201aeb9 rd67cdb7  
    11% requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
    22
    3 % inline code ©...© (copyright symbol) emacs: C-q M-)
    4 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
    5 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
    6 % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
    7 % LaTex escape §...§ (section symbol) emacs: C-q M-'
    8 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
     3% inline code �...� (copyright symbol) emacs: C-q M-)
     4% red highlighting �...� (registered trademark symbol) emacs: C-q M-.
     5% blue highlighting �...� (sharp s symbol) emacs: C-q M-_
     6% green highlighting �...� (cent symbol) emacs: C-q M-"
     7% LaTex escape �...� (section symbol) emacs: C-q M-'
     8% keyword escape �...� (pilcrow symbol) emacs: C-q M-^
    99% math escape $...$ (dollar symbol)
    1010
     
    2727\usepackage{multicol}
    2828\usepackage[acronym]{glossaries}
    29 \usepackage{varioref}   
     29\usepackage{varioref}
    3030\usepackage{listings}                                           % format program code
    3131\usepackage[flushmargin]{footmisc}                              % support label/reference in footnote
     
    103103\input{parallelism}
    104104
    105 \chapter{Putting it all together}
     105\input{together}
     106
     107\input{future}
    106108
    107109\chapter{Conclusion}
    108 
    109 \chapter{Future work}
    110 Concurrency and parallelism is still a very active field that strongly benefits from hardware advances. As such certain features that aren't necessarily mature enough in their current state could become relevant in the lifetime of \CFA.
    111 \subsection{Transactions}
    112110
    113111\section*{Acknowledgements}
  • doc/proposals/concurrency/version

    r201aeb9 rd67cdb7  
    1 0.9.180
     10.10.2
Note: See TracChangeset for help on using the changeset viewer.