Context Navigation

← Previous Change
Next Change →

Changeset d67cdb7 for doc/proposals

Timestamp:

Sep 26, 2017, 11:27:38 PM (7 years ago)

Author:

Peter A. Buhr <pabuhr@…>

Branches:

ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc

Children:

Parents:

Message:

merge

Location:

doc/proposals/concurrency

Files:

: 2 added
: 8 edited

Makefile (modified) (1 diff)
annex/glossary.tex (modified) (1 diff)
text/cforall.tex (modified) (5 diffs)
text/concurrency.tex (modified) (3 diffs)
text/future.tex (added)
text/intro.tex (modified) (1 diff)
text/parallelism.tex (modified) (1 diff)
text/together.tex (added)
thesis.tex (modified) (3 diffs)
version (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

doc/proposals/concurrency/Makefile

r201aeb9	rd67cdb7
17	17	text/concurrency \
18	18	text/parallelism \
	19	text/together \
	20	text/future \
19	21	}
20	22

doc/proposals/concurrency/annex/glossary.tex

r201aeb9	rd67cdb7
101	101	\newacronym{api}{API}{Application Program Interface}
102	102	\newacronym{raii}{RAII}{Resource Acquisition Is Initialization}
	103	\newacronym{numa}{NUMA}{Non-Uniform Memory Access}

doc/proposals/concurrency/text/cforall.tex

-                      r201aeb9
+                      rd67cdb7
 int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
 &r1 = x,    &&r2 = r1,   &&&r3 = r2;
 ***p3 = 3;                                      // change x
 r3 = 3;                                         // change x, ***r3
 **p3 = ...;                                     // change p1
 &r3 = ...;                                      // change r1, (&*)**r3
 *p3 = ...;                                      // change p2
 &&r3 = ...;                                     // change r2, (&(&*)*)*r3
 &&&r3 = p3;                                     // change r3 to p3, (&(&(&*)*)*)r3
 int y, z, & ar[3] = { x, y, z };                // initialize array of references
 &ar[1] = &z;                                    // change reference array element
 typeof( ar[1] ) p;                              // is int, i.e., the type of referenced object
 typeof( &ar[1] ) q;                             // is int &, i.e., the type of reference
 sizeof( ar[1] ) == sizeof( int );               // is true, i.e., the size of referenced object
 sizeof( &ar[1] ) == sizeof( int *);             // is true, i.e., the size of a reference
+***p3 = 3;                              // change x
+r3 = 3;                                 // change x, ***r3
+**p3 = ...;                             // change p1
+&r3 = ...;                              // change r1, (&*)**r3
+*p3 = ...;                              // change p2
+&&r3 = ...;                             // change r2, (&(&*)*)*r3
+&&&r3 = p3;                             // change r3 to p3, (&(&(&*)*)*)r3
+int y, z, & ar[3] = { x, y, z };        // initialize array of references
+&ar[1] = &z;                            // change reference array element
+typeof( ar[1] ) p;                      // is int, i.e., the type of referenced object
+typeof( &ar[1] ) q;                     // is int &, i.e., the type of reference
+sizeof( ar[1] ) == sizeof( int );       // is true, i.e., the size of referenced object
+sizeof( &ar[1] ) == sizeof( int *);     // is true, i.e., the size of a reference
 \end{cfacode}
 The important thing to take away from this code snippet is that references offer a handle to an object much like pointers but which is automatically dereferenced when convenient.
 …
 \begin{cfacode}
 // selection based on type and number of parameters
 void f( void );                                 // (1)
 void f( char );                                 // (2)
 void f( int, double );                          // (3)
 f();                                            // select (1)
 f( 'a' );                                       // select (2)
 f( 3, 5.2 );                                    // select (3)
+void f( void );                         // (1)
+void f( char );                         // (2)
+void f( int, double );                  // (3)
+f();                                    // select (1)
+f( 'a' );                               // select (2)
+f( 3, 5.2 );                            // select (3)
 // selection based on  type and number of returns
 char f( int );                                  // (1)
 double f( int );                                // (2)
 [ int, double ] f( int );                       // (3)
 char c = f( 3 );                                // select (1)
 double d = f( 4 );                              // select (2)
 [ int, double ] t = f( 5 );                     // select (3)
+char f( int );                          // (1)
+double f( int );                        // (2)
+[ int, double ] f( int );               // (3)
+char c = f( 3 );                        // select (1)
+double d = f( 4 );                      // select (2)
+[ int, double ] t = f( 5 );             // select (3)
 \end{cfacode}
 This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent clashes. As seen in chapter \ref{basics}, the main is an example of a routine that benefits from overloading when concurrency is introduced.
 …
 Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation would be, like so :
 \begin{cfacode}
 int ++?( int op );                              // unary prefix increment
 int ?++( int op );                              // unary postfix increment
 int ?+?( int op1, int op2 );                    // binary plus
 int ?<=?( int op1, int op2 );                   // binary less than
 int ?=?( int & op1, int op2 );                  // binary assignment
 int ?+=?( int & op1, int op2 );                 // binary plus-assignment
+int ++?( int op );                      // unary prefix increment
+int ?++( int op );                      // unary postfix increment
+int ?+?( int op1, int op2 );            // binary plus
+int ?<=?( int op1, int op2 );           // binary less than
+int ?=?( int & op1, int op2 );          // binary assignment
+int ?+=?( int & op1, int op2 );         // binary plus-assignment
 struct S { int i, j; };
 S ?+?( S op1, S op2 ) {                         // add two structures
+S ?+?( S op1, S op2 ) {                 // add two structures
         return (S){ op1.i + op2.i, op1.j + op2.j };
+}
 S s1 = { 1, 2 }, s2 = { 2, 3 }, s3;
 s3 = s1 + s2;                                   // compute sum: s3 == { 2, 5 }
+s3 = s1 + s2;                           // compute sum: s3 == { 2, 5 }
 \end{cfacode}
 …
 \section{Constructors/Destructors}
 \CFA uses the following syntax for constructors and destructors :
+Object life time is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object life time as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the life time of objects, Constructors \& Destructors are at the core of the features required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors :
 \begin{cfacode}
 struct S {
 …
         int * ia;
 };
 void ?{}( S & s, int asize ) with s {           // constructor operator
         size = asize;                           // initialize fields
+void ?{}( S & s, int asize ) with s {   // constructor operator
+        size = asize;                   // initialize fields
         ia = calloc( size, sizeof( S ) );
+}
 void ^?{}( S & s ) with s {                     // destructor operator
         free( ia );                             // de-initialization fields
+void ^?{}( S & s ) with s {             // destructor operator
+        free( ia );                     // de-initialization fields
+}
 int main() {
         S x = { 10 }, y = { 100 };              // implict calls: ?{}( x, 10 ), ?{}( y, 100 )
         ...                                     // use x and y
         ^x{};  ^y{};                            // explicit calls to de-initialize
         x{ 20 };  y{ 200 };                     // explicit calls to reinitialize
         ...                                     // reuse x and y
 }                                               // implict calls: ^?{}( y ), ^?{}( x )
+        S x = { 10 }, y = { 100 };      // implict calls: ?{}( x, 10 ), ?{}( y, 100 )
+        ...                             // use x and y
+        ^x{};  ^y{};                    // explicit calls to de-initialize
+        x{ 20 };  y{ 200 };             // explicit calls to reinitialize
+        ...                             // reuse x and y
+}                                       // implict calls: ^?{}( y ), ^?{}( x )
 \end{cfacode}
 The language guarantees that every object and all their fields are constructed. Like \CC, construction is automatically done on declaration and destruction is done when the declared variable reaches the end of its scope.

doc/proposals/concurrency/text/concurrency.tex

-                      r201aeb9
+                      rd67cdb7
 int f5(graph(monitor*) & mutex m);
 \end{cfacode}
 The problem is to indentify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to indentify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then making sure objects are only acquired once becomes none-trivial. This can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To keep everyone as sane as possible~\cite{Chicken}, this projects imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is be acquired, passing an array to this routine would be type safe and yet result in undefined behavior because only the first element of the array is acquired. This is specially true for non-copyable objects like monitors, where an array of pointers is simplest way to express a group of monitors. However, this ambiguity is part of the C type-system with respects to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
+The problem is to identify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to identify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then making sure objects are only acquired once becomes non-trivial. This can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To make the issue tractable, this project imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is acquired, passing an array to this routine would be type safe and yet result in undefined behavior because only the first element of the array is acquired. This is especially true for non-copyable objects like monitors, where an array of pointers is the simplest way to express a group of monitors. However, this ambiguity is part of the C type-system with respect to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
 \begin{cfacode}
 …
 % ======================================================================
 % ======================================================================
 There are several challenges specific to \CFA when implementing internal scheduling. These challenges are direct results of \gls{group-acquire} and loose object definitions. These two constraints are to root cause of most design decisions in the implementation of internal scheduling. Furthermore, to avoid the head-aches of dynamically allocating memory in a concurrent environment, the internal-scheduling design is entirely free of mallocs and other dynamic memory allocation scheme. This is to avoid the chicken and egg problem of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal, means that memory management is a constant concern in the design of the system.
+There are several challenges specific to \CFA when implementing internal scheduling. These challenges are direct results of \gls{group-acquire} and loose object definitions. These two constraints are the root cause of most design decisions in the implementation of internal scheduling. Furthermore, to avoid the headaches of dynamically allocating memory in a concurrent environment, the internal-scheduling design is entirely free of mallocs and other dynamic memory allocation schemes. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system.
 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. These queues need to be intrinsic\cit to avoid the need for memory allocation. This entails that all the fields needed to keep track of all needed information. Since internal scheduling can use an unbounded amount of memory (depending on \gls{group-acquire}) statically defining information in the intrusive fields of threads is insufficient. The only variable sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. In particular, the GCC extension of variable length arrays is used extensively.
 …
 To support multi-monitor external scheduling means that some kind of entry-queues must be used that is aware of both monitors. However, acceptable routines must be aware of the entry queues which means they must be stored inside at least one of the monitors that will be acquired. This in turn adds the requirement for a systematic algorithm of disambiguating which queue is relevant regardless of user ordering. The proposed algorithm is to fall back on monitors lock ordering and specify that the monitor that is acquired first is the lock with the relevant entry queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint. This algorithm choice has two consequences, the entry queue of the highest priority monitor is no longer a true FIFO queue and the queue of the lowest priority monitor is both required and probably unused. The queue can no longer be a FIFO queue because instead of simply containing the waiting threads in order of arrival, they also contain the second mutex. Therefore, another thread with the same highest priority monitor but a different lowest priority monitor may arrive first but enter the critical section after a thread with the correct pairing. Secondly, since it may not be known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is probable that half the multi-monitor queues will go unused for the entire duration of the program.
+% ======================================================================
+% ======================================================================
+\section{Other concurrency tools}
+% ======================================================================
+% ======================================================================
+% \TODO
+\subsection{Internals}
+The complete mask can be pushed to any one, we are in a context where we already have full ownership of (at least) every concerned monitor and therefore monitors will refuse all calls no matter what.

doc/proposals/concurrency/text/intro.tex

-                      r201aeb9
+                      rd67cdb7
 % ======================================================================
 This proposal provides a minimal concurrency API that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of the concurrency, in \CFA. Indeed, for highly productive parallel programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. Therefore a high-level approach is adapted in \CFA
+This proposal provides a minimal concurrency API that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency in \CFA. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. Therefore a high-level approach is adopted in \CFA.
 There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the users. While these two concepts are often combined, they are in fact distinct concepts that require different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
+There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmers. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.

doc/proposals/concurrency/text/parallelism.tex

-                      r201aeb9
+                      rd67cdb7
 \section{Paradigm}
 \subsection{User-level threads}
 A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provide but can be used on a much larger scale. This approach is the most powerfull solution as it allows all the features of multi-threading, while removing several of the more expensives costs of using kernel threads. The down side is that almost none of the low-level threading problems are hidden, users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong garantees but the parallelism toolkit offers very little to reduce complexity in itself.
+A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multi-threading, while removing several of the more expensive costs of kernel threads. The down side is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees but the parallelism toolkit offers very little to reduce complexity in itself.
 Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
 \subsection{Fibers : user-level threads without preemption}
 A popular varient of \glspl{uthread} is what is often reffered to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantical differences with \glspl{uthread}. Advocates of \glspl{fiber} list their high performance and ease of implementation as majors strenghts of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignore fibers.
+A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantical differences with \glspl{uthread}. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
 An example of a language that uses fibers is Go~\cite{Go}
 \subsection{Jobs and thread pools}
 The approach on the opposite end of the spectrum is to base parallelism on \glspl{pool}. Indeed, \glspl{pool} offer limited flexibility but at the benefit of a simpler user interface. In \gls{pool} based systems, users express parallelism as units of work and a dependency graph (either explicit or implicit) that tie them together. This approach means users need not worry about concurrency but significantly limits the interaction that can occur among jobs. Indeed, any \gls{job} that blocks also blocks the underlying worker, which effectively means the CPU utilization, and therefore throughput, suffers noticeably. It can be argued that a solution to this problem is to use more workers than available cores. However, unless the number of jobs and the number of workers are comparable, having a significant amount of blocked jobs always results in idles cores.
+The approach on the opposite end of the spectrum is to base parallelism on \glspl{pool}. Indeed, \glspl{pool} offer limited flexibility but at the benefit of a simpler user interface. In \gls{pool} based systems, users express parallelism as units of work, called jobs, and a dependency graph (either explicit or implicit) that ties them together. This approach means users need not worry about concurrency but significantly limits the interaction that can occur among jobs. Indeed, any \gls{job} that blocks also blocks the underlying worker, which effectively means the CPU utilization, and therefore throughput, suffers noticeably. It can be argued that a solution to this problem is to use more workers than available cores. However, unless the number of jobs and the number of workers are comparable, having a significant amount of blocked jobs always results in idle cores.
 The gold standard of this implementation is Intel's TBB library~\cite{TBB}.
 \subsection{Paradigm performance}
 While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pindown the performance implications of chosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantess that the \gls{pool} based system has the best performance thanks to the lower memory overhead. However, interactions between jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but context switches will be more expansive and the extra control means users need to tweak more variables to get the desired performance. Furthermore, if the units of uninterrupted work are large enough the paradigm choice is largely amorticised by the actual work done.
+While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees that the \gls{pool} based system has the best performance thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but context switches are more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortised by the actual work done.
-\newpage
 \TODO
+\subsection{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
+\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
+\subsubsection{Future Work: Machine setup}\label{machine}
+While this was not done in the context of this proposal, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, systems using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores. OS support for CPU affinity is now common \cit, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
 \subsection{Paradigms}\label{cfaparadigms}
+Given these building blocks we can then reproduce the all three of the popular paradigms. Indeed, we get \glspl{uthread} as the default paradigm in \CFA. However, disabling \glspl{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application.
+% \subsection{High-level options}\label{tasks}
+%
+% \subsubsection{Thread interface}
+% constructors destructors
+%       initializer lists
+% monitors
+%
+% \subsubsection{Futures}
+%
+% \subsubsection{Implicit threading}
+% Finally, simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the system level.
+%
+% \begin{center}
+% \begin{tabular}[t]{|c|c|c|}
+% Sequential & System Parallel & Language Parallel \\
+% \begin{lstlisting}
+% void big_sum(int* a, int* b,
+%                int* out,
+%                size_t length)
+% {
+%       for(int i = 0; i < length; ++i ) {
+%               out[i] = a[i] + b[i];
+%       }
+% }
+%
+%
+%
+%
+%
+% int* a[10000];
+% int* b[10000];
+% int* c[10000];
+% //... fill in a and b ...
+% big_sum(a, b, c, 10000);
+% \end{lstlisting} &\begin{lstlisting}
+% void big_sum(int* a, int* b,
+%                int* out,
+%                size_t length)
+% {
+%       range ar(a, a + length);
+%       range br(b, b + length);
+%       range or(out, out + length);
+%       parfor( ai, bi, oi,
+%       [](int* ai, int* bi, int* oi) {
+%               oi = ai + bi;
+%       });
+% }
+%
+% int* a[10000];
+% int* b[10000];
+% int* c[10000];
+% //... fill in a and b ...
+% big_sum(a, b, c, 10000);
+% \end{lstlisting}&\begin{lstlisting}
+% void big_sum(int* a, int* b,
+%                int* out,
+%                size_t length)
+% {
+%       for (ai, bi, oi) in (a, b, out) {
+%               oi = ai + bi;
+%       }
+% }
+%
+%
+%
+%
+%
+% int* a[10000];
+% int* b[10000];
+% int* c[10000];
+% //... fill in a and b ...
+% big_sum(a, b, c, 10000);
+% \end{lstlisting}
+% \end{tabular}
+% \end{center}
+%
+% \subsection{Machine setup}\label{machine}
+% Threads are all good and well but wee still some OS support to fully utilize available hardware.
+%
+% \textbf{\large{Work in progress...}} Do wee need something beyond specifying the number of kernel threads?
+Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \glspl{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policies can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.

doc/proposals/concurrency/thesis.tex

-                      r201aeb9
+                      rd67cdb7
 % requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
 % inline code ©...© (copyright symbol) emacs: C-q M-)
 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
 % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
 % LaTex escape §...§ (section symbol) emacs: C-q M-'
 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
+% inline code ©...© (copyright symbol) emacs: C-q M-)
+% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
+% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
+% green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
+% LaTex escape §...§ (section symbol) emacs: C-q M-'
+% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
 % math escape $...$ (dollar symbol)
 …
 \usepackage{multicol}
 \usepackage[acronym]{glossaries}
 \usepackage{varioref}
+\usepackage{varioref}
 \usepackage{listings}                                           % format program code
 \usepackage[flushmargin]{footmisc}                              % support label/reference in footnote
 …
 \input{parallelism}
+\chapter{Putting it all together}
+\input{together}
+\input{future}
 \chapter{Conclusion}
-\chapter{Future work}
-Concurrency and parallelism is still a very active field that strongly benefits from hardware advances. As such certain features that aren't necessarily mature enough in their current state could become relevant in the lifetime of \CFA.
-\subsection{Transactions}
 \section*{Acknowledgements}

doc/proposals/concurrency/version

r201aeb9	rd67cdb7
1		0.~~9.180~~
	1	0.10.2

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: