Diff [2fb35df396c012cc3908fbb4d946ba80cad0ce88:41b8ea40e8b670142d317601e53338340fee08be] for / – Cforall

benchmark/Makefile.am

-                      r2fb35df
+                      r41b8ea4
 creation_cfa_generator_DURATION = 1000000000
 creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
-creation_cfa_thread_DURATION = 10000000
-creation_upp_thread_DURATION = ${creation_cfa_thread_DURATION}
 creation_DURATION = 10000000
 …
 cleancsv:
         rm -f compile.csv basic.csv ctxswitch.csv mutex.csv scheduling.csv
+        rm -f compile.csv basic.csv ctxswitch.csv mutex.csv schedint.csv
 jenkins$(EXEEXT): cleancsv
 …
         +make mutex.csv
         -+make mutex.diff.csv
         +make scheduling.csv
         -+make scheduling.diff.csv
+        +make schedint.csv
+        -+make schedint.diff.csv
 @DOifskipcompile@
         cat compile.csv
 …
         cat mutex.csv
         -cat mutex.diff.csv
         cat scheduling.csv
         -cat scheduling.diff.csv
+        cat schedint.csv
+        -cat schedint.diff.csv
 compile.csv:
 …
         $(srcdir)/fixcsv.sh $@
 scheduling.csv:
+schedint.csv:
         echo "building $@"
         echo "schedint-1,schedint-2,schedext-1,schedext-2" > $@
 …
 ctxswitch-python_coroutine$(EXEEXT):
         $(BENCH_V_PY)echo "#!/bin/sh" > a.out
         echo "python3.7 $(srcdir)/ctxswitch/python_cor.py" >> a.out
+        echo "python3 $(srcdir)/ctxswitch/python_cor.py \"$$""@\"" >> a.out
         chmod a+x a.out
 ctxswitch-nodejs_coroutine$(EXEEXT):
         $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
         echo "nodejs $(srcdir)/ctxswitch/node_cor.js" >> a.out
+        echo "nodejs $(srcdir)/ctxswitch/node_cor.js \"$$""@\"" >> a.out
         chmod a+x a.out
 ctxswitch-nodejs_await$(EXEEXT):
         $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
         echo "nodejs $(srcdir)/ctxswitch/node_await.js" >> a.out
+        echo "nodejs $(srcdir)/ctxswitch/node_await.js \"$$""@\"" >> a.out
         chmod a+x a.out
 …
         $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
         echo "#!/bin/sh" > a.out
         echo "java JavaThread" >> a.out
+        echo "java JavaThread \"$$""@\"" >> a.out
         chmod a+x a.out
 …
         $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
         echo "#!/bin/sh" > a.out
         echo "java JavaThread" >> a.out
+        echo "java JavaThread \"$$""@\"" >> a.out
         chmod a+x a.out
 …
         $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
         echo "#!/bin/sh" > a.out
         echo "java JavaThread" >> a.out
+        echo "java JavaThread \"$$""@\"" >> a.out
         chmod a+x a.out
 …
 creation-python_coroutine$(EXEEXT):
         $(BENCH_V_PY)echo "#!/bin/sh" > a.out
         echo "python3.7 $(srcdir)/creation/python_cor.py" >> a.out
+        echo "python3 $(srcdir)/creation/python_cor.py \"$$""@\"" >> a.out
         chmod a+x a.out
 creation-nodejs_coroutine$(EXEEXT):
         $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
         echo "nodejs $(srcdir)/creation/node_cor.js" >> a.out
+        echo "nodejs $(srcdir)/creation/node_cor.js \"$$""@\"" >> a.out
         chmod a+x a.out
 …
         $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
         echo "#!/bin/sh" > a.out
         echo "java JavaThread" >> a.out
+        echo "java JavaThread \"$$""@\"" >> a.out
         chmod a+x a.out
 …
 compile-array$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/array.cfa
 compile-attributes$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/attributes.cfa
 compile-empty$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(srcdir)/compile/empty.cfa
 compile-expression$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/expression.cfa
 compile-io$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/io1.cfa
 compile-monitor$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
 compile-operators$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/operators.cfa
 compile-thread$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/concurrent/thread.cfa
 compile-typeof$(EXEEXT):
         $(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
+        $(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/typeof.cfa
 ## =========================================================================================================

benchmark/creation/JavaThread.java

-                      r2fb35df
+                      r41b8ea4
+        }
         public static void main(String[] args) throws InterruptedException {
                 if ( args.length > 2 ) System.exit( 1 );
                 if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+                if ( args.length > 1 ) System.exit( 1 );
+                if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
                 for (int i = Integer.parseInt("5"); --i >= 0 ; ) {

benchmark/ctxswitch/JavaThread.java

-                      r2fb35df
+                      r41b8ea4
+        }
         public static void main(String[] args) throws InterruptedException {
                 if ( args.length > 2 ) System.exit( 1 );
                 if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+                if ( args.length > 1 ) System.exit( 1 );
+                if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
                 for (int i = Integer.parseInt("5"); --i >= 0 ; ) {

benchmark/io/http/main.cfa

r2fb35df	r41b8ea4
125	125	workers[i].flags = 0;
126	126	}
127		unpark( workers[i] ~~__cfaabi_dbg_ctx2~~ );
	127	unpark( workers[i] );
128	128	}
129	129	printf("%d workers started on %d processors\n", options.clopts.nworkers, options.clopts.nprocs);

benchmark/io/http/worker.cfa

r2fb35df	r41b8ea4
22	22
23	23	void main( Worker & this ) {
24		park( ~~__cfaabi_dbg_ctx~~ );
	24	park();
25	25	/* paranoid */ assert( this.pipe[0] != -1 );
26	26	/* paranoid */ assert( this.pipe[1] != -1 );

benchmark/io/readv.cfa

-                      r2fb35df
+                      r41b8ea4
 void main( Reader & ) {
         park( __cfaabi_dbg_ctx );
+        park();
         /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
 …
                                 for(i; nthreads) {
                                         unpark( threads[i] __cfaabi_dbg_ctx2 );
+                                        unpark( threads[i] );
+                                }
                                 wait(duration, start, end, is_tty);

benchmark/mutex/JavaThread.java

-                      r2fb35df
+                      r41b8ea4
+        }
         public static void main(String[] args) throws InterruptedException {
                 if ( args.length > 2 ) System.exit( 1 );
                 if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+                if ( args.length > 1 ) System.exit( 1 );
+                if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
                 for (int n = Integer.parseInt("5"); --n >= 0 ; ) {

benchmark/readyQ/yield.cfa

-                      r2fb35df
+                      r41b8ea4
 void main( Yielder & this ) {
         park( __cfaabi_dbg_ctx );
+        park();
         /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
 …
                                 for(i; nthreads) {
                                         unpark( threads[i] __cfaabi_dbg_ctx2 );
+                                        unpark( threads[i] );
+                                }
                                 wait(duration, start, end, is_tty);

benchmark/schedint/JavaThread.java

-                      r2fb35df
+                      r41b8ea4
+        }
         public static void main(String[] args) throws InterruptedException {
                 if ( args.length > 2 ) System.exit( 1 );
                 if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+                if ( args.length > 1 ) System.exit( 1 );
+                if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
                 for (int n = Integer.parseInt("5"); --n >= 0 ; ) {

doc/LaTeXmacros/common.tex

-                      r2fb35df
+                      r41b8ea4
 %% Created On       : Sat Apr  9 10:06:17 2016
 %% Last Modified By : Peter A. Buhr
 %% Last Modified On : Wed Sep 23 21:21:55 2020
 %% Update Count     : 454
+%% Last Modified On : Mon Oct  5 09:34:46 2020
+%% Update Count     : 464
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 …
 \makeatother
 \newcommand{\CFADefaults}{%
+\newcommand{\CFAStyle}{%
 \lstset{
-language=CFA,
 columns=fullflexible,
 basicstyle=\linespread{0.9}\sf,                 % reduce line spacing and use sanserif font
 …
         {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex\textgreater}2,
 }% lstset
 }% CFADefaults
 \ifdefined\CFALatin%
 \lstnewenvironment{cfa}[1][]{\CFADefaults
+}% CFAStyle
+\ifdefined\CFALatin% extra Latin-1 escape characters
+\lstnewenvironment{cfa}[1][]{
 \lstset{
 language=CFA,
 …
 % inline code ©...© (copyright symbol) emacs: C-q M-)
 \lstMakeShortInline©                                    % single-character for \lstinline
+\else% extra Latin-1 escape characters
+\else% regular ASCII characters
+\lstnewenvironment{cfa}[1][]{
 \lstset{
 language=CFA,
 escapechar=\$,                                                  % LaTeX escape in CFA code
+moredelim=**[is][\color{red}]{@}{@},    % red highlighting `...` (backtick symbol)
+}% lstset
+\lstnewenvironment{cfa}[1][]{\CFADefaults
+\lstset{
+language=CFA,
+escapechar=\$,                                                  % LaTeX escape in CFA code
+moredelim=**[is][\color{red}]{@}{@},    % red highlighting `...` (backtick symbol)
+moredelim=**[is][\color{red}]{@}{@},    % red highlighting @...@
 }% lstset
 \lstset{#1}

doc/bibliography/pl.bib

-                      r2fb35df
+                      r41b8ea4
     key         = {Cforall Benchmarks},
     author      = {{\textsf{C}{$\mathbf{\forall}$} Benchmarks}},
     howpublished= {\href{https://plg.uwaterloo.ca/~cforall/doc/CforallConcurrentBenchmarks.tar}{https://\-plg.uwaterloo.ca/\-$\sim$cforall/\-doc/\-CforallConcurrentBenchmarks.tar}},
+    howpublished= {\href{https://github.com/cforall/ConcurrentBenchmarks_SPE20}{https://\-github.com/\-cforall/\-ConcurrentBenchmarks\_SPE20}},
+}
 …
     title       = {Cooperating Sequential Processes},
     institution = {Technological University},
     address     = {Eindhoven, Netherlands},
+    address     = {Eindhoven, Neth.},
     year        = 1965,
     note        = {Reprinted in \cite{Genuys68} pp. 43--112.}

doc/papers/concurrency/Paper.tex

-                      r2fb35df
+                      r41b8ea4
 {}
 \lstnewenvironment{C++}[1][]                            % use C++ style
 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{uC++}[1][]
 {\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{Go}[1][]
 {\lstset{language=Golang,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=Golang,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{python}[1][]
 {\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{java}[1][]
 {\lstset{language=java,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=java,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 …
 \begin{document}
 \linenumbers                            % comment out to turn off line numbering
+%\linenumbers                           % comment out to turn off line numbering
 \maketitle
 …
 \label{s:RuntimeStructureCluster}
 A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue.
+A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue~\cite{Buhr90a}.
 The term \newterm{virtual processor} is introduced as a synonym for kernel thread to disambiguate between user and kernel thread.
 From the language perspective, a virtual processor is an actual processor (core).
 …
 \end{cfa}
 where CPU time in nanoseconds is from the appropriate language clock.
+Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language.
+Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language;
+each @N@ appears after the experiment name in the following tables.
 The total time is divided by @N@ to obtain the average time for a benchmark.
 Each benchmark experiment is run 13 times and the average appears in the table.
+For languages with a runtime JIT (Java, Node.js, Python), a single half-hour long experiment is run to check stability;
+all long-experiment results are statistically equivalent, \ie median/average/standard-deviation correlate with the short-experiment results, indicating the short experiments reached a steady state.
 All omitted tests for other languages are functionally identical to the \CFA tests and available online~\cite{CforallConcurrentBenchmarks}.
-% tar --exclude-ignore=exclude -cvhf benchmark.tar benchmark
-% cp -p benchmark.tar /u/cforall/public_html/doc/concurrent_benchmark.tar
 \paragraph{Creation}
 …
 \begin{multicols}{2}
+\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
+\begin{cfa}
+@coroutine@ MyCoroutine {};
+\begin{cfa}[xleftmargin=0pt]
+`coroutine` MyCoroutine {};
 void ?{}( MyCoroutine & this ) {
 #ifdef EAGER
 …
 void main( MyCoroutine & ) {}
 int main() {
         BENCH( for ( N ) { @MyCoroutine c;@ } )
+        BENCH( for ( N ) { `MyCoroutine c;` } )
         sout | result;
+}
 …
 \begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}
 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
 \CFA generator                  & 0.6           & 0.6           & 0.0           \\
 \CFA coroutine lazy             & 13.4          & 13.1          & 0.5           \\
 \CFA coroutine eager    & 144.7         & 143.9         & 1.5           \\
 \CFA thread                             & 466.4         & 468.0         & 11.3          \\
 \uC coroutine                   & 155.6         & 155.7         & 1.7           \\
 \uC thread                              & 523.4         & 523.9         & 7.7           \\
 Python generator                & 123.2         & 124.3         & 4.1           \\
 Node.js generator               & 33.4          & 33.5          & 0.3           \\
 Goroutine thread                & 751.0         & 750.5         & 3.1           \\
 Rust tokio thread               & 1860.0        & 1881.1        & 37.6          \\
 Rust thread                             & 53801.0       & 53896.8       & 274.9         \\
 Java thread (   10 000)         & 119256.0      & 119679.2      & 2244.0        \\
 Java thread (1 000 000)         & 123100.0      & 123052.5      & 751.6         \\
 Pthreads thread                 & 31465.5       & 31419.5       & 140.4
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+\CFA generator (1B)                     & 0.6           & 0.6           & 0.0           \\
+\CFA coroutine lazy     (100M)  & 13.4          & 13.1          & 0.5           \\
+\CFA coroutine eager (10M)      & 144.7         & 143.9         & 1.5           \\
+\CFA thread (10M)                       & 466.4         & 468.0         & 11.3          \\
+\uC coroutine (10M)                     & 155.6         & 155.7         & 1.7           \\
+\uC thread (10M)                        & 523.4         & 523.9         & 7.7           \\
+Python generator (10M)          & 123.2         & 124.3         & 4.1           \\
+Node.js generator (10M)         & 33.4          & 33.5          & 0.3           \\
+Goroutine thread (10M)          & 751.0         & 750.5         & 3.1           \\
+Rust tokio thread (10M)         & 1860.0        & 1881.1        & 37.6          \\
+Rust thread     (250K)                  & 53801.0       & 53896.8       & 274.9         \\
+Java thread (250K)                      & 119256.0      & 119679.2      & 2244.0        \\
+% Java thread (1 000 000)               & 123100.0      & 123052.5      & 751.6         \\
+Pthreads thread (250K)          & 31465.5       & 31419.5       & 140.4
 \end{tabular}
 \end{multicols}
 …
 Internal scheduling is measured using a cycle of two threads signalling and waiting.
 Figure~\ref{f:schedint} shows the code for \CFA, with results in Table~\ref{t:schedint}.
+Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
+Java scheduling is significantly greater because the benchmark explicitly creates multiple threads in order to prevent the JIT from making the program sequential, \ie removing all locking.
+Note, the \CFA incremental cost for bulk acquire is a fixed cost for small numbers of mutex objects.
+User-level threading has one kernel thread, eliminating contention between the threads (direct handoff of the kernel thread).
+Kernel-level threading has two kernel threads allowing some contention.
 \begin{multicols}{2}
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
 \begin{cfa}
+\setlength{\tabcolsep}{3pt}
+\begin{cfa}[xleftmargin=0pt]
 volatile int go = 0;
+@condition c;@
 @monitor@ M {} m1/*, m2, m3, m4*/;
 void call( M & @mutex p1/*, p2, p3, p4*/@ ) {
         @signal( c );@
+}
 void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
+`condition c;`
+`monitor` M {} m1/*, m2, m3, m4*/;
+void call( M & `mutex p1/*, p2, p3, p4*/` ) {
+        `signal( c );`
+}
+void wait( M & `mutex p1/*, p2, p3, p4*/` ) {
         go = 1; // continue other thread
         for ( N ) { @wait( c );@ } );
+        for ( N ) { `wait( c );` } );
+}
 thread T {};
 …
 \begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}}
 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
 \CFA @signal@, 1 monitor        & 364.4         & 364.2         & 4.4           \\
 \CFA @signal@, 2 monitor        & 484.4         & 483.9         & 8.8           \\
 \CFA @signal@, 4 monitor        & 709.1         & 707.7         & 15.0          \\
 \uC @signal@ monitor            & 328.3         & 327.4         & 2.4           \\
 Rust cond. variable                     & 7514.0        & 7437.4        & 397.2         \\
 Java @notify@ monitor (  1 000 000)             & 8717.0        & 8774.1        & 471.8         \\
 Java @notify@ monitor (100 000 000)             & 8634.0        & 8683.5        & 330.5         \\
 Pthreads cond. variable         & 5553.7        & 5576.1        & 345.6
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+\CFA @signal@, 1 monitor (10M)  & 364.4         & 364.2         & 4.4           \\
+\CFA @signal@, 2 monitor (10M)  & 484.4         & 483.9         & 8.8           \\
+\CFA @signal@, 4 monitor (10M)  & 709.1         & 707.7         & 15.0          \\
+\uC @signal@ monitor (10M)              & 328.3         & 327.4         & 2.4           \\
+Rust cond. variable     (1M)            & 7514.0        & 7437.4        & 397.2         \\
+Java @notify@ monitor (1M)              & 8717.0        & 8774.1        & 471.8         \\
+% Java @notify@ monitor (100 000 000)           & 8634.0        & 8683.5        & 330.5         \\
+Pthreads cond. variable (1M)    & 5553.7        & 5576.1        & 345.6
 \end{tabular}
 \end{multicols}
 …
 External scheduling is measured using a cycle of two threads calling and accepting the call using the @waitfor@ statement.
 Figure~\ref{f:schedext} shows the code for \CFA with results in Table~\ref{t:schedext}.
 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
+Note, the \CFA incremental cost for bulk acquire is a fixed cost for small numbers of mutex objects.
 \begin{multicols}{2}
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
+\setlength{\tabcolsep}{5pt}
 \vspace*{-16pt}
 \begin{cfa}
 @monitor@ M {} m1/*, m2, m3, m4*/;
 void call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
 void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
         for ( N ) { @waitfor( call : p1/*, p2, p3, p4*/ );@ }
+\begin{cfa}[xleftmargin=0pt]
+`monitor` M {} m1/*, m2, m3, m4*/;
+void call( M & `mutex p1/*, p2, p3, p4*/` ) {}
+void wait( M & `mutex p1/*, p2, p3, p4*/` ) {
+        for ( N ) { `waitfor( call : p1/*, p2, p3, p4*/ );` }
+}
 thread T {};
 …
 \columnbreak
 \vspace*{-16pt}
+\vspace*{-18pt}
 \captionof{table}{External-scheduling comparison (nanoseconds)}
 \label{t:schedext}
 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
 \CFA @waitfor@, 1 monitor       & 367.1 & 365.3 & 5.0   \\
 \CFA @waitfor@, 2 monitor       & 463.0 & 464.6 & 7.1   \\
 \CFA @waitfor@, 4 monitor       & 689.6 & 696.2 & 21.5  \\
 \uC \lstinline[language=uC++]|_Accept| monitor  & 328.2 & 329.1 & 3.4   \\
 Go \lstinline[language=Golang]|select| channel  & 365.0 & 365.5 & 1.2
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+\CFA @waitfor@, 1 monitor (10M) & 367.1 & 365.3 & 5.0   \\
+\CFA @waitfor@, 2 monitor (10M) & 463.0 & 464.6 & 7.1   \\
+\CFA @waitfor@, 4 monitor (10M) & 689.6 & 696.2 & 21.5  \\
+\uC \lstinline[language=uC++]|_Accept| monitor (10M)    & 328.2 & 329.1 & 3.4   \\
+Go \lstinline[language=Golang]|select| channel (10M)    & 365.0 & 365.5 & 1.2
 \end{tabular}
 \end{multicols}
 …
 \begin{multicols}{2}
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
 \begin{cfa}
 @monitor@ M {} m1/*, m2, m3, m4*/;
 call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
+\setlength{\tabcolsep}{3pt}
+\begin{cfa}[xleftmargin=0pt]
+`monitor` M {} m1/*, m2, m3, m4*/;
+call( M & `mutex p1/*, p2, p3, p4*/` ) {}
 int main() {
         BENCH( for( N ) call( m1/*, m2, m3, m4*/ ); )
 …
 \label{t:mutex}
 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
 test-and-test-set lock                  & 19.1  & 18.9  & 0.4   \\
 \CFA @mutex@ function, 1 arg.   & 48.3  & 47.8  & 0.9   \\
 \CFA @mutex@ function, 2 arg.   & 86.7  & 87.6  & 1.9   \\
 \CFA @mutex@ function, 4 arg.   & 173.4 & 169.4 & 5.9   \\
 \uC @monitor@ member rtn.               & 54.8  & 54.8  & 0.1   \\
 Goroutine mutex lock                    & 34.0  & 34.0  & 0.0   \\
 Rust mutex lock                                 & 33.0  & 33.2  & 0.8   \\
 Java synchronized method (   100 000 000)               & 31.0  & 30.9  & 0.5   \\
 Java synchronized method (10 000 000 000)               & 31.0 & 30.2 & 0.9 \\
 Pthreads mutex Lock                             & 31.0  & 31.1  & 0.4
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+test-and-test-set lock (50M)            & 19.1  & 18.9  & 0.4   \\
+\CFA @mutex@ function, 1 arg. (50M)     & 48.3  & 47.8  & 0.9   \\
+\CFA @mutex@ function, 2 arg. (50M)     & 86.7  & 87.6  & 1.9   \\
+\CFA @mutex@ function, 4 arg. (50M)     & 173.4 & 169.4 & 5.9   \\
+\uC @monitor@ member rtn. (50M)         & 54.8  & 54.8  & 0.1   \\
+Goroutine mutex lock (50M)                      & 34.0  & 34.0  & 0.0   \\
+Rust mutex lock (50M)                           & 33.0  & 33.2  & 0.8   \\
+Java synchronized method (50M)          & 31.0  & 30.9  & 0.5   \\
+% Java synchronized method (10 000 000 000)             & 31.0 & 30.2 & 0.9 \\
+Pthreads mutex Lock (50M)                       & 31.0  & 31.1  & 0.4
 \end{tabular}
 \end{multicols}
 …
 \begin{multicols}{2}
+\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
+\begin{cfa}[aboveskip=0pt,belowskip=0pt]
+@coroutine@ C {};
+void main( C & ) { for () { @suspend;@ } }
+\begin{cfa}[xleftmargin=0pt]
+`coroutine` C {};
+void main( C & ) { for () { `suspend;` } }
 int main() { // coroutine test
         C c;
         BENCH( for ( N ) { @resume( c );@ } )
+        BENCH( for ( N ) { `resume( c );` } )
         sout | result;
+}
 int main() { // thread test
         BENCH( for ( N ) { @yield();@ } )
+        BENCH( for ( N ) { `yield();` } )
         sout | result;
+}
 …
 \label{t:ctx-switch}
 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
 C function                      & 1.8           & 1.8           & 0.0   \\
 \CFA generator          & 1.8           & 2.0           & 0.3   \\
 \CFA coroutine          & 32.5          & 32.9          & 0.8   \\
 \CFA thread                     & 93.8          & 93.6          & 2.2   \\
 \uC coroutine           & 50.3          & 50.3          & 0.2   \\
 \uC thread                      & 97.3          & 97.4          & 1.0   \\
 Python generator        & 40.9          & 41.3          & 1.5   \\
 Node.js await           & 1852.2        & 1854.7        & 16.4  \\
 Node.js generator       & 33.3          & 33.4          & 0.3   \\
 Goroutine thread        & 143.0         & 143.3         & 1.1   \\
 Rust async await        & 32.0          & 32.0          & 0.0   \\
 Rust tokio thread       & 143.0         & 143.0         & 1.7   \\
 Rust thread                     & 332.0         & 331.4         & 2.4   \\
 Java thread     (      100 000)         & 405.0         & 415.0         & 17.6  \\
 Java thread (  100 000 000)                     & 413.0 & 414.2 & 6.2 \\
 Java thread (5 000 000 000)                     & 415.0 & 415.2 & 6.1 \\
 Pthreads thread         & 334.3         & 335.2         & 3.9
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+C function (10B)                        & 1.8           & 1.8           & 0.0   \\
+\CFA generator (5B)                     & 1.8           & 2.0           & 0.3   \\
+\CFA coroutine (100M)           & 32.5          & 32.9          & 0.8   \\
+\CFA thread (100M)                      & 93.8          & 93.6          & 2.2   \\
+\uC coroutine (100M)            & 50.3          & 50.3          & 0.2   \\
+\uC thread (100M)                       & 97.3          & 97.4          & 1.0   \\
+Python generator (100M)         & 40.9          & 41.3          & 1.5   \\
+Node.js await (5M)                      & 1852.2        & 1854.7        & 16.4  \\
+Node.js generator (100M)        & 33.3          & 33.4          & 0.3   \\
+Goroutine thread (100M)         & 143.0         & 143.3         & 1.1   \\
+Rust async await (100M)         & 32.0          & 32.0          & 0.0   \\
+Rust tokio thread (100M)        & 143.0         & 143.0         & 1.7   \\
+Rust thread (25M)                       & 332.0         & 331.4         & 2.4   \\
+Java thread (100M)                      & 405.0         & 415.0         & 17.6  \\
+% Java thread (  100 000 000)                   & 413.0 & 414.2 & 6.2 \\
+% Java thread (5 000 000 000)                   & 415.0 & 415.2 & 6.1 \\
+Pthreads thread (25M)           & 334.3         & 335.2         & 3.9
 \end{tabular}
 \end{multicols}
 …
 Languages using 1:1 threading based on pthreads can at best meet or exceed, due to language overhead, the pthread results.
 Note, pthreads has a fast zero-contention mutex lock checked in user space.
+Languages with M:N threading have better performance than 1:1 because there are no operating-system interactions.
+Languages with M:N threading have better performance than 1:1 because there are no operating-system interactions (context-switching or locking).
+As well, for locking experiments, M:N threading has less contention if only one kernel thread is used.
 Languages with stackful coroutines have higher cost than stackless coroutines because of stack allocation and context switching;
 however, stackful \uC and \CFA coroutines have approximately the same performance as stackless Python and Node.js generators.
 The \CFA stackless generator is approximately 25 times faster for suspend/resume and 200 times faster for creation than stackless Python and Node.js generators.
+The Node.js context-switch is costly when asynchronous await must enter the event engine because a promise is not fulfilled.
+Finally, the benchmark results correlate across programming languages with and without JIT, indicating the JIT has completed any runtime optimizations.
 …
 The authors recognize the design assistance of Aaron Moss, Rob Schluntz, Andrew Beach, and Michael Brooks; David Dice for commenting and helping with the Java benchmarks; and Gregor Richards for helping with the Node.js benchmarks.
 This research is funded by a grant from Waterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
+This research is funded by the NSERC/Waterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
 {%

doc/papers/concurrency/annex/local.bib

r2fb35df	r41b8ea4
59	59	@manual{Cpp-Transactions,
60	60	keywords = {C++, Transactional Memory},
61		title = {Tech~~nical Specification~~ for C++ Extensions for Transactional Memory},
	61	title = {Tech. Spec. for C++ Extensions for Transactional Memory},
62	62	organization= {International Standard ISO/IEC TS 19841:2015 },
63	63	publisher = {American National Standards Institute},

doc/papers/concurrency/mail2

-                      r2fb35df
+                      r41b8ea4
 Software: Practice and Experience Editorial Office
+Date: Wed, 2 Sep 2020 20:55:34 +0000
+From: Richard Jones <onbehalfof@manuscriptcentral.com>
+Reply-To: R.E.Jones@kent.ac.uk
+To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
+Subject: Software: Practice and Experience - Decision on Manuscript ID
+ SPE-19-0219.R2
+02-Sep-2020
+Dear Dr Buhr,
+Many thanks for submitting SPE-19-0219.R2 entitled "Advanced Control-flow and Concurrency in Cforall" to Software: Practice and Experience. The paper has now been reviewed and the comments of the referees are included at the bottom of this letter. I apologise for the length of time it has taken to get these.
+Both reviewers consider this paper to be close to acceptance. However, before I can accept this paper, I would like you address the comments of Reviewer 2, particularly with regard to the description of the adaptation Java harness to deal with warmup. I would expect to see a convincing argument that the computation has reached a steady state. I would also like you to provide the values for N for each benchmark run. This should be very straightforward for you to do. There are a couple of papers on steady state that you may wish to consult (though I am certainly not pushing my own work).
+1) Barrett, Edd; Bolz-Tereick, Carl Friedrich; Killick, Rebecca; Mount, Sarah and Tratt, Laurence. Virtual Machine Warmup Blows Hot and Cold. OOPSLA 2017. https://doi.org/10.1145/3133876
+Virtual Machines (VMs) with Just-In-Time (JIT) compilers are traditionally thought to execute programs in two phases: the initial warmup phase determines which parts of a program would most benefit from dynamic compilation, before JIT compiling those parts into machine code; subsequently the program is said to be at a steady state of peak performance. Measurement methodologies almost always discard data collected during the warmup phase such that reported measurements focus entirely on peak performance. We introduce a fully automated statistical approach, based on changepoint analysis, which allows us to determine if a program has reached a steady state and, if so, whether that represents peak performance or not. Using this, we show that even when run in the most controlled of circumstances, small, deterministic, widely studied microbenchmarks often fail to reach a steady state of peak performance on a variety of common VMs. Repeating our experiment on 3 different machines, we found that at most 43.5% of pairs consistently reach a steady state of peak performance.
+2) Kalibera, Tomas and Jones, Richard. Rigorous Benchmarking in Reasonable Time. ISMM 2013. https://doi.org/10.1145/2555670.2464160
+Experimental evaluation is key to systems research. Because modern systems are complex and non-deterministic, good experimental methodology demands that researchers account for uncertainty. To obtain valid results, they are expected to run many iterations of benchmarks, invoke virtual machines (VMs) several times, or even rebuild VM or benchmark binaries more than once. All this repetition costs time to complete experiments. Currently, many evaluations give up on sufficient repetition or rigorous statistical methods, or even run benchmarks only in training sizes. The results reported often lack proper variation estimates and, when a small difference between two systems is reported, some are simply unreliable. In contrast, we provide a statistically rigorous methodology for repetition and summarising results that makes efficient use of experimentation time. Time efficiency comes from two key observations. First, a given benchmark on a given platform is typically prone to much less non-determinism than the common worst-case of published corner-case studies. Second, repetition is most needed where most uncertainty arises (whether between builds, between executions or between iterations). We capture experimentation cost with a novel mathematical model, which we use to identify the number of repetitions at each level of an experiment necessary and sufficient to obtain a given level of precision. We present our methodology as a cookbook that guides researchers on the number of repetitions they should run to obtain reliable results. We also show how to present results with an effect size confidence interval. As an example, we show how to use our methodology to conduct throughput experiments with the DaCapo and SPEC CPU benchmarks on three recent platforms.
+You have 42 days from the date of this email to submit your revision. If you are unable to complete the revision within this time, please contact me to request a short extension.
+You can upload your revised manuscript and submit it through your Author Center. Log into https://mc.manuscriptcentral.com/spe and enter your Author Center, where you will find your manuscript title listed under "Manuscripts with Decisions".
+When submitting your revised manuscript, you will be able to respond to the comments made by the referee(s) in the space provided.  You can use this space to document any changes you make to the original manuscript.
+If you would like help with English language editing, or other article preparation support, Wiley Editing Services offers expert help with English Language Editing, as well as translation, manuscript formatting, and figure formatting at www.wileyauthors.com/eeo/preparation. You can also check out our resources for Preparing Your Article for general guidance about writing and preparing your manuscript at www.wileyauthors.com/eeo/prepresources.
+Once again, thank you for submitting your manuscript to Software: Practice and Experience. I look forward to receiving your revision.
+Sincerely,
+Richard
+Prof. Richard Jones
+Editor, Software: Practice and Experience
+R.E.Jones@kent.ac.uk
+Referee(s)' Comments to Author:
+Reviewing: 1
+Comments to the Author
+Overall, I felt that this draft was an improvement on previous drafts and I don't have further changes to request.
+I appreciated the new language to clarify the relationship of external and internal scheduling, for example, as well as the new measurements of Rust tokio. Also, while I still believe that the choice between thread/generator/coroutine and so forth could be made crisper and clearer, the current draft of Section 2 did seem adequate to me in terms of specifying the considerations that users would have to take into account to make the choice.
+Reviewing: 2
+Comments to the Author
+First: let me apologise for the delay on this review. I'll blame the global pandemic combined with my institution's senior management's counterproductive decisions for taking up most of my time and all of my energy.
+At this point, reading the responses, I think we've been around the course enough times that further iteration is unlikely to really improve the paper any further, so I'm happy to recommend acceptance.    My main comments are that there were some good points in the responses to *all* the reviews and I strongly encourage the authors to incorporate those discursive responses into the final paper so they may benefit readers as well as reviewers.   I agree with the recommendations of reviewer #2 that the paper could usefully be split in to two, which I think I made to a previous revision, but I'm happy to leave that decision to the Editor.
+Finally, the paper needs to describe how the Java harness was adapted to deal with warmup; why the computation has warmed up and reached a steady state - similarly for js and Python. The tables should also give the "N" chosen for each benchmark run.
+minor points
+* don't start sentences with "However"
+* most downloaded isn't an "Award"
+Date: Thu, 1 Oct 2020 05:34:29 +0000
+From: Richard Jones <onbehalfof@manuscriptcentral.com>
+Reply-To: R.E.Jones@kent.ac.uk
+To: pabuhr@uwaterloo.ca
+Subject: Revision reminder - SPE-19-0219.R2
+01-Oct-2020
+Dear Dr Buhr
+SPE-19-0219.R2
+This is a reminder that your opportunity to revise and re-submit your manuscript will expire 14 days from now. If you require more time please contact me directly and I may grant an extension to this deadline, otherwise the option to submit a revision online, will not be available.
+If your article is of potential interest to the general public, (which means it must be timely, groundbreaking, interesting and impact on everyday society) then please e-mail ejp@wiley.co.uk explaining the public interest side of the research. Wiley will then investigate the potential for undertaking a global press campaign on the article.
+I look forward to receiving your revision.
+Sincerely,
+Prof. Richard Jones
+Editor, Software: Practice and Experience
+https://mc.manuscriptcentral.com/spe
+Date: Tue, 6 Oct 2020 15:29:41 +0000
+From: Mayank Roy Chowdhury <onbehalfof@manuscriptcentral.com>
+Reply-To: speoffice@wiley.com
+To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
+Subject: SPE-19-0219.R3 successfully submitted
+06-Oct-2020
+Dear Dr Buhr,
+Your manuscript entitled "Advanced Control-flow and Concurrency in Cforall" has been successfully submitted online and is presently being given full consideration for publication in Software: Practice and Experience.
+Your manuscript number is SPE-19-0219.R3.  Please mention this number in all future correspondence regarding this submission.
+You can view the status of your manuscript at any time by checking your Author Center after logging into https://mc.manuscriptcentral.com/spe.  If you have difficulty using this site, please click the 'Get Help Now' link at the top right corner of the site.
+Thank you for submitting your manuscript to Software: Practice and Experience.
+Sincerely,
+Software: Practice and Experience Editorial Office

doc/refrat/refrat.tex

-                      r2fb35df
+                      r41b8ea4
 %% Created On       : Wed Apr  6 14:52:25 2016
 %% Last Modified By : Peter A. Buhr
 %% Last Modified On : Thu Sep 24 16:34:51 2020
 %% Update Count     : 109
+%% Last Modified On : Mon Oct  5 09:02:53 2020
+%% Update Count     : 110
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 …
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \CFADefaults                                                                                    % use default CFA format-style
+\CFAStyle                                                                                               % use default CFA format-style
 \lstnewenvironment{C++}[1][]                            % use C++ style
 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}}

doc/theses/fangren_yu_COOP_S20/Makefile

r2fb35df	r41b8ea4
46	46	# File Dependencies #
47	47
48
49	48	${DOCUMENT} : ${BASE}.ps
50	49	ps2pdf $<

doc/theses/fangren_yu_COOP_S20/Report.tex

-                      r2fb35df
+                      r41b8ea4
 \documentclass[twoside,12pt]{article}
+\documentclass[twoside,11pt]{article}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 …
 \usepackage[labelformat=simple,aboveskip=0pt,farskip=0pt]{subfig}
 \renewcommand{\thesubfigure}{\alph{subfigure})}
+\usepackage[flushmargin]{footmisc}                                              % support label/reference in footnote
 \usepackage{latexsym}                                   % \Box glyph
 \usepackage{mathptmx}                                   % better math font with "times"
 \usepackage{appendix}
+\usepackage[toc]{appendix}                                                              % article does not have appendix
 \usepackage[usenames]{color}
 \input{common}                                          % common CFA document macros
 …
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \CFADefaults
+\CFAStyle                                                                                               % CFA code-style for all languages
 \lstset{
 language=C++,                                                                                   % make C++ the default language
+language=C++,moredelim=**[is][\color{red}]{@}{@}                % make C++ the default language
 }% lstset
 \lstnewenvironment{C++}[1][]                            % use C++ style
 {\lstset{language=C++,moredelim=**[is][\color{red}]{@}{@},#1}}{}
+{\lstset{language=C++,moredelim=**[is][\color{red}]{@}{@}}\lstset{#1}}{}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 …
 \section{Overview}
 cfa-cc is the reference compiler for the \CFA programming language, which is a non-object-oriented extension to C.
+@cfa-cc@ is the reference compiler for the \CFA programming language, which is a non-object-oriented extension to C.
 \CFA attempts to introduce productive modern programming language features to C while maintaining as much backward-compatibility as possible, so that most existing C programs can seamlessly work with \CFA.
 …
 \subsubsection{Source: \lstinline{AST/SymbolTable.hpp}}
-\subsubsection{Source: \lstinline{SymTab/Indexer.h}}
 Function
 \begin{C++}
 …
 \begin{appendices}[toc,titletoc]
+\appendix
 \section{Appendix}
 \subsection{Kinds of Type Parameters}
 \label{s:KindsTypeParameters}
 A type parameter in a @forall@ clause has three possible kinds:
+A type parameter in a @forall@ clause has 3 kinds:
 \begin{enumerate}[listparindent=0pt]
 \item
+@dtype@: any data type (built-in or user defined).
+There is also a difference between opaque types (incomplete types, \ie those with only a forward declaration) and concrete types.
+Only concrete types can be directly used as a variable type.
+\CFA provides the @otype@ shorthand to require a type parameter be concrete, which also implicitly asserts the existence of its default and copy constructors, assignment, and destructor\footnote{\CFA implements the same automatic resource management (RAII) semantics as \CC.}.
+\item
+@ftype@: any function type.
+@ftype@ provides two purposes:
+\begin{itemize}
+\item
+Differentiate function pointer from data pointer because (in theory) some systems have different sizes for these pointers.
+\item
+Disallow a function pointer to match an overloaded data pointer, since variables and functions can have the same names.
+\end{itemize}
+@dtype@: any data type (built-in or user defined) that is not a concrete type.
+A non-concrete type is an incomplete type such as an opaque type or pointer/reference with an implicit (pointer) size and implicitly generated reference and dereference operations.
+\item
+@otype@: any data type (built-in or user defined) that is a concrete type.
+A concrete type is a complete type, \ie types that can be used to create a variable, which also implicitly asserts the existence of default and copy constructors, assignment, and destructor\footnote{\CFA implements the same automatic resource management (RAII) semantics as \CC.}.
+% \item
+% @ftype@: any function type.
+%
+% @ftype@ provides two purposes:
+% \begin{itemize}
+% \item
+% Differentiate function pointer from data pointer because (in theory) some systems have different sizes for these pointers.
+% \item
+% Disallow a function pointer to match an overloaded data pointer, since variables and functions can have the same names.
+% \end{itemize}
 \item
 @ttype@: tuple (variadic) type.
 @ttype@ parameter may only appear as type of the last parameter in a function, and it provides a type-safe way to implement variadic functions.
+Restricted to the type for the last parameter in a function, it provides a type-safe way to implement variadic functions.
 Note however, that it has certain restrictions, as described in the implementation section below.
 \end{enumerate}
 …
 \begin{enumerate}
 \item
 All types are function declarations are candidates of implicit parameters.
+All types, variables, and functions are candidates of implicit parameters.
 \item
 The parameter (assertion) name must match the actual declarations.
-\item
-Currently, assertions are all functions.
-Note that since \CFA has variable overloading, implicit value parameters might also be supported in the future.
 \end{enumerate}
 …
 In particular, polymorphic variadic recursion must be structural (\ie the number of arguments decreases in any possible recursive calls), otherwise code generation gets into an infinite loop.
 The \CFA compiler sets a limit on assertion depth and reports an error if assertion resolution does not terminate within the limit (as for \lstinline[language=C++]@templates@ in \CC).
-\end{appendices}
 \bibliographystyle{plain}

doc/theses/thierry_delisle_PhD/code/readQ_example/proto-gui/main.cpp

-                      r2fb35df
+                      r41b8ea4
 #include "thrdlib/thread.h"
+#include "thrdlib/thread.hpp"
 #include <cassert>
 …
 #include <algorithm>
 #include <atomic>
+#include <iostream>
 #include <memory>
 #include <vector>
 #include <getopt.h>
+using thrdlib::thread_t;
+extern __attribute__((aligned(128))) thread_local struct { // extern declaration only: the per-thread kernelTLS object is defined elsewhere. NOTE(review): presumably must match the runtime's own layout field-for-field — confirm.
+        void * volatile this_thread; // opaque pointer slots; names suggest current thread / processor / stats — TODO confirm against the defining library
+        void * volatile this_processor;
+        void * volatile this_stats;
+        struct {
+                volatile unsigned short disable_count;
+                volatile bool enabled;
+                volatile bool in_progress;
+        } preemption_state;
+        #if defined(__SIZEOF_INT128__)
+                __uint128_t rand_seed; // 128-bit seed when the compiler provides __int128
+        #else
+                uint64_t rand_seed; // otherwise a 64-bit seed
+        #endif
+        struct {
+                uint64_t fwd_seed;
+                uint64_t bck_seed;
+        } ready_rng;
+} kernelTLS __attribute__ ((tls_model ( "initial-exec" ))); // accessed using the initial-exec TLS model
 //--------------------
 …
                         assert( expected == reset );
                         if( std::atomic_compare_exchange_strong( &state, &expected, self) ) {
                                 thrdlib_park( self );
+                                thrdlib::park( self );
                                 ret = true;
                                 goto END;
 …
                 if( got == reset ) return false;
                 thrdlib_unpark( got );
+                thrdlib::unpark( got );
                 return true;
+        }
 …
         the_stats_thread = self;
         fence();
         thrdlib_park( self );
+        thrdlib::park( self );
         std::vector<bool> seen;
 …
         while(last_produced < nproduce) {
                 thrdlib_yield();
+                thrdlib::yield();
                 thrd_stats.stats.ran++;
                 if( last_produced > 0 ) seen.at(last_produced - 1) = true;
 …
 void Renderer( thread_t self ) {
         thrdlib_unpark( the_stats_thread );
+        thrdlib::unpark( the_stats_thread );
         for(unsigned i = 0; i < nproduce; i++) {
                 auto & frame = frames[i % nframes];
 …
         fsize    = 1000;
         nproduce = 60;
+        const char * framework;
         for(;;) {
 …
                         case -1:
                                 /* paranoid */ assert(optind <= argc);
+                                if( optind == argc ) {
+                                        std::cerr << "Must specify a framework" << std::endl;
+                                        goto usage;
+                                }
+                                framework = argv[optind];
                                 goto run;
                         case 'b':
 …
                                 std::cerr << opt << std::endl;
                         usage:
                                 std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
+                                std::cerr << "Usage: " << argv[0] << " [options] framework" << std::endl;
                                 std::cerr << std::endl;
                                 std::cerr << "  -b, --buff=COUNT    Number of frames to buffer" << std::endl;
 …
+        }
         run:
+        assert( framework );
         frames.reset(new Frame[nframes]);
 …
         std::cout << "(Buffering " << nframes << ")" << std::endl;
         thrdlib_setproccnt( 2 );
         thread_t stats     = thrdlib_create( Stats     );
+        thrdlib::init( framework, 2 );
+        thread_t stats     = thrdlib::create( Stats );
         std::cout << "Created Stats Thread" << std::endl;
+        while( the_stats_thread == nullptr ) thrdlib_yield();
+        while( the_stats_thread == nullptr ) thrdlib::yield();
         std::cout << "Creating Main Threads" << std::endl;
+        thread_t renderer  = thrdlib_create( Renderer  );
+        // while(true);
+        thread_t simulator = thrdlib_create( Simulator );
+        thread_t renderer  = thrdlib::create( Renderer  );
+        thread_t simulator = thrdlib::create( Simulator );
         std::cout << "Running" << std::endl;
+        thrdlib_join( simulator );
+        thrdlib_join( renderer  );
+        thrdlib_join( stats     );
+        thrdlib::join( simulator );
+        thrdlib::join( renderer  );
+        thrdlib::join( stats     );
+        thrdlib::clean();
         std::cout << "----------" << std::endl;

doc/user/Makefile

r2fb35df	r41b8ea4
55	55
56	56	${DOCUMENT} : ${BASE}.ps
57		ps2pdf $<
	57	ps2pdf -dPDFSETTINGS=/prepress $<
58	58
59	59	${BASE}.ps : ${BASE}.dvi

doc/user/user.tex

-                      r2fb35df
+                      r41b8ea4
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
 %% Last Modified On : Thu Sep 24 16:34:52 2020
 %% Update Count     : 3997
+%% Last Modified On : Mon Oct  5 08:57:29 2020
+%% Update Count     : 3998
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 …
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \CFADefaults                                                                                    % use default CFA format-style
+\CFAStyle                                                                                               % use default CFA format-style
 \lstnewenvironment{C++}[1][]                            % use C++ style
 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}}

libcfa/src/bits/containers.hfa

-                      r2fb35df
+                      r41b8ea4
                         tail = &get_next( *val );
                         *tail = 1p;
+                }
+                T * peek( __queue(T) & this ) { // Return the queue's first element without modifying the queue, or 0p when there is none.
+                        verify(*this.tail == 1p); // sanity check: the slot 'tail' points at must hold 1p (assumed to be the end-of-queue sentinel — TODO confirm)
+                        T * head = this.head;
+                        if( head != 1p ) { // a head equal to 1p is treated as "no element"
+                                verify(*this.tail == 1p); // same check repeated just before returning
+                                return head;
+                        }
+                        verify(*this.tail == 1p); // same check repeated just before returning
+                        return 0p;
+                }

libcfa/src/bits/locks.hfa

-                      r2fb35df
+                      r41b8ea4
         struct $thread;
         extern void park( __cfaabi_dbg_ctx_param );
         extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
+        extern void park( void );
+        extern void unpark( struct $thread * this );
         static inline struct $thread * active_thread ();
 …
                                         /* paranoid */ verify( expected == 0p );
                                         if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                                                 park( __cfaabi_dbg_ctx );
+                                                park();
                                                 return true;
+                                        }
 …
                                 else {
                                         if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                                                 unpark( expected __cfaabi_dbg_ctx2 );
+                                                unpark( expected );
                                                 return true;
+                                        }
 …
                                 /* paranoid */ verify( expected == 0p );
                                 if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                                         park( __cfaabi_dbg_ctx );
+                                        park();
                                         /* paranoid */ verify( this.ptr == 1p );
                                         return true;
 …
                         struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
                         if( got == 0p ) return false;
                         unpark( got __cfaabi_dbg_ctx2 );
+                        unpark( got );
                         return true;
+                }

libcfa/src/concurrency/CtxSwitch-i386.S

-                      r2fb35df
+                      r41b8ea4
 // Created On       : Tue Dec 6 12:27:26 2016
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Sun Aug 16 08:46:22 2020
 // Update Count     : 4
+// Last Modified On : Sun Sep  6 18:23:37 2020
+// Update Count     : 5
 //
 …
         // Copy the "from" context argument from the stack to register eax
         // Return address is at 0(%esp), with parameters following
+        // Return address is at 0(%esp), with parameters following.
         movl 4(%esp),%eax
 …
         movl %ebp,FP_OFFSET(%eax)
         // Copy the "to" context argument from the stack to register eax
         // Having pushed three words (= 12 bytes) on the stack, the
         // argument is now at 8 + 12 = 20(%esp)
+        // Copy the "to" context argument from the stack to register eax. Having
+        // pushed 3 words (= 12 bytes) on the stack, the argument is now at
+        // 8 + 12 = 20(%esp).
         movl 20(%esp),%eax

libcfa/src/concurrency/alarm.cfa

r2fb35df	r41b8ea4
130	130
131	131	register_self( &node );
132		park( ~~__cfaabi_dbg_ctx~~ );
	132	park();
133	133
134	134	/* paranoid */ verify( !node.set );

libcfa/src/concurrency/clib/cfathread.cfa

-                      r2fb35df
+                      r41b8ea4
 extern "C" {
         //--------------------
         // Basic thread managenemt
+        // Basic thread management
         CRunner * cfathread_create( void (*main)( CRunner * ) ) {
                 return new( main );
 …
         void cfathread_park( void ) {
                 park( __cfaabi_dbg_ctx );
+                park();
+        }
         void cfathread_unpark( CRunner * thrd ) {
                 unpark( *thrd __cfaabi_dbg_ctx2 );
+                unpark( *thrd );
+        }

libcfa/src/concurrency/clib/cfathread.h

-                      r2fb35df
+                      r41b8ea4
 #include "invoke.h"
 #if defined(__cforall) || defined(__cpluplus)
+#if defined(__cforall) || defined(__cplusplus)
 extern "C" {
 #endif
 …
 #if defined(__cforall) || defined(__cpluplus)
+#if defined(__cforall) || defined(__cplusplus)
+}
 #endif

libcfa/src/concurrency/invoke.h

-                      r2fb35df
+                      r41b8ea4
         };
+        // Wrapper for gdb
+        struct cfathread_coroutine_t { struct $coroutine debug; };
         static inline struct __stack_t * __get_stack( struct $coroutine * cor ) {
 …
                 struct __condition_node_t * dtor_node;
         };
+        // Wrapper for gdb
+        struct cfathread_monitor_t { struct $monitor debug; };
         struct __monitor_group_t {
 …
                 } node;
+                #ifdef __CFA_DEBUG__
+                        // previous function to park/unpark the thread
+                        const char * park_caller;
+                        int park_result;
+                        enum __Coroutine_State park_state;
+                        bool park_stale;
+                        const char * unpark_caller;
+                        int unpark_result;
+                        enum __Coroutine_State unpark_state;
+                        bool unpark_stale;
+                #if defined( __CFA_WITH_VERIFY__ )
+                        unsigned long long canary;
                 #endif
         };
+        // Wrapper for gdb
+        struct cfathread_thread_t { struct $thread debug; };
         #ifdef __CFA_DEBUG__

libcfa/src/concurrency/io.cfa

-                      r2fb35df
+                      r41b8ea4
                 if( block ) {
                         enable_interrupts( __cfaabi_dbg_ctx );
                         park( __cfaabi_dbg_ctx );
+                        park();
                         disable_interrupts();
+                }
 …
                 if(nextt) {
                         unpark( nextt __cfaabi_dbg_ctx2 );
+                        unpark( nextt );
                         enable_interrupts( __cfaabi_dbg_ctx );
                         return true;

libcfa/src/concurrency/io/setup.cfa

-                      r2fb35df
+                      r41b8ea4
                                         thrd.link.next = 0p;
                                         thrd.link.prev = 0p;
-                                        __cfaabi_dbg_debug_do( thrd.unpark_stale = true );
                                         // Fixup the thread state
 …
                                 // unpark the fast io_poller
                                 unpark( &thrd __cfaabi_dbg_ctx2 );
+                                unpark( &thrd );
+                        }
                         else {
 …
+                        }
                 } else {
                         unpark( &thrd __cfaabi_dbg_ctx2 );
+                        unpark( &thrd );
+                }

libcfa/src/concurrency/kernel.cfa

-                      r2fb35df
+                      r41b8ea4
                 thrd_dst->state = Active;
-                __cfaabi_dbg_debug_do(
-                        thrd_dst->park_stale   = true;
-                        thrd_dst->unpark_stale = true;
+                )
                 // Update global state
                 kernelTLS.this_thread = thrd_dst;
 …
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
+                /* paranoid */ verify( thrd_dst->context.SP );
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
+                /* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_dst->canary );
                 // set context switch to the thread that the processor is executing
-                verify( thrd_dst->context.SP );
                 __cfactx_switch( &proc_cor->context, &thrd_dst->context );
                 // when __cfactx_switch returns we are back in the processor coroutine
+                /* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_dst->canary );
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
+                /* paranoid */ verify( thrd_dst->context.SP );
                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 …
                         // The thread has halted, it should never be scheduled/run again
                         // We may need to wake someone up here since
                         unpark( this->destroyer __cfaabi_dbg_ctx2 );
+                        unpark( this->destroyer );
                         this->destroyer = 0p;
                         break RUNNING;
 …
                 // set state of processor coroutine to active and the thread to inactive
                 int old_ticket = __atomic_fetch_sub(&thrd_dst->ticket, 1, __ATOMIC_SEQ_CST);
-                __cfaabi_dbg_debug_do( thrd_dst->park_result = old_ticket; )
                 switch(old_ticket) {
                         case 1:
 …
                         __x87_store;
                 #endif
+                verify( proc_cor->context.SP );
+                /* paranoid */ verify( proc_cor->context.SP );
+                /* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_src->canary );
                 __cfactx_switch( &thrd_src->context, &proc_cor->context );
+                /* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_src->canary );
                 #if defined( __i386 ) || defined( __x86_64 )
                         __x87_load;
 …
         /* paranoid */ #endif
         /* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
+        /* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd->canary );
         if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
 …
 // KERNEL ONLY unpark with out disabling interrupts
+void __unpark(  struct __processor_id_t * id, $thread * thrd __cfaabi_dbg_ctx_param2 ) {
+        // record activity
+        __cfaabi_dbg_record_thrd( *thrd, false, caller );
+void __unpark(  struct __processor_id_t * id, $thread * thrd ) {
         int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
-        __cfaabi_dbg_debug_do( thrd->unpark_result = old_ticket; thrd->unpark_state = thrd->state; )
         switch(old_ticket) {
                 case 1:
 …
+}
 void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
+void unpark( $thread * thrd ) {
         if( !thrd ) return;
         disable_interrupts();
         __unpark( (__processor_id_t*)kernelTLS.this_processor, thrd __cfaabi_dbg_ctx_fwd2 );
+        __unpark( (__processor_id_t*)kernelTLS.this_processor, thrd );
         enable_interrupts( __cfaabi_dbg_ctx );
+}
 void park( __cfaabi_dbg_ctx_param ) {
+void park( void ) {
         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
         disable_interrupts();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
         /* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
-        // record activity
-        __cfaabi_dbg_record_thrd( *kernelTLS.this_thread, true, caller );
         returnToKernel();
 …
                 // atomically release spin lock and block
                 unlock( lock );
                 park( __cfaabi_dbg_ctx );
+                park();
                 return true;
+        }
 …
         // make new owner
         unpark( thrd __cfaabi_dbg_ctx2 );
+        unpark( thrd );
         return thrd != 0p;
 …
         count += diff;
         for(release) {
                 unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
+                unpark( pop_head( waiting ) );
+        }
 …
                         this.prev_thrd = kernelTLS.this_thread;
+                }
-                void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]) {
-                        if(park) {
-                                this.park_caller   = prev_name;
-                                this.park_stale    = false;
+                        }
-                        else {
-                                this.unpark_caller = prev_name;
-                                this.unpark_stale  = false;
+                        }
+                }
+        }
+)

libcfa/src/concurrency/kernel/fwd.hfa

-                      r2fb35df
+                      r41b8ea4
         extern "Cforall" {
                 extern void park( __cfaabi_dbg_ctx_param );
                 extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
+                extern void park( void );
+                extern void unpark( struct $thread * this );
                 static inline struct $thread * active_thread () { return TL_GET( this_thread ); }

libcfa/src/concurrency/kernel/startup.cfa

-                      r2fb35df
+                      r41b8ea4
         link.next = 0p;
         link.prev = 0p;
+        #if defined( __CFA_WITH_VERIFY__ )
+                canary = 0x0D15EA5E0D15EA5E;
+        #endif
         node.next = 0p;

libcfa/src/concurrency/kernel_private.hfa

-                      r2fb35df
+                      r41b8ea4
 // KERNEL ONLY unpark with out disabling interrupts
 void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );
+void __unpark( struct __processor_id_t *, $thread * thrd );
 static inline bool __post(single_sem & this, struct __processor_id_t * id) {
 …
                 else {
                         if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                                 __unpark( id, expected __cfaabi_dbg_ctx2 );
+                                __unpark( id, expected );
                                 return true;
+                        }

libcfa/src/concurrency/monitor.cfa

-                      r2fb35df
+                      r41b8ea4
                 unlock( this->lock );
                 park( __cfaabi_dbg_ctx );
+                park();
                 __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
 …
                 // Release the next thread
                 /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
                 unpark( urgent->owner->waiting_thread __cfaabi_dbg_ctx2 );
+                unpark( urgent->owner->waiting_thread );
                 // Park current thread waiting
                 park( __cfaabi_dbg_ctx );
+                park();
                 // Some one was waiting for us, enter
 …
                 // Park current thread waiting
                 park( __cfaabi_dbg_ctx );
+                park();
                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
 …
         //We need to wake-up the thread
         /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
         unpark( new_owner __cfaabi_dbg_ctx2 );
+        unpark( new_owner );
+}
 …
         // Wake the threads
         for(int i = 0; i < thread_count; i++) {
                 unpark( threads[i] __cfaabi_dbg_ctx2 );
+                unpark( threads[i] );
+        }
         // Everything is ready to go to sleep
         park( __cfaabi_dbg_ctx );
+        park();
         // We are back, restore the owners and recursions
 …
         // unpark the thread we signalled
         unpark( signallee __cfaabi_dbg_ctx2 );
+        unpark( signallee );
         //Everything is ready to go to sleep
         park( __cfaabi_dbg_ctx );
+        park();
 …
                                 // unpark the thread we signalled
                                 unpark( next __cfaabi_dbg_ctx2 );
+                                unpark( next );
                                 //Everything is ready to go to sleep
                                 park( __cfaabi_dbg_ctx );
+                                park();
                                 // We are back, restore the owners and recursions
 …
         //Everything is ready to go to sleep
         park( __cfaabi_dbg_ctx );
+        park();

libcfa/src/concurrency/mutex.cfa

-                      r2fb35df
+                      r41b8ea4
                 append( blocked_threads, kernelTLS.this_thread );
                 unlock( lock );
                 park( __cfaabi_dbg_ctx );
+                park();
+        }
         else {
 …
         this.is_locked = (this.blocked_threads != 0);
         unpark(
                 pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+                pop_head( this.blocked_threads )
         );
         unlock( this.lock );
 …
                 append( blocked_threads, kernelTLS.this_thread );
                 unlock( lock );
                 park( __cfaabi_dbg_ctx );
+                park();
+        }
+}
 …
                 owner = thrd;
                 recursion_count = (thrd ? 1 : 0);
                 unpark( thrd __cfaabi_dbg_ctx2 );
+                unpark( thrd );
+        }
         unlock( lock );
 …
         lock( lock __cfaabi_dbg_ctx2 );
         unpark(
                 pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+                pop_head( this.blocked_threads )
         );
         unlock( lock );
 …
         while(this.blocked_threads) {
                 unpark(
                         pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+                        pop_head( this.blocked_threads )
                 );
+        }
 …
         append( this.blocked_threads, kernelTLS.this_thread );
         unlock( this.lock );
         park( __cfaabi_dbg_ctx );
+        park();
+}
 …
         unlock(l);
         unlock(this.lock);
         park( __cfaabi_dbg_ctx );
+        park();
         lock(l);
+}

libcfa/src/concurrency/preemption.cfa

r2fb35df	r41b8ea4
274	274	kernelTLS.this_stats = this->curr_cluster->stats;
275	275	#endif
276		__unpark( id, this ~~__cfaabi_dbg_ctx2~~ );
	276	__unpark( id, this );
277	277	}
278	278

libcfa/src/concurrency/thread.cfa

-                      r2fb35df
+                      r41b8ea4
         link.prev = 0p;
         link.preferred = -1;
+        #if defined( __CFA_WITH_VERIFY__ )
+                canary = 0x0D15EA5E0D15EA5E;
+        #endif
         node.next = 0p;
 …
 void ^?{}($thread& this) with( this ) {
+        #if defined( __CFA_WITH_VERIFY__ )
+                canary = 0xDEADDEADDEADDEAD;
+        #endif
         unregister(curr_cluster, this);
         ^self_cor{};

libcfa/src/concurrency/thread.hfa

-                      r2fb35df
+                      r41b8ea4
 //----------
 // Park thread: block until corresponding call to unpark, won't block if unpark is already called
 void park( __cfaabi_dbg_ctx_param );
+void park( void );
 //----------
 // Unpark a thread, if the thread is already blocked, schedule it
 //                  if the thread is not yet block, signal that it should rerun immediately
 void unpark( $thread * this __cfaabi_dbg_ctx_param2 );
+void unpark( $thread * this );
 forall( dtype T | is_thread(T) )
 static inline void unpark( T & this __cfaabi_dbg_ctx_param2 ) { if(!&this) return; unpark( get_thread( this ) __cfaabi_dbg_ctx_fwd2 );}
+static inline void unpark( T & this ) { if(!&this) return; unpark( get_thread( this ) );}
 //----------

src/Parser/lex.ll

-                      r2fb35df
+                      r41b8ea4
  * Created On       : Sat Sep 22 08:58:10 2001
  * Last Modified By : Peter A. Buhr
  * Last Modified On : Sat Feb 15 11:05:50 2020
  * Update Count     : 737
+ * Last Modified On : Tue Oct  6 18:15:41 2020
+ * Update Count     : 743
  */
 …
 #define IDENTIFIER_RETURN()     RETURN_VAL( typedefTable.isKind( yytext ) )
 #ifdef HAVE_KEYWORDS_FLOATXX                                                            // GCC >= 7 => keyword, otherwise typedef
+#ifdef HAVE_KEYWORDS_FLOATXX                                                    // GCC >= 7 => keyword, otherwise typedef
 #define FLOATXX(v) KEYWORD_RETURN(v);
 #else
 …
 __restrict__    { KEYWORD_RETURN(RESTRICT); }                   // GCC
 return                  { KEYWORD_RETURN(RETURN); }
         /* resume                       { KEYWORD_RETURN(RESUME); }                             // CFA */
+ /* resume                      { KEYWORD_RETURN(RESUME); }                             // CFA */
 short                   { KEYWORD_RETURN(SHORT); }
 signed                  { KEYWORD_RETURN(SIGNED); }

src/Parser/parser.yy

-                      r2fb35df
+                      r41b8ea4
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu May 28 12:11:45 2020
 // Update Count     : 4500
+// Last Modified On : Tue Oct  6 18:24:18 2020
+// Update Count     : 4610
 //
 …
 %token OTYPE FTYPE DTYPE TTYPE TRAIT                                    // CFA
 %token SIZEOF OFFSETOF
 // %token RESUME                                                                        // CFA
 %token SUSPEND                                                                  // CFA
+// %token RESUME                                                                                        // CFA
+%token SUSPEND                                                                                  // CFA
 %token ATTRIBUTE EXTENSION                                                              // GCC
 %token IF ELSE SWITCH CASE DEFAULT DO WHILE FOR BREAK CONTINUE GOTO RETURN
 …
 %type<en> conditional_expression                constant_expression                     assignment_expression           assignment_expression_opt
 %type<en> comma_expression                              comma_expression_opt
 %type<en> argument_expression_list_opt          argument_expression                     default_initialize_opt
+%type<en> argument_expression_list_opt  argument_expression                     default_initialize_opt
 %type<ifctl> if_control_expression
 %type<fctl> for_control_expression              for_control_expression_list
 …
 %type<decl> assertion assertion_list assertion_list_opt
 %type<en>   bit_subrange_size_opt bit_subrange_size
+%type<en> bit_subrange_size_opt bit_subrange_size
 %type<decl> basic_declaration_specifier basic_type_name basic_type_specifier direct_type indirect_type
 …
         | '(' aggregate_control '&' ')' cast_expression         // CFA
                 { $$ = new ExpressionNode( build_keyword_cast( $2, $5 ) ); }
-                // VIRTUAL cannot be opt because of look ahead issues
         | '(' VIRTUAL ')' cast_expression                                       // CFA
                 { $$ = new ExpressionNode( new VirtualCastExpr( maybeMoveBuild< Expression >( $4 ), maybeMoveBuildType( nullptr ) ) ); }
 …
         | unary_expression assignment_operator assignment_expression
+                {
                         if ( $2 == OperKinds::AtAssn ) {
                                 SemanticError( yylloc, "C @= assignment is currently unimplemented." ); $$ = nullptr;
                         } else {
+//                      if ( $2 == OperKinds::AtAssn ) {
+//                              SemanticError( yylloc, "C @= assignment is currently unimplemented." ); $$ = nullptr;
+//                      } else {
                                 $$ = new ExpressionNode( build_binary_val( $2, $1, $3 ) );
                         } // if
+//                      } // if
+                }
         | unary_expression '=' '{' initializer_list_opt comma_opt '}'
 …
 typedef_expression:
                 // GCC, naming expression type: typedef name = exp; gives a name to the type of an expression
+                // deprecated GCC, naming expression type: typedef name = exp; gives a name to the type of an expression
         TYPEDEF identifier '=' assignment_expression
+                {
+                        // $$ = DeclarationNode::newName( 0 );                  // unimplemented
+                        SemanticError( yylloc, "Typedef expression is currently unimplemented." ); $$ = nullptr;
+                        SemanticError( yylloc, "Typedef expression is deprecated, use typeof(...) instead." ); $$ = nullptr;
+                }
         | typedef_expression pop ',' push identifier '=' assignment_expression
+                {
+                        // $$ = DeclarationNode::newName( 0 );                  // unimplemented
+                        SemanticError( yylloc, "Typedef expression is currently unimplemented." ); $$ = nullptr;
+                }
+        ;
+//c_declaration:
+//      declaring_list pop ';'
+//      | typedef_declaration pop ';'
+//      | typedef_expression pop ';'                                            // GCC, naming expression type
+//      | sue_declaration_specifier pop ';'
+//      ;
+//
+//declaring_list:
+//              // A semantic check is required to ensure asm_name only appears on declarations with implicit or explicit static
+//              // storage-class
+//       declarator asm_name_opt initializer_opt
+//              {
+//                      typedefTable.addToEnclosingScope( IDENTIFIER );
+//                      $$ = ( $2->addType( $1 ))->addAsmName( $3 )->addInitializer( $4 );
+//              }
+//      | declaring_list ',' attribute_list_opt declarator asm_name_opt initializer_opt
+//              {
+//                      typedefTable.addToEnclosingScope( IDENTIFIER );
+//                      $$ = $1->appendList( $1->cloneBaseType( $4->addAsmName( $5 )->addInitializer( $6 ) ) );
+//              }
+//      ;
+                        SemanticError( yylloc, "Typedef expression is deprecated, use typeof(...) instead." ); $$ = nullptr;
+                }
+        ;
 c_declaration:
 …
                 { $$ = distAttr( $1, $2 ); }
         | typedef_declaration
         | typedef_expression                                                            // GCC, naming expression type
+        | typedef_expression                                                            // deprecated GCC, naming expression type
         | sue_declaration_specifier
+        ;
 …
                 { yyy = true; $$ = AggregateDecl::Union; }
         | EXCEPTION                                                                                     // CFA
+                { yyy = true; $$ = AggregateDecl::Exception; }
+                // { yyy = true; $$ = AggregateDecl::Exception; }
+                { SemanticError( yylloc, "exception aggregate is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
+        ;

tests/.expect/array.txt

r2fb35df	r41b8ea4
1	1	array.cfa: In function '_X4mainFi___1':
2		array.cfa:54:9: note: #pragma message: Compiled
	2	array.cfa:55:9: note: #pragma message: Compiled

tests/.expect/expression.txt

r2fb35df	r41b8ea4
1	1	expression.cfa: In function '_X4mainFi___1':
2		expression.cfa:88:9: note: #pragma message: Compiled
	2	expression.cfa:89:9: note: #pragma message: Compiled

tests/array.cfa

-                      r2fb35df
+                      r41b8ea4
 //                               -*- Mode: C -*-
 //
+//                               -*- Mode: C -*-
+//
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
 //
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
 //
+//
 // array.cfa -- test array declarations
 //
+//
 // Author           : Peter A. Buhr
 // Created On       : Tue Feb 19 21:18:06 2019
 …
 // Last Modified On : Sun Sep 27 09:05:40 2020
 // Update Count     : 4
 //
+//
 int a1[0];
 …
 int main() {
+        #pragma message( "Compiled" )                                           // force non-empty .expect file
+        #if !defined(NO_COMPILED_PRAGMA)
+                #pragma message( "Compiled" )   // force non-empty .expect file
+        #endif
+}

tests/concurrent/park/contention.cfa

-                      r2fb35df
+                      r41b8ea4
                 if(blocked[idx]) {
                         Thread * thrd = __atomic_exchange_n(&blocked[idx], 0p, __ATOMIC_SEQ_CST);
                         unpark( *thrd __cfaabi_dbg_ctx2 );
+                        unpark( *thrd );
                 } else {
                         Thread * thrd = __atomic_exchange_n(&blocked[idx], &this, __ATOMIC_SEQ_CST);
                         unpark( *thrd __cfaabi_dbg_ctx2 );
                         park( __cfaabi_dbg_ctx );
+                        unpark( *thrd );
+                        park();
+                }
+        }
 …
                         int idx = myrand() % blocked_size;
                         Thread * thrd = __atomic_exchange_n(&blocked[idx], 0p, __ATOMIC_SEQ_CST);
                         unpark( *thrd __cfaabi_dbg_ctx2 );
+                        unpark( *thrd );
                         yield( myrand() % 20 );
+                }

tests/concurrent/park/force_preempt.cfa

-                      r2fb35df
+                      r41b8ea4
                 // Unpark this thread, don't force a yield
                 unpark( this __cfaabi_dbg_ctx2 );
+                unpark( this );
                 assert(mask == 0xCAFEBABA);
 …
                 // Park this thread,
                 assert(mask == (id_hash ^ 0xCAFEBABA));
                 park( __cfaabi_dbg_ctx );
+                park();
                 assert(mask == (id_hash ^ 0xCAFEBABA));

tests/concurrent/park/start_parked.cfa

-                      r2fb35df
+                      r41b8ea4
 thread Parker {};
 void main( Parker & ) {
         park( __cfaabi_dbg_ctx );
+        park();
+}
 …
         for(1000) {
                 Parker parker;
                 unpark( parker __cfaabi_dbg_ctx2 );
+                unpark( parker );
+        }
         printf( "done\n" );                                                                     // non-empty .expect file

tests/expression.cfa

-                      r2fb35df
+                      r41b8ea4
         (S)@{2}`mary;
+        #pragma message( "Compiled" )                   // force non-empty .expect file
+        #if !defined(NO_COMPILED_PRAGMA)
+                #pragma message( "Compiled" )   // force non-empty .expect file
+        #endif
 } // main

Context Navigation

Changes in / [2fb35df:41b8ea4]

Legend:

Download in other formats: